In [39]:
import os
import cv2
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision import transforms

class VideoDataset(Dataset):
    """Dataset of whole videos paired with per-frame integer labels.

    Every ``<name>.mp4`` file in ``data_dir`` that has a ``<name>.txt`` file
    next to it becomes one item. Videos without a label file are skipped.

    Args:
        data_dir: Directory holding both the ``.mp4`` videos and the ``.txt``
            label files.
        transform: Optional callable applied to each RGB frame (the array
            returned by OpenCV after BGR->RGB conversion).
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.data_pairs = self._get_data_pairs()

    def _get_data_pairs(self):
        """Return ``(video_file, label_file)`` name pairs, sorted by video name.

        ``os.listdir`` returns entries in arbitrary order, so the names are
        sorted to keep ``dataset[idx]`` stable between runs and machines.
        """
        # A set makes the per-video label lookup O(1) instead of a list scan.
        files = set(os.listdir(self.data_dir))
        data_pairs = []

        for video_file in sorted(files):
            if not video_file.endswith('.mp4'):
                continue
            label_file = os.path.splitext(video_file)[0] + '.txt'
            if label_file in files:
                data_pairs.append((video_file, label_file))

        return data_pairs

    def __len__(self):
        """Number of (video, label file) pairs found in ``data_dir``."""
        return len(self.data_pairs)

    def _read_frames(self, video_path):
        """Decode every frame of ``video_path`` and stack them into one tensor.

        Raises:
            RuntimeError: If no frame could be decoded (missing, corrupt or
                empty video).
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
                if self.transform:
                    frame = self.transform(frame)
                # Without a tensor-producing transform the frame is still an
                # array, which torch.stack cannot handle.
                if not torch.is_tensor(frame):
                    frame = torch.as_tensor(np.asarray(frame))
                frames.append(frame)
        finally:
            # Release the capture handle even if decoding or the transform fails.
            cap.release()

        if not frames:
            raise RuntimeError(f"No frames could be read from video: {video_path}")

        # Stack frames into a single tensor (new leading frame dimension)
        return torch.stack(frames)

    @staticmethod
    def _read_labels(label_path):
        """Parse one integer label per non-blank line into a float32 tensor.

        Raises:
            ValueError: If a non-blank line is not a valid integer.
        """
        with open(label_path, 'r') as f:
            # Skip blank lines (e.g. a trailing empty line) instead of crashing.
            values = [int(line) for line in f if line.strip()]
        return torch.tensor(values, dtype=torch.float32)

    def __getitem__(self, idx):
        """Load video ``idx`` and its labels.

        Returns:
            tuple: ``(frames, labels)`` where ``frames`` holds all decoded
            frames stacked along the first dimension and ``labels`` is a 1-D
            float32 tensor with one entry per non-blank label line.

        Raises:
            RuntimeError: If the video yields no frames.
        """
        video_file, label_file = self.data_pairs[idx]
        video_path = os.path.join(self.data_dir, video_file)
        label_path = os.path.join(self.data_dir, label_file)

        print(f"Loading video: {video_file}, label: {label_file}")

        frames = self._read_frames(video_path)
        labels = self._read_labels(label_path)

        return frames, labels


In [40]:
from torch.utils.data import DataLoader

# Per-frame preprocessing and augmentation. ToTensor comes first because the
# dataset hands over raw frames; the three RandomApply steps are each applied
# with probability 0.5, and Normalize runs last on the tensor.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomApply([transforms.ColorJitter(contrast=0.5)], p=0.5),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.5),
    # NOTE(review): RandomErasing has its own probability argument, so wrapping
    # it in RandomApply presumably lowers the effective rate below 0.5 - confirm.
    transforms.RandomApply([transforms.RandomErasing()], p=0.5),
    transforms.Normalize((0.5,), (0.5,))
])

# Create the dataset and dataloader
# Directory containing videos and labels. Set the VIDEO_DATA_DIR environment
# variable to run on another machine; the original local path stays the default.
data_dir = os.environ.get('VIDEO_DATA_DIR', r'C:\Users\rigel\Downloads\data')
train_dataset = VideoDataset(data_dir, transform=transform)
# Batch size 1: each item is already a whole video (all of its frames).
train_loader = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)


In [41]:
# Sanity check: pull a single item from the loader and report its tensor shapes.
for frames, labels in train_loader:
    # The loader adds a batch dimension of size 1 around each video; drop it.
    frames, labels = frames.squeeze(0), labels.squeeze(0)
    print(f"Frames shape: {frames.shape}, Labels shape: {labels.shape}")
    break  # one video is enough for the check


Loading video: data28.mp4, label: data28.txt
Frames shape: torch.Size([648, 3, 480, 640]), Labels shape: torch.Size([648])


In [48]:
import torch.nn as nn
import torch.nn.functional as F

class Steering_Model(nn.Module):
    """Small CNN that regresses one scalar per input frame.

    Four 3x3 convolutions (with two 2x2 max-pools) feed a four-layer
    fully connected head that ends in a single output unit.

    Args:
        input_shape: ``(channels, height, width)`` of one input frame. It
            sets the first convolution's input channels and the size of the
            first linear layer. Defaults to ``(3, 480, 640)``.
    """

    def __init__(self, input_shape=(3, 480, 640)):
        super(Steering_Model, self).__init__()
        # Only layers that forward() actually uses are registered, so
        # model.parameters() contains no dead weights.
        self.dropout = nn.Dropout(0.3)

        # Calculate the size of the flattened features
        self._to_linear = None
        self.convs = nn.Sequential(
            nn.Conv2d(input_shape[0], 64, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 16, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(16, 8, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self._get_conv_output(input_shape)

        self.fc1 = nn.Linear(self._to_linear, 16)
        self.fc2 = nn.Linear(16, 16)
        self.fc3 = nn.Linear(16, 16)
        self.fc4 = nn.Linear(16, 1)

    def _get_conv_output(self, shape):
        """Store in ``_to_linear`` the flattened feature count for one ``shape`` input."""
        # A dummy forward pass is only needed for its output size, so skip
        # autograd bookkeeping.
        with torch.no_grad():
            o = self.convs(torch.zeros(1, *shape))
        # Batch size is 1, so the element count equals the per-sample feature count.
        self._to_linear = o.numel()

    def forward(self, x):
        """Map frames ``(N, C, H, W)`` (or one unbatched ``(C, H, W)`` frame) to ``(N, 1)``.

        Raises:
            RuntimeError: If the spatial size does not match ``input_shape``;
                the first linear layer rejects the mismatched feature count.
        """
        if x.dim() == 3:
            # Treat a single unbatched frame as a batch of one.
            x = x.unsqueeze(0)
        x = self.convs(x)
        # Flatten per sample. Unlike view(-1, n), this never folds a
        # wrong-sized input into a larger batch without an error.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

# Example usage
# `summary` is imported inside this cell so the cell also runs on a fresh
# kernel; it was previously called here before any import of torchsummary.
# The class is defined once above; the verbatim second copy that used to
# follow in this cell has been dropped.
from torchsummary import summary

model = Steering_Model()
summary(model, (3, 480, 640))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 478, 638]           1,792
              ReLU-2         [-1, 64, 478, 638]               0
            Conv2d-3         [-1, 32, 476, 636]          18,464
              ReLU-4         [-1, 32, 476, 636]               0
         MaxPool2d-5         [-1, 32, 238, 318]               0
            Conv2d-6         [-1, 16, 236, 316]           4,624
              ReLU-7         [-1, 16, 236, 316]               0
            Conv2d-8          [-1, 8, 234, 314]           1,160
              ReLU-9          [-1, 8, 234, 314]               0
        MaxPool2d-10          [-1, 8, 117, 157]               0
           Linear-11                   [-1, 16]       2,351,248
           Linear-12                   [-1, 16]             272
           Linear-13                   [-1, 16]             272
          Dropout-14                   

In [49]:
from torchsummary import summary

# Build a fresh, untrained Steering_Model instance (rebinds the global `model`)
model = Steering_Model()

# Print the layer-by-layer summary for a single (3, 480, 640) input
summary(model, (3, 480, 640))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 478, 638]           1,792
              ReLU-2         [-1, 64, 478, 638]               0
            Conv2d-3         [-1, 32, 476, 636]          18,464
              ReLU-4         [-1, 32, 476, 636]               0
         MaxPool2d-5         [-1, 32, 238, 318]               0
            Conv2d-6         [-1, 16, 236, 316]           4,624
              ReLU-7         [-1, 16, 236, 316]               0
            Conv2d-8          [-1, 8, 234, 314]           1,160
              ReLU-9          [-1, 8, 234, 314]               0
        MaxPool2d-10          [-1, 8, 117, 157]               0
           Linear-11                   [-1, 16]       2,351,248
           Linear-12                   [-1, 16]             272
           Linear-13                   [-1, 16]             272
          Dropout-14                   

In [50]:
import torch.optim as optim

# Model initialization
model = Steering_Model()

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one optimizer step per frame, one loader item per video.
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch in train_loader:
        images, labels = batch
        # The loader wraps each video in a batch dimension of size 1; drop it.
        images = images.squeeze(0)
        labels = labels.squeeze(0)
        batch_loss = 0

        # Iterate over each frame and label
        for i in range(images.size(0)):
            frame = images[i].unsqueeze(0)  # Get the ith frame and add batch dimension
            label = labels[i].view(-1, 1)  # Get the ith label and reshape
            # Clear gradients before every backward pass. Clearing only once
            # per video would let each step use the summed gradients of all
            # earlier frames in that video.
            optimizer.zero_grad()
            output = model(frame)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            batch_loss += loss.item()

        # Mean per-frame loss for this video.
        running_loss += batch_loss / images.size(0)
    # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/max(len(train_loader), 1)}")


Loading video: data13.mp4, label: data13.txt


KeyboardInterrupt: 

In [None]:
# Testing loop (use actual test dataset and loader)
# Accuracy here means: the rounded model output equals the integer label.
model.eval()
total = 0
correct = 0
with torch.no_grad():
    # NOTE: train_loader applies the random augmentations defined for training;
    # use a separate test_loader in practice.
    for batch in train_loader:  # Use test_loader in practice
        images, labels = batch
        images = images.squeeze(0)
        labels = labels.squeeze(0)

        # Iterate over each frame and label
        for i in range(images.size(0)):
            frame = images[i].unsqueeze(0)  # Get the ith frame and add batch dimension
            label = labels[i].view(-1, 1)  # Get the ith label and reshape
            output = model(frame)
            predicted = output.round().long()
            total += 1
            correct += (predicted == label.long()).sum().item()

if total:
    print(f"Accuracy of the model: {100 * correct / total} %")
else:
    # Nothing was evaluated (empty loader); avoid dividing by zero.
    print("Accuracy of the model: undefined (no frames were evaluated)")
