# In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
import pandas as pd

# Define the Dataset class for multi-view data
class MultiViewDataset(Dataset):
    """Dataset of fixed-length feature sequences stored as ``.npy`` files.

    Every ``.npy`` file found in any of the view directories becomes one
    sample. Each file must hold a pickled dict whose ``'resnet'`` entry is a
    2-D array; it is zero-padded or truncated along its first axis to
    ``sequence_length`` rows.

    NOTE(review): a sample is backed by a single file, so the returned
    feature dict maps *every* view name in ``dirs`` to the same sequence.

    Args:
        dirs: Mapping of view name to the directory holding that view's
            ``.npy`` files.
        sequence_length: Number of rows every returned sequence has.
        is_test: When true, no label is parsed from the file name and
            ``__getitem__`` returns the file's base name instead of a label.

    Raises:
        ValueError: If ``is_test`` is false and a file name does not start
            with an integer class index followed by ``'_'``.
    """

    def __init__(self, dirs, sequence_length=300, is_test=False):
        self.dirs = dirs
        self.sequence_length = sequence_length
        self.is_test = is_test

        # (file_path, class_idx) pairs; class_idx is None for test data.
        self.files = []
        for dir_path in dirs.values():
            # os.listdir returns entries in arbitrary order; sorting keeps
            # sample indices (and prediction row order) reproducible.
            for file_name in sorted(os.listdir(dir_path)):
                if not file_name.endswith('.npy'):
                    continue
                file_path = os.path.join(dir_path, file_name)
                class_idx = None if is_test else self._parse_label(file_name)
                self.files.append((file_path, class_idx))

    @staticmethod
    def _parse_label(file_name):
        """Return the integer class index encoded before the first ``'_'``."""
        prefix = file_name.split('_')[0]
        try:
            return int(prefix)
        except ValueError as err:
            raise ValueError(
                f"Cannot parse a class index from file name {file_name!r}; "
                "expected '<int>_<anything>.npy'"
            ) from err

    def _fit_length(self, sequence):
        """Zero-pad or truncate ``sequence`` to ``self.sequence_length`` rows."""
        missing = self.sequence_length - len(sequence)
        if missing > 0:
            return np.pad(sequence, ((0, missing), (0, 0)), mode='constant', constant_values=0)
        return sequence[:self.sequence_length]

    def __len__(self):
        """Return the number of ``.npy`` files collected from all views."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(features, label)``, or ``(features, file_name)`` for test data.

        ``features`` maps each view name to a float32 tensor with
        ``sequence_length`` rows.
        """
        file_path, class_idx = self.files[idx]
        # SECURITY: allow_pickle=True unpickles the file content, which can
        # execute arbitrary code. Only load feature files from trusted sources.
        data = np.load(file_path, allow_pickle=True).item()

        # The same array backs every view, so fix its length once.
        sequence = self._fit_length(data['resnet'])
        features = {
            view: torch.tensor(sequence, dtype=torch.float32)
            for view in self.dirs
        }

        if self.is_test:
            return features, os.path.basename(file_path)
        return features, class_idx

# Define the model
class MultiViewModel(nn.Module):
    """Two-layer classifier applied to a sequence averaged over dimension 1.

    Despite the name, ``forward`` takes a single tensor (one view), not a
    dict of views.

    Args:
        input_dim: Size of the last dimension of the input features.
        hidden_dim: Width of the hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=2048, hidden_dim=512, num_classes=6):
        super().__init__()
        # Attribute names are kept so existing state dicts still load.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return class logits for ``x``.

        ``x`` is averaged over dimension 1 before the linear layers, so it is
        expected to be shaped ``(batch, sequence, input_dim)``.
        """
        pooled = x.mean(dim=1)
        hidden = torch.relu(self.fc1(pooled))
        return self.fc2(hidden)

# Training hyperparameters
learning_rate = 0.001
num_epochs = 100
batch_size = 32
sequence_length = 300

# Feature directories for the test split, keyed by view name
test_dirs = {
    'frontal': 'dataset_files/r2+1d_test_features/frontal_view',
    'left': 'dataset_files/r2+1d_test_features/left_side_mirror_view',
    'right': 'dataset_files/r2+1d_test_features/right_side_mirror_view',
}

# Feature directories for the validation split, keyed by view name
val_dirs = {
    'frontal': 'dataset_files/r2+1d_val_features/frontal_view',
    'left': 'dataset_files/r2+1d_val_features/left_side_mirror_view',
    'right': 'dataset_files/r2+1d_val_features/right_side_mirror_view',
}

# Feature directories for the training split, keyed by view name
train_dirs = {
    'frontal': 'dataset_files/r2+1d_training_features/R(2+1)D_Training_Features/frontal_view',
    'left': 'dataset_files/r2+1d_training_features/R(2+1)D_Training_Features/left_side_mirror_view',
    'right': 'dataset_files/r2+1d_training_features/R(2+1)D_Training_Features/right_side_mirror_view',
}

# One dataset per split; only the test split is built without labels
train_dataset = MultiViewDataset(dirs=train_dirs, sequence_length=sequence_length)
val_dataset = MultiViewDataset(dirs=val_dirs, sequence_length=sequence_length)
test_dataset = MultiViewDataset(dirs=test_dirs, sequence_length=sequence_length, is_test=True)

def collate_fn(batch):
    """Collate ``(features, target)`` samples into batched tensors.

    Args:
        batch: Sequence of ``(features, target)`` pairs, where ``features``
            maps a view name to a 2-D tensor and ``target`` is an integer
            class index, a file-name string (test samples), or ``None``.

    Returns:
        A ``(features_tensor, targets)`` pair. ``features_tensor`` maps each
        view name to the samples stacked along a new first dimension; any
        sequence shorter than the longest ``'frontal'`` sequence in the batch
        is zero-padded up to that length. ``targets`` is a ``torch.long``
        tensor for integer labels, the list of file names for string
        targets, or ``None`` when the first target is ``None``.
    """
    features, targets = zip(*batch)
    max_length = max(sample['frontal'].size(0) for sample in features)

    padded_features = {view: [] for view in features[0]}
    for sample in features:
        for view, sequence in sample.items():
            missing = max_length - sequence.size(0)
            if missing > 0:
                # new_zeros keeps the padding on the same dtype and device
                # as the sequence it extends.
                padding = sequence.new_zeros(missing, sequence.size(1))
                sequence = torch.cat([sequence, padding], dim=0)
            padded_features[view].append(sequence)

    features_tensor = {
        view: torch.stack(sequences)
        for view, sequences in padded_features.items()
    }

    first_target = targets[0]
    if first_target is None:
        return features_tensor, None
    if isinstance(first_target, str):
        # Test samples carry file names, which cannot be packed into a
        # long tensor; hand them back unchanged.
        return features_tensor, list(targets)
    return features_tensor, torch.tensor(targets, dtype=torch.long)

# Batch iterators for each split; only the training split is shuffled
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

# Network, cross-entropy loss, and an Adam optimizer over the network's parameters
model = MultiViewModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# Define the training function
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    """Train ``model`` and print training and validation metrics every epoch.

    Only the ``'frontal'`` entry of each batch's feature dict is fed to the
    model. The model is moved to CUDA when available, otherwise to the CPU.

    Args:
        model: Module mapping a batch of frontal features to class logits.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        train_loader: Iterable of ``(features_dict, labels)`` training batches.
        val_loader: Iterable of ``(features_dict, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. Metrics are reported with ``print``.

    Raises:
        ValueError: If either loader yields no samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    for epoch in range(num_epochs):
        epoch_loss, epoch_acc, _, _ = _run_epoch(
            model, criterion, train_loader, device, optimizer
        )
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

        # Validation
        val_epoch_loss, val_epoch_acc, val_labels, val_preds = _run_epoch(
            model, criterion, val_loader, device
        )
        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        print(f"Validation Loss: {val_epoch_loss:.4f}, Validation Accuracy: {val_epoch_acc:.4f}, Validation F1 Score: {val_f1:.4f}")


def _run_epoch(model, criterion, loader, device, optimizer=None):
    """Run one pass over ``loader``; update weights only if ``optimizer`` is given.

    Returns ``(mean_loss, accuracy, labels, predictions)``, where the two
    lists hold one integer per sample in iteration order.
    """
    training = optimizer is not None
    model.train(training)

    total_loss, correct, seen = 0.0, 0, 0
    all_labels, all_preds = [], []

    # Gradients are only tracked while training.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            frontal = inputs['frontal'].to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(frontal)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch value is
            # a per-sample mean even when the last batch is smaller.
            total_loss += loss.item() * frontal.size(0)
            predicted = outputs.argmax(dim=1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

            all_labels.extend(labels.cpu().tolist())
            all_preds.extend(predicted.cpu().tolist())

    if seen == 0:
        raise ValueError("data loader yielded no samples")

    # Loss and accuracy share the number of samples actually seen, so they
    # stay consistent under drop_last or a custom sampler.
    return total_loss / seen, correct / seen, all_labels, all_preds

# Run the training loop on the training and validation loaders
train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=num_epochs,
)

# Generate CSV output for test set
def generate_test_csv(model, dataloader, output_file):
    """Write one-hot class predictions for every sample in ``dataloader`` to CSV.

    The model is put in eval mode on CUDA when available (otherwise CPU) and
    fed the ``'frontal'`` entry of each batch. The CSV has a ``video_name``
    column followed by one column per class, holding 1.0 for the predicted
    class and 0.0 elsewhere.

    Args:
        model: Module mapping a batch of frontal features to class logits.
        dataloader: Iterable of ``(features_dict, file_names)`` batches.
        output_file: Destination path of the CSV file.
    """
    class_names = ["left_turn", "right_turn", "lane_change_left", "lane_change_right", "straight", "slow_or_stop"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    video_names = []
    predicted_classes = []
    with torch.no_grad():
        for batch_features, batch_names in dataloader:
            on_device = {
                view: tensor.to(device)
                for view, tensor in batch_features.items()
            }
            logits = model(on_device['frontal'])
            batch_preds = logits.max(dim=1).indices
            predicted_classes.extend(batch_preds.cpu().numpy())
            video_names.extend(batch_names)

    # Row i of the identity matrix is the one-hot encoding of class i.
    identity = np.eye(len(class_names))
    table = pd.DataFrame(identity[predicted_classes], columns=class_names)
    table.insert(0, 'video_name', video_names)
    table.to_csv(output_file, index=False)

# Write the model's test-set predictions to disk
output_file = 'test_predictions.csv'
generate_test_csv(model=model, dataloader=test_loader, output_file=output_file)
