In [2]:
import cv2
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class SimpleCrashNN(nn.Module):
    """Small 3D-convolutional network that maps a video clip to one probability.

    Two Conv3d + BatchNorm3d + ReLU stages (3 -> 8 -> 16 channels, kernel 3 with
    padding 1, so the temporal and spatial sizes are preserved) are followed by
    global average pooling over (T, H, W), a single linear unit and a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # First conv stage: RGB input -> 8 feature channels.
        self.conv1 = nn.Conv3d(3, 8, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm3d(8)
        # Second conv stage: 8 -> 16 feature channels.
        self.conv2 = nn.Conv3d(8, 16, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm3d(16)
        # Collapse every (T, H, W) volume to a single value per channel.
        self.pool = nn.AdaptiveAvgPool3d(1)
        # One logit per clip.
        self.fc = nn.Linear(16, 1)

    def forward(self, x):
        """Return a (B, 1) tensor of values in [0, 1] for a (B, 3, T, H, W) input."""
        stage1 = torch.relu(self.bn1(self.conv1(x)))        # (B, 8, T, H, W)
        stage2 = torch.relu(self.bn2(self.conv2(stage1)))   # (B, 16, T, H, W)
        pooled = self.pool(stage2)                          # (B, 16, 1, 1, 1)
        flat = pooled.view(pooled.size(0), -1)              # (B, 16)
        logit = self.fc(flat)                               # (B, 1)
        return logit.sigmoid()

# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network on that device and switch it to evaluation mode
# (BatchNorm then uses its running statistics instead of batch statistics).
# NOTE(review): no weights are loaded in the cells visible here, so the
# network appears to run with its random initialisation - confirm intended.
model = SimpleCrashNN().to(device).eval()
print("Simple custom NN initialized and set to eval mode.")

Simple custom NN initialized and set to eval mode.


In [4]:
# Read the train/test metadata tables and report how many videos each lists.
train_df = pd.read_csv('../DATA/train.csv')
test_df = pd.read_csv('../DATA/test.csv')
print("Training data loaded. Number of training videos:", train_df.shape[0])
print("Test data loaded. Number of test videos:", test_df.shape[0])

Training data loaded. Number of training videos: 1500
Test data loaded. Number of test videos: 1344


In [5]:
def extract_frames(video_path, num_frames=16, resize=(224, 224)):
    """Sample evenly spaced RGB frames from a video and pack them into a tensor.

    Frame indices are chosen with ``np.linspace`` over the frame count reported
    by OpenCV. Each selected frame is resized, converted from BGR to RGB and
    scaled to ``[0, 1]`` as float32. If fewer than ``num_frames`` frames could
    be collected (short video, or decoding stopped early), the last collected
    frame is repeated until the clip has ``num_frames`` frames.

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned clip; must be positive.
        resize: Target size handed to ``cv2.resize`` for every kept frame.

    Returns:
        A float32 tensor of shape ``(1, 3, num_frames, H, W)``, or ``None`` when
        the video cannot be opened or not a single frame can be decoded.

    Raises:
        ValueError: If ``num_frames`` is not positive.
    """
    if num_frames <= 0:
        raise ValueError("num_frames must be a positive integer")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return None

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A set gives O(1) membership tests inside the per-frame loop below.
        wanted = set(np.linspace(0, total_frames - 1, num_frames, dtype=int).tolist())

        frames = []
        for idx in range(total_frames):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is actually decodable.
                break
            if idx in wanted:
                frame = cv2.resize(frame, resize)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame.astype(np.float32) / 255.0)  # pixels in [0, 1]
    finally:
        # Release the capture on every path, including early returns and errors.
        cap.release()

    if not frames:
        # Nothing decodable: there is no "last frame" to pad with, so report
        # failure the same way as an unopenable file instead of crashing.
        print(f"No frames could be read from video file: {video_path}")
        return None

    # Pad short clips by repeating the last collected frame.
    frames.extend([frames[-1]] * (num_frames - len(frames)))

    # (T, H, W, C) -> (C, T, H, W) -> (1, C, T, H, W)
    clip = np.transpose(np.stack(frames, axis=0), (3, 0, 1, 2))
    frames_tensor = torch.from_numpy(clip).unsqueeze(0)
    return frames_tensor

In [6]:
def predict_video(video_path):
    """Return the model's scalar output for one video file.

    The clip is built with ``extract_frames`` (16 frames at 224x224) and run
    through the module-level ``model`` on ``device`` without tracking
    gradients. When frame extraction fails (``None``), 0.0 is returned as the
    default probability.
    """
    clip = extract_frames(video_path, num_frames=16, resize=(224,224))
    if clip is None:
        # Video could not be processed: fall back to the default probability.
        return 0.0

    clip = clip.to(device)
    with torch.no_grad():
        # Model output has shape (B, 1) with B == 1, so .item() yields a float.
        return model(clip).item()

In [7]:
# Score every training video listed in train_df.
# Files live under "../DATA/train" and are named after the id, zero-padded
# to five digits (e.g. 01924.mp4).
train_predictions = []

for row_label, raw_id in train_df['id'].items():
    clip_name = "{:05d}.mp4".format(int(float(raw_id)))
    clip_path = os.path.join("../DATA/train", clip_name)
    train_predictions.append(predict_video(clip_path))
    # Progress message every 50th index label.
    if row_label % 50 == 0:
        print(f"Processed {row_label} training videos...")

train_df['predicted_score'] = train_predictions
print("Training predictions generated.")

Processed 0 training videos...
Processed 50 training videos...
Processed 100 training videos...
Processed 150 training videos...
Processed 200 training videos...
Processed 250 training videos...
Processed 300 training videos...
Processed 350 training videos...
Processed 400 training videos...
Processed 450 training videos...
Processed 500 training videos...
Processed 550 training videos...
Processed 600 training videos...
Processed 650 training videos...
Processed 700 training videos...
Processed 750 training videos...
Processed 800 training videos...
Processed 850 training videos...
Processed 900 training videos...
Processed 950 training videos...
Processed 1000 training videos...
Processed 1050 training videos...
Processed 1100 training videos...
Processed 1150 training videos...
Processed 1200 training videos...
Processed 1250 training videos...
Processed 1300 training videos...
Processed 1350 training videos...
Processed 1400 training videos...
Processed 1450 training videos...
Training predictions generated.

In [8]:
# Score every test video listed in test_df.
# Files live under "../DATA/test" and are named after the id, zero-padded
# to five digits.
test_predictions = []

for row_label, raw_id in test_df['id'].items():
    clip_name = "{:05d}.mp4".format(int(float(raw_id)))
    clip_path = os.path.join("../DATA/test", clip_name)
    test_predictions.append(predict_video(clip_path))
    # Progress message every 50th index label.
    if row_label % 50 == 0:
        print(f"Processed {row_label} test videos...")

test_df['score'] = test_predictions
print("Test predictions generated.")

Processed 0 test videos...
Processed 50 test videos...
Processed 100 test videos...
Processed 150 test videos...
Processed 200 test videos...
Processed 250 test videos...
Processed 300 test videos...
Processed 350 test videos...
Processed 400 test videos...
Processed 450 test videos...
Processed 500 test videos...
Processed 550 test videos...
Processed 600 test videos...
Processed 650 test videos...
Processed 700 test videos...
Processed 750 test videos...
Processed 800 test videos...
Processed 850 test videos...
Processed 900 test videos...
Processed 950 test videos...
Processed 1000 test videos...
Processed 1050 test videos...
Processed 1100 test videos...
Processed 1150 test videos...
Processed 1200 test videos...
Processed 1250 test videos...
Processed 1300 test videos...
Test predictions generated.


In [9]:
# Keep only the id and score columns and write them out without the index.
submission_columns = ['id', 'score']
submission = test_df[submission_columns]
submission.to_csv('submission.csv', index=False)
print("Submission file 'submission.csv' created successfully.")

Submission file 'submission.csv' created successfully.


In [10]:
# Show the submission frame (id and score columns) as this cell's output.
submission

Unnamed: 0,id,score
0,204,0.509484
1,30,0.510554
2,146,0.509772
3,20,0.509088
4,511,0.508558
...,...,...
1339,2621,0.506849
1340,2858,0.507239
1341,2625,0.507869
1342,2791,0.507738
