This notebook does frame-by-frame classification for multiple races and saves the total counts to a csv file

In [20]:
import torch
from torchvision import transforms
import torch.nn as nn
from PIL import Image
import os
import cv2
import numpy as np
import pandas as pd
from scipy.stats import mode
from torchvision import models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

Load in Model

In [None]:
#transformation that is compatible with resnet-18
class CenterSquareCrop:
    def __call__(self, img):
        width, height = img.size
        min_dim = min(width, height)
        left = (width - min_dim) // 2
        top = (height - min_dim) // 2
        right = left + min_dim
        bottom = top + min_dim
        return img.crop((left, top, right, bottom))

# Updated transform pipeline
transform = transforms.Compose([
    CenterSquareCrop(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet mean
                         std=[0.229, 0.224, 0.225]),   # ImageNet std
])

# Reconstruct the same architecture
def build_model(num_classes=5):
    model = models.resnet18(pretrained=True)

    # Freeze all layers
    for param in model.parameters():
        param.requires_grad = False

    # Replace the final classification layer
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, 256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, num_classes)
    )

    # Unfreeze the classifier head
    for param in model.fc.parameters():
        param.requires_grad = True

    return model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = build_model(num_classes=5).to(device)
model.load_state_dict(torch.load("C:/Users/fires/Downloads/final_model_v2_weight.pth", map_location=device))
model.eval() 

### Sliding window with majority vote to smooth prediction noise

In [22]:
def mode_filter(arr, window_size):
    results = np.empty_like(arr)
    for i in range(window_size // 2, len(arr) - window_size // 2):
            window = arr[i-window_size // 2:i+window_size // 2]
            mode_result = mode(window, keepdims=True)
            if mode_result.count[0] == 1:  # Tie
                    results[i] = arr[i]  # Use original
            else:
                    results[i] = mode_result.mode[0]
    return results

Frame-by-frames from each race should be in a named folder within the "path" folder (in this case 2024Frames) 

In [None]:
df = pd.DataFrame(columns=["Track","DistNoCar", "Front", "Inside", "Rear", "Side"])

path = "C:/Users/fires/Downloads/2024Frames"
for folder in os.listdir(path):
    folder_path = os.path.join(path, folder)
    frame_paths = [folder_path + '/' + file for file in os.listdir(folder_path) if file.lower().endswith('.jpg')]
    predictions = np.empty((0,), dtype=np.uint8)

    for frame in frame_paths:
        image = Image.open(frame).convert('RGB')
        image = transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            output = model(image)
            _, predicted_class = torch.max(output, 1)
            predicted_class_num = predicted_class.item()
            predictions = np.append(predictions, predicted_class_num)

    predictions = mode_filter(predictions, 7) # initial mode filter to clear up larger noise 
    predictions = mode_filter(predictions, 3) # mode filter to clear up individual frames that are misclassified

    filtered = predictions[np.isin(predictions, [0, 1, 2, 3, 4])]
    counts = np.bincount(filtered, minlength=5)
    print(counts)
    df.loc[len(df)] = [folder] + counts.tolist()

Save Data

In [24]:
df.to_csv('angle_counts.csv')
print(df)

                     Track  DistNoCar  Front  Inside  Rear  Side
0         Abu Dhabi_frames       4733   2298    3191   565  1440
1         Australia_frames       4566   2839    2346  1495  1109
2          Austrian_frames       4263   2823    1922   976  2020
3        Azerbaijan_frames       4153   2627    3217  1561   707
4           Bahrain_frames       4211   2200    2260   898  1474
5           Belgian_frames       5170   3241    1517   832  1459
6           British_frames       6293   1516    1866   901  1571
7            Canada_frames       5655   2891    1819   944   959
8           Chinese_frames       4342   3003    2273  1207  1409
9             Dutch_frames       5148   2035    2125  1210  1488
10   Emilia Romanga_frames       3703   2759    3486  1155   900
11        Hungarian_frames       4131   2486    1966  1589  2057
12          Italian_frames       3748   3558    2814   670  1559
13         Japanese_frames       4158   2813    2452   916  1706
14        Las Vegas_frame