In [1]:
import os
import subprocess
import pandas as pd
import re
import json
import cv2 #pip install opencv-python
import torch
import torchvision.transforms as transforms

from torchvision.models import resnet50
from torchvision.io import read_image
from torch.utils.data import DataLoader, Dataset

## Video to Image

In [2]:
# Sample roughly one frame per second from the input video and save each
# sampled frame as "<frame_number>.png" (1-based frame number) in output_dir.
video_path = "../data/left_quarter.mp4"

output_dir = os.path.join("../data", "extracted_frames")
os.makedirs(output_dir, exist_ok=True)

cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): inside a notebook exit() shuts the kernel
# down rather than surfacing a readable error in the cell output.
if not cap.isOpened():
  cap.release()
  raise RuntimeError(f"Error opening video! ({video_path})")

try:
  fps = int(cap.get(cv2.CAP_PROP_FPS))
  # Kept for reference only. The container-reported frame count can disagree
  # with the number of frames that actually decode, so it is NOT used to stop
  # the loop; a failed read is the end-of-stream signal.
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

  # fps is the sampling stride below; a zero/negative value would raise
  # ZeroDivisionError (or never match) in the modulo test.
  if fps <= 0:
    raise RuntimeError(f"Video reports an invalid frame rate ({fps}): {video_path}")

  frame_count = 1   # 1-based number of the frame currently being read
  actual_count = 0  # number of frames written to disk

  while True:
    ret, frame = cap.read()

    if not ret:
      print("Can't receive frame (stream end?). Exiting...")
      break

    # Save every fps-th frame as a numbered image
    if frame_count % fps == 0:
      image_path = os.path.join(output_dir, f"{frame_count}.png")
      # cv2.imwrite reports failure through its return value, not an exception
      if not cv2.imwrite(image_path, frame):
        raise IOError(f"Failed to write frame to {image_path}")
      actual_count += 1

    frame_count += 1
finally:
  # Always free the capture handle, even if reading or writing failed
  cap.release()

print(f"Extracted {actual_count} frames to {output_dir}")

Can't receive frame (stream end?). Exiting...
Extracted 1788 frames to ../data/extracted_frames


## Image to Features

In [3]:
# Dataset subclass that serves the image files found directly inside one directory
class CustomImageDataset(Dataset):
    """Load images from a flat directory in a deterministic, natural order.

    Args:
        img_dir: Directory whose image files make up the dataset.
        transform: Optional callable applied to each decoded image tensor.
        extensions: File-name suffixes (case-insensitive) that count as
            images. A single string is accepted as well. Entries that do not
            match, and entries that are not regular files, are ignored.

    Each item is a tuple ``(image, filename, idx)``.
    """
    def __init__(self, img_dir, transform=None, extensions=(".png", ".jpg", ".jpeg")):
        self.img_dir = img_dir
        self.transform = transform

        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        # os.listdir returns entries in arbitrary order and includes
        # sub-directories and stray files; keep only regular image files.
        candidates = [
            name for name in os.listdir(img_dir)
            if name.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(img_dir, name))
        ]
        # Natural sort so numbered frames keep numeric order
        # ("30.png" before "120.png") and indices are reproducible across runs.
        self.img_names = sorted(candidates, key=self._natural_key)

    @staticmethod
    def _natural_key(name):
        """Sort key that compares digit runs numerically and the rest case-insensitively."""
        # With one capture group, re.split alternates text / digit-run tokens,
        # so odd positions are always the digit runs.
        tokens = re.split(r"(\d+)", name)
        key = [int(tok) if pos % 2 else tok.lower() for pos, tok in enumerate(tokens)]
        # The raw name breaks ties between names that differ only in case.
        return key, name

    def __len__(self):
        return len(self.img_names)

    def __getitem__(self, idx):
        img_name = self.img_names[idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = read_image(img_path)
        if self.transform:
            image = self.transform(image)
        return image, img_name, idx

In [4]:
# Preprocessing pipeline applied to every frame before it reaches the network:
#   1. resize to 224x224 (the standard ImageNet input size for ResNet),
#   2. convert the decoded image tensor to floating point,
#   3. normalize each channel with the ImageNet mean / std used in training.
transform = transforms.Compose(
    transforms=[
        transforms.Resize(size=(224, 224)),
        transforms.ConvertImageDtype(dtype=torch.float),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [5]:
# Build the dataset over the extracted frames and wrap it in a DataLoader.
# One image per batch, in dataset order (no shuffling).
img_dir = '../data/extracted_frames'
dataset = CustomImageDataset(img_dir, transform=transform)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=1,
    shuffle=False,
)

In [6]:
# Load a pretrained ResNet-50 and put it in evaluation mode
# (nn.Module.eval() returns the module, so the call can be chained).
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of
# `weights=` — confirm the installed version before switching.
model = resnet50(pretrained=True).eval()

# Keep every child module except the last one, so the network outputs
# features instead of the final layer's predictions.
model = torch.nn.Sequential(*list(model.children())[:-1])



In [7]:
# Feature extraction: run every batch through the truncated network and
# collect one record per image. Works for any DataLoader batch size rather
# than assuming exactly one image per batch.
features = []
with torch.no_grad():  # Inference only, no gradients needed
    for images, names, indices in dataloader:
        # Collapse everything after the batch dimension: 4D -> [batch, features]
        output = torch.flatten(model(images), start_dim=1)
        # tolist() yields plain Python values regardless of the tensor's device
        for idx, name, vector in zip(indices.tolist(), names, output.tolist()):
            features.append({"index": idx, "filename": name, "features": vector})

# Export features to a JSON file
with open('../data/image_features.json', 'w') as f:
    json.dump(features, f)

print("Feature extraction completed.")

Feature extraction completed.
