In [52]:
import torch
from torchvision import models
import cv2
import os
import numpy as np

In [53]:
# Function to preprocess a frame (replace with your actual preprocessing logic)
def preprocess_frame(frame):
  # Resize the frame to the expected input size of the model (e.g., 224x224 for ResNet-50)
  resized_frame = cv2.resize(frame, (224, 224))

  # Normalize pixel values (common approach is 0-1 normalization)
  normalized_frame = resized_frame.astype(float) / 255.0

  # Convert the frame to a PyTorch tensor and add a batch dimension (unsqueeze)
  frame_tensor = torch.unsqueeze(torch.from_numpy(normalized_frame), dim=0)
  return frame_tensor.float()



In [54]:
# Define the pre-trained model (ResNet-50 in this example)
model = models.resnet50(pretrained=True)

# Set the model to evaluation mode (optional, but recommended for inference)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [55]:
# List to store predictions for each frame (label, confidence score)
model_name = "resnet"
results = {}
for folders in os.listdir(f'data/frames'):
  output_directory = f'data/results/{model_name}/{folders}'
  if not os.path.exists(output_directory):
      os.makedirs(output_directory)
  predictions = {}
  folder_path = os.path.join('data/frames', folders)
  for j, file in enumerate(os.listdir(folder_path)):
    frame = cv2.imread(os.path.join(folder_path, file))

    preprocessed_frame = preprocess_frame(frame)
    preprocessed_frame = preprocessed_frame.permute(0, 3, 1, 2)
    # Predict with the model
    with torch.no_grad():  # Deactivate gradient calculation for efficiency
      output = model(preprocessed_frame)

    # draw the predicted bounding box
    # Get the predicted label and confidence score (modify based on model output)
    predicted_label = torch.argmax(output, dim=1).item()
    confidence_score = torch.nn.functional.softmax(output, dim=1)[0][predicted_label].item()
    
    if predicted_label not in predictions:
        predictions[predicted_label] = confidence_score
    else:
        predictions[predicted_label] = max(predictions[predicted_label], confidence_score)

  results[folders] = predictions
print("Finished processing frames")


Finished processing frames


In [56]:
labels = []
with open('imagenet_labels.txt') as f:
    labels = [line.strip() for line in f.readlines()]
                
for folder in results:
    print(f"Results for folder {folder}")
    vr = results[folder]
    vr = dict(sorted(vr.items(), key=lambda item: item[1], reverse=True))
    for label in vr:
        print(f"{labels[label]}: {vr[label]}")
        with open(f'data/results/{model_name}/{folder}/results.txt', 'a') as f:
            f.write(f"{labels[label]}: {vr[label]}\n")
    print("\n")

Results for folder video_1
pole: 0.7998195886611938
envelope: 0.761699378490448
amphibian: 0.6703803539276123
suit: 0.5689117908477783
book_jacket: 0.4577503204345703
convertible: 0.3329264223575592
hourglass: 0.31460025906562805
harmonica: 0.18626734614372253
cleaver: 0.17437012493610382
web_site: 0.1685749590396881
bulletproof_vest: 0.1554824262857437
parachute: 0.14476202428340912
tripod: 0.14232376217842102
microphone: 0.1297016739845276
paper_towel: 0.12604686617851257
cowboy_hat: 0.11213955283164978
hoopskirt: 0.1101936623454094
studio_couch: 0.0905168280005455
water_bottle: 0.06357701867818832
cradle: 0.06275518983602524
nipple: 0.05435248091816902


Results for folder video_2
military_uniform: 0.8409576416015625
barbershop: 0.7755054235458374
hourglass: 0.6089584827423096
wardrobe: 0.5716427564620972
airliner: 0.5346886515617371
neck_brace: 0.4891362190246582
electric_fan: 0.4792817533016205
crane: 0.42762497067451477
swing: 0.4251325726509094
cab: 0.411395400762558
television: