In [None]:
import cv2
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np


In [None]:
# Define class names. The list is indexed by the model's predicted class index,
# so its order must match the label order used when the checkpoint was trained.
class_names = ['blank', 'rock', 'paper', 'scissors']

# Derive the output-layer size from the label list so the two cannot drift apart.
num_classes = len(class_names)

# Load the trained model: a ResNet-18 backbone whose final fully connected
# layer is replaced with one sized for our classes.
# NOTE: newer torchvision releases deprecate `pretrained=` in favor of
# `weights=None`; the legacy keyword is kept for compatibility with older installs.
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only machine.
# SECURITY: torch.load unpickles the file, so only load checkpoints you trust
# (recent PyTorch versions also accept weights_only=True for plain state dicts).
state_dict = torch.load('model_checkpoint.pth', map_location='cpu')
model.load_state_dict(state_dict)

# Switch to inference mode (fixes batch-norm statistics, disables dropout).
model.eval()


In [None]:
# Per-frame preprocessing pipeline: resize to 224x224, convert the PIL image
# to a tensor, then normalize each channel with the mean/std below
# (these match the standard ImageNet statistics).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)


In [None]:
# Open the video file
video_path = ''  # path to a video
output_path = ''  # path of the annotated output video

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    cap.release()
    raise IOError(f"Could not open video: {video_path!r}")

# Mirror the input's geometry and frame rate in the output video.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
out = cv2.VideoWriter(
    output_path,
    cv2.VideoWriter_fourcc(*'mp4v'),
    fps,
    (frame_width, frame_height),
)

# Process video frames; try/finally guarantees both handles are released
# even if inference or drawing raises part-way through.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break  # end of stream or read failure

        # Preprocess a separate RGB copy for the model. `frame` itself stays
        # in OpenCV's BGR order because VideoWriter expects BGR input;
        # writing the RGB-converted frame would swap red and blue in the output.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_pil = Image.fromarray(frame_rgb)
        frame_tensor = transform(frame_pil).unsqueeze(0)  # add batch dimension

        # Perform inference
        with torch.no_grad():
            outputs = model(frame_tensor)
        _, predicted_class = torch.max(outputs, 1)
        predicted_label = class_names[predicted_class.item()]  # Use the predicted class index

        # Overlay predicted label on the frame: draw the text on a copy and
        # blend it back so the label is semi-transparent. (0, 0, 255) is red in BGR.
        overlay = frame.copy()
        cv2.putText(overlay, predicted_label, (30, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)
        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)

        out.write(frame)  # Write the processed frame to the output video
finally:
    cap.release()
    out.release()