In [2]:
# !wget http://places2.csail.mit.edu/models_places365/resnet50_places365.pth.tar

--2025-05-13 21:13:30--  http://places2.csail.mit.edu/models_places365/resnet50_places365.pth.tar
Resolving places2.csail.mit.edu (places2.csail.mit.edu)... 128.52.132.120
Connecting to places2.csail.mit.edu (places2.csail.mit.edu)|128.52.132.120|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 97270159 (93M) [application/x-tar]
Saving to: ‘resnet50_places365.pth.tar’


2025-05-13 21:13:35 (19.7 MB/s) - ‘resnet50_places365.pth.tar’ saved [97270159/97270159]



In [3]:
# !wget https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt

--2025-05-13 21:15:28--  https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6833 (6.7K) [text/plain]
Saving to: ‘categories_places365.txt’


2025-05-13 21:15:28 (6.35 MB/s) - ‘categories_places365.txt’ saved [6833/6833]



In [5]:
# Cell 1: Imports and Setup with GPU and tqdm
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np
import os
import subprocess
import glob
from natsort import natsorted
import csv
from tqdm import tqdm  # <-- NEW

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Root directory to store model + label files
ROOT_DIR = 'scene_classification'
os.makedirs(ROOT_DIR, exist_ok=True)

# URLs and paths
model_url = "http://places2.csail.mit.edu/models_places365/resnet50_places365.pth.tar"
label_url = "https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt"

model_path = os.path.join(ROOT_DIR, "resnet50_places365.pth.tar")
label_path = os.path.join(ROOT_DIR, "categories_places365.txt")


def _download_if_missing(url, dest_path):
    """Fetch ``url`` to ``dest_path`` with wget unless a non-empty file is already there.

    The download goes to a temporary ``.part`` file that is renamed into place
    only after wget exits successfully, so a failed or interrupted transfer
    never leaves a truncated file that later runs would mistake for a
    complete download.

    Raises:
        subprocess.CalledProcessError: if wget exits with a non-zero status.
        FileNotFoundError: if the wget executable is not installed.
    """
    # A zero-byte file is what an earlier failed `wget -O` can leave behind;
    # treat it as missing instead of skipping the download.
    if os.path.exists(dest_path) and os.path.getsize(dest_path) > 0:
        return

    partial_path = dest_path + ".part"
    try:
        subprocess.run(["wget", "-O", partial_path, url], check=True)
        os.replace(partial_path, dest_path)
    finally:
        # After a successful rename this path no longer exists; after a
        # failure it holds incomplete data and must not be kept.
        if os.path.exists(partial_path):
            os.remove(partial_path)


# Download if missing
_download_if_missing(model_url, model_path)
_download_if_missing(label_url, label_path)

Using device: cuda


In [6]:
# Cell 2: Model loading and classification functions (with GPU support)
def load_places365_resnet50(model_path):
    """Build a 365-class ResNet-50 and load weights from a checkpoint file.

    Args:
        model_path: Path to a checkpoint whose top-level object is a mapping
            with a ``'state_dict'`` entry.

    Returns:
        The model, moved to the module-level ``device`` and switched to
        evaluation mode.

    Raises:
        KeyError: if the checkpoint has no ``'state_dict'`` entry.
        RuntimeError: if the stored weights do not match the ResNet-50 layout
            (raised by ``load_state_dict``).
    """
    model = models.resnet50(num_classes=365)

    # NOTE(security): torch.load unpickles the file. Only load checkpoints
    # obtained from a source you trust.
    checkpoint = torch.load(model_path, map_location=device)

    # Weights saved from a torch.nn.DataParallel wrapper carry a leading
    # 'module.' on every key. Strip it only when it is a prefix, so a key
    # that merely contains 'module.' further in is left untouched.
    prefix = 'module.'
    state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in checkpoint['state_dict'].items()
    }

    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()
    return model

def load_categories(label_path):
    """Read class names from a label file, one class per line.

    For each line, the text before the first space is taken and its first
    three characters are dropped (e.g. ``"/a/airfield 0"`` -> ``"airfield"``).

    Args:
        label_path: Path to the label text file.

    Returns:
        A list of class-name strings in file order.
    """
    categories = []
    with open(label_path) as label_file:
        for raw_line in label_file:
            # First whitespace-separated field is the class path; the
            # trailing field (if any) is ignored.
            class_field = raw_line.strip().split(' ')[0]
            categories.append(class_field[3:])
    return categories

# Preprocessing applied to every frame before it is fed to the network.
transform = transforms.Compose(
    [
        # Resize to a fixed 256x256 (aspect ratio is not preserved) ...
        transforms.Resize(size=(256, 256)),
        # ... then keep the central 224x224 region.
        transforms.CenterCrop(size=224),
        # PIL image -> float tensor.
        transforms.ToTensor(),
        # Per-channel normalization with the ImageNet mean/std values that
        # torchvision documents for its pretrained models.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

def classify_scene_from_frame_folder(frame_folder, model, categories, num_frames=16):
    """Predict one scene label for a folder of ``*.jpg`` frames.

    Up to ``num_frames`` frames are sampled at evenly spaced positions in
    natural filename order, each is classified, and the class with the
    highest mean softmax probability across the sampled frames is returned.

    Args:
        frame_folder: Directory containing the ``*.jpg`` frames.
        model: Classifier mapping an image batch to per-class logits.
        categories: Sequence of class names indexed by class id.
        num_frames: Maximum number of frames to sample (must be >= 1).

    Returns:
        The winning entry of ``categories``, or ``None`` if the folder
        contains no ``*.jpg`` files.

    Raises:
        ValueError: if the folder has frames but ``num_frames`` is below 1.
    """
    frame_paths = glob.glob(os.path.join(frame_folder, "*.jpg"))
    total = len(frame_paths)
    if total == 0:
        return None

    # Fail with a clear message instead of an opaque linspace/stack error.
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    frame_paths = natsorted(frame_paths)

    indices = np.linspace(0, total - 1, min(num_frames, total), dtype=int)
    selected_paths = [frame_paths[i] for i in indices]

    # Run the model on small batches rather than one frame at a time; the
    # cap keeps memory bounded when a caller asks for many frames.
    max_batch = 32
    chunk_probs = []
    with torch.no_grad():
        for start in range(0, len(selected_paths), max_batch):
            frame_tensors = []
            for path in selected_paths[start:start + max_batch]:
                # Context manager closes the image file handle promptly.
                with Image.open(path) as img:
                    frame_tensors.append(transform(img.convert("RGB")))
            logits = model(torch.stack(frame_tensors).to(device))
            # Move to CPU so chunks can be concatenated regardless of device.
            chunk_probs.append(torch.nn.functional.softmax(logits, dim=1).cpu())

    avg_prob = torch.cat(chunk_probs).mean(dim=0)
    top_idx = avg_prob.argmax().item()
    return categories[top_idx]


In [None]:
# Cell 3: Loop through all videos and classify scenes with tqdm
ucf_root = "datasets/UCF-101-JPG"  # <-- adjust if needed
output_csv = os.path.join(ROOT_DIR, "scene_classification_results.csv")

model = load_places365_resnet50(model_path)
categories = load_categories(label_path)

# Kept in memory as well so the results remain available after this cell runs.
results = []

# Traverse UCF-101-JPG/{class_name}/{video_folder}/
class_dirs = sorted([d for d in os.listdir(ucf_root) if os.path.isdir(os.path.join(ucf_root, d))])

# Rows are written as they are produced and flushed after every class, so an
# interrupted run still leaves a usable partial CSV instead of nothing.
with open(output_csv, mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['video_id', 'scene_label'])

    for class_dir in tqdm(class_dirs, desc="Classes"):
        class_path = os.path.join(ucf_root, class_dir)
        video_dirs = sorted([v for v in os.listdir(class_path) if os.path.isdir(os.path.join(class_path, v))])

        for video_dir in video_dirs:
            video_path = os.path.join(class_path, video_dir)
            relative_video_id = f"{class_dir}/{video_dir}"
            try:
                scene_label = classify_scene_from_frame_folder(video_path, model, categories)
            except Exception as e:
                # Best effort: report the failing video and keep going.
                print(f"Error processing {relative_video_id}: {e}")
                continue

            if scene_label is None:
                print(f"Skipped (no frames): {relative_video_id}")
                continue

            row = (relative_video_id, scene_label)
            results.append(row)
            writer.writerow(row)

        f.flush()

print(f"\n✅ Scene classification complete. CSV saved to: {output_csv}")

Classes:   0%|          | 0/102 [00:00<?, ?it/s]