In [1]:
import os
import torch
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torchvision import models, transforms

In [2]:
# Loading the pretrained ResNet model
resnet = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\user/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:45<00:00, 2.23MB/s]


In [3]:
list(resnet.children()) # For, testing

[Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
 Sequential(
   (0): Bottleneck(
     (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
     (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
     (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     (relu): ReLU(inplace=True)
     (downsample): Sequential(
       (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
       (1): BatchNorm2d(256, eps=1e-05, momentum

In [4]:
# Removing the fully connected layer and the pooling layer
# Keeping layers up to the penultimate layer
feature_extractor = nn.Sequential(*list(resnet.children())[:-2])

In [5]:
# For, testing the feature extractor
input_tensor = torch.randn(1, 3, 224, 224)  # Example input (batch_size=1, 3 channels, 224x224 image)
features = feature_extractor(input_tensor)
print("Feature shape:", features.shape)

Feature shape: torch.Size([1, 2048, 7, 7])


In [6]:
dataset = "gtea"
input_folder = f"../data/{dataset}/frames/"
output_folder = f"../data/{dataset}/extracted_frame_features/"

In [9]:
os.makedirs(output_folder, exist_ok=True)

resnet = models.resnet50(pretrained=True)
feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])  # Remove the fully connected layer
feature_extractor.eval()

# Defining preprocessing transform
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

from tqdm import tqdm_notebook

# Processing each video folder
for video_folder in tqdm_notebook(os.listdir(input_folder)):
    video_path = os.path.join(input_folder, video_folder)
    
    if not os.path.isdir(video_path):
        continue  # Skip if it's not a folder

    # Creating a corresponding folder in the output directory
    video_output_path = os.path.join(output_folder, video_folder)
    os.makedirs(video_output_path, exist_ok=True)

    print(f"Processing video folder: {video_folder}")

    # Processing each frame
    for frame_file in os.listdir(video_path):
        frame_path = os.path.join(video_path, frame_file)

        if not (frame_file.endswith(".png") or frame_file.endswith(".jpg")):
            continue

        image = Image.open(frame_path).convert("RGB")
        input_tensor = transform(image).unsqueeze(0)  # Add batch dimension

        # Extracting features
        with torch.no_grad():
            features = feature_extractor(input_tensor).squeeze().numpy()  # Removing batch and spatial dimensions

        # Saving features as .npy
        feature_file = os.path.splitext(frame_file)[0] + ".npy"
        feature_path = os.path.join(video_output_path, feature_file)
        
        np.save(feature_path, features)

    print(f"Features saved for video folder: {video_folder}")
    print()

print("Feature extraction completed!")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for video_folder in tqdm_notebook(os.listdir(input_folder)):


  0%|          | 0/22 [00:00<?, ?it/s]

Processing video folder: S1_Cheese_C1
Features saved for video folder: S1_Cheese_C1

Processing video folder: S1_Coffee_C1
Features saved for video folder: S1_Coffee_C1

Processing video folder: S1_CofHoney_C1
Features saved for video folder: S1_CofHoney_C1

Processing video folder: S1_Hotdog_C1
Features saved for video folder: S1_Hotdog_C1

Processing video folder: S1_Pealate_C1
Features saved for video folder: S1_Pealate_C1

Processing video folder: S1_Peanut_C1
Features saved for video folder: S1_Peanut_C1

Processing video folder: S1_Tea_C1
Features saved for video folder: S1_Tea_C1

Processing video folder: S2_CofHoney_C1
Features saved for video folder: S2_CofHoney_C1

Processing video folder: S2_Hotdog_C1
Features saved for video folder: S2_Hotdog_C1

Processing video folder: S2_Pealate_C1
Features saved for video folder: S2_Pealate_C1

Processing video folder: S2_Peanut_C1
Features saved for video folder: S2_Peanut_C1

Processing video folder: S2_Tea_C1
Features saved for video

### For, Testing

In [None]:
import os
import pandas as pd

In [None]:
dataset = "gtea"
gt_path = f"../data/{dataset}/action_labels/"
frames_path = f"../data/{dataset}/frames/"

In [None]:
for video_name in os.listdir(gt_path):

    if not video_name.endswith(".txt"):
        continue  

    # Ground truth label count
    with open(os.path.join(gt_path, video_name), 'r') as f:
        gt_labels = f.readlines()
    
    num_gt_labels = len(gt_labels)

    # For, testing
    print(f"Checking {video_name} - GT Labels: {num_gt_labels}")    

    # Frame count
    video_folder = os.path.join(frames_path, video_name.replace(".txt", ""))
    
    if not os.path.exists(video_folder):
        print(f"\nWarning: Frames not found for {video_name}\n")
    
        continue

    num_frames = len([f for f in os.listdir(video_folder) if f.endswith((".png", ".jpg"))])

    if num_gt_labels != num_frames:
        print(f"Mismatch for {video_name}: GT Labels = {num_gt_labels}, Frames = {num_frames}\n")

print("\nCheck completed!")

Checking S1_Cheese_C1.txt - GT Labels: 943
Checking S1_Coffee_C1.txt - GT Labels: 1178
Checking S1_CofHoney_C1.txt - GT Labels: 1235
Checking S1_Hotdog_C1.txt - GT Labels: 718
Checking S1_Pealate_C1.txt - GT Labels: 1384
Checking S1_Peanut_C1.txt - GT Labels: 1643
Checking S1_Tea_C1.txt - GT Labels: 2009
Checking S2_Cheese_C1.txt - GT Labels: 634


Checking S2_Coffee_C1.txt - GT Labels: 1814
Mismatch for S2_Coffee_C1.txt: GT Labels = 1814, Frames = 447

Checking S2_CofHoney_C1.txt - GT Labels: 823
Checking S2_Hotdog_C1.txt - GT Labels: 811
Checking S2_Pealate_C1.txt - GT Labels: 1181
Checking S2_Peanut_C1.txt - GT Labels: 1465
Checking S2_Tea_C1.txt - GT Labels: 1412
Checking S3_Cheese_C1.txt - GT Labels: 913


Checking S3_Coffee_C1.txt - GT Labels: 1190
Checking S3_CofHoney_C1.txt - GT Labels: 892
Checking S3_Hotdog_C1.txt - GT Labels: 862


Checking S3_Pealate_C1.txt - GT Labels: 1169
Checking S3_Peanut_C1.txt - GT Labels: 964


Checking S3_Tea_C1.txt - GT Labels: 1361
Checking S4_Ch

**Note:** Manually removed the mismatched and missing labels and frames