# Transform and Embed Training Data

## Dependencies

In [1]:
#!pip install opencv-python
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import gzip
import matplotlib.pyplot as plt
from google.colab import drive
from google.colab import files

import torch
import os
import cv2
from torchvision import transforms, datasets, models
from torchvision.models import alexnet, AlexNet_Weights, vgg16, VGG16_Weights, resnet18, ResNet18_Weights
from torch.utils.data import DataLoader, SubsetRandomSampler
import torch.optim as optim

from sklearn.model_selection import train_test_split
import shutil

In [2]:
drive.mount('/content/Drive')

Mounted at /content/Drive


## Data Pre-processing

### Extract frames from videos and store in hierarchical folder

Only needs to be run twice to extract frames from videos.

In [3]:
def extract_frames_from_all_videos(source_folder, target_folder):

    # Create target directories if they don't exist
    if not os.path.exists(target_folder):
        os.makedirs(target_folder)

    videos = [file for file in os.listdir(source_folder) if file.endswith('.MOV')]

    for video in videos:
        # Extract label from video filename (e.g., '1.MOV' -> '1'), cause they didn't upload in right order
        label = os.path.splitext(video)[0]
        video_path = os.path.join(source_folder, video)
        output_folder = os.path.join(target_folder, f'label_{label}')
        extract_frames(video_path, output_folder)

def extract_frames(video_path, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    cap = cv2.VideoCapture(video_path)
    count = 0

    while True:
        success, frame = cap.read()
        if not success:
            break

        frame_filename = f"{output_folder}/frame_{count:04d}.jpg"
        cv2.imwrite(frame_filename, frame)
        count += 1

    cap.release()
    print(f"Extracted {count} frames from {video_path} into {output_folder}")

In [4]:
source_folder = '/content/Drive/MyDrive/Spring Term/6.8301: Advances in Computer Vision/Final_Project/Unprocessed_Data'
target_folder = '/content/Drive/MyDrive/Spring Term/6.8301: Advances in Computer Vision/Final_Project/Processed_Data'
extract_frames_from_all_videos(source_folder, target_folder)

FileNotFoundError: [Errno 2] No such file or directory: '/content/Drive/MyDrive/Spring Term/6.8301: Advances in Computer Vision/Final_Project/Unprocessed_Data'

### Load data, using transforms

After extracting frames from the videos, run this to get torch dataloaders with all training, validation, and testing set data.

In [None]:
data_folder = '/content/Drive/MyDrive/Spring Term/6.8301: Advances in Computer Vision/Final_Project/Processed_Data'
test_folder = '/content/Drive/MyDrive/Spring Term/6.8301: Advances in Computer Vision/Final_Project/New_Test_Frames'

In [None]:
data_folder = '/content/Drive/MyDrive/Colab Notebooks/Final_Project/Processed_Data'
test_folder = '/content/Drive/MyDrive/Colab Notebooks/Final_Project/Processed_Data'

In [None]:
# transform function for
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# get training, validation, and testing datasets as dataloaders
torch.manual_seed(2024)
dataset = datasets.ImageFolder(data_folder, transform=transform)
test_data = datasets.ImageFolder(test_folder, transform=transform)

targets = [sample[1] for sample in dataset.samples]
train_idx, valid_idx, _, _ = train_test_split(
    np.arange(len(targets)),  # indices to split
    targets,                  # classes to stratify by
    test_size=0.2,            # 20% for validation
    random_state=42,          # random state for reproducibility
    stratify=targets          # stratify by target labels
)

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_dataloader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
val_dataloader = DataLoader(dataset, batch_size=32, sampler=valid_sampler)
test_dataloader = DataLoader(test_data, batch_size=1)

## Torch Training Loop, Model Loaders

In [None]:
def load_resnet_headless():
    # load resnet-18, remove fc layer
    model = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
    model = torch.nn.Sequential(*list(model.children())[:-1])

    # freeze params (not technically necessary)
    for param in model.parameters():
        param.requires_grad = False

    return model

def load_alexnet_headless():
    # load alexnet, remove fc layer
    model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
    model.classifier = torch.nn.Sequential(*list(model.classifier.children())[:-1])

    # freeze params (not technically necessary)
    for param in model.parameters():
        param.requires_grad = False

    return model

def load_vgg_headless():
    #load vgg-16, remove fc layer
    model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
    model.classifier = torch.nn.Sequential(*list(model.classifier.children())[:-1])

    # freeze params (not technically necessary)
    for param in model.parameters():
        param.requires_grad = False

    return model

## Extract Embeddings

In [None]:
def embeddings(dataloader, model):
    # function to get embeddings using model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    features = []
    model.eval()
    model.to(device)

    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(dataloader):
            all_labels = all_labels + labels.tolist()
            inputs = inputs.to(device)
            output = model(inputs)
            output = output.view(output.size(0), -1)
            features.append(output.cpu())

    return torch.cat(features), all_labels

In [None]:
# load models
resnet18_model = load_resnet_headless()
alexnet_model = load_alexnet_headless()
vgg16_model = load_vgg_headless()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 171MB/s]
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:05<00:00, 43.3MB/s]
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:05<00:00, 94.8MB/s]


In [None]:
# get training set embeddings, make tables
train_resnet_embeddings, train_resnet_labels = embeddings(train_dataloader, resnet18_model)
train_resnet_embeddings_table = pd.DataFrame(train_resnet_embeddings).add_prefix("dim_")
train_resnet_embeddings_table["label"] = train_resnet_labels
train_resnet_embeddings_table.to_csv("resnet_embeddings_train.csv", index=False)

train_alexnet_embeddings, train_alexnet_labels = embeddings(train_dataloader, alexnet_model)
train_alexnet_embeddings_table = pd.DataFrame(train_alexnet_embeddings).add_prefix("dim_")
train_alexnet_embeddings_table["label"] = train_alexnet_labels
train_alexnet_embeddings_table.to_csv("alexnet_embeddings_train.csv", index=False)

train_vgg_embeddings, train_vgg_labels = embeddings(train_dataloader, vgg16_model)
train_vgg_embeddings_table = pd.DataFrame(train_vgg_embeddings).add_prefix("dim_")
train_vgg_embeddings_table["label"] = train_vgg_labels
train_vgg_embeddings_table.to_csv("vgg_embeddings_train.csv", index=False)

100%|██████████| 255/255 [1:01:17<00:00, 14.42s/it]
100%|██████████| 255/255 [04:10<00:00,  1.02it/s]
100%|██████████| 255/255 [04:57<00:00,  1.17s/it]


In [None]:
# get validation set embeddings, make tables
val_resnet_embeddings, val_resnet_labels = embeddings(val_dataloader, resnet18_model)
val_resnet_embeddings_table = pd.DataFrame(val_resnet_embeddings).add_prefix("dim_")
val_resnet_embeddings_table["label"] = val_resnet_labels
val_resnet_embeddings_table.to_csv("resnet_embeddings_val.csv", index=False)

val_alexnet_embeddings, val_alexnet_labels = embeddings(val_dataloader, alexnet_model)
val_alexnet_embeddings_table = pd.DataFrame(val_alexnet_embeddings).add_prefix("dim_")
val_alexnet_embeddings_table["label"] = val_alexnet_labels
val_alexnet_embeddings_table.to_csv("alexnet_embeddings_val.csv", index=False)

val_vgg_embeddings, val_vgg_labels = embeddings(val_dataloader, vgg16_model)
val_vgg_embeddings_table = pd.DataFrame(val_vgg_embeddings).add_prefix("dim_")
val_vgg_embeddings_table["label"] = val_vgg_labels
val_vgg_embeddings_table.to_csv("vgg_embeddings_val.csv", index=False)

100%|██████████| 64/64 [15:33<00:00, 14.59s/it]
100%|██████████| 64/64 [01:04<00:00,  1.00s/it]
100%|██████████| 64/64 [01:15<00:00,  1.18s/it]


In [None]:
# get test set embeddings, make tables
test_resnet_embeddings, test_resnet_labels = embeddings(test_dataloader, resnet18_model)
test_resnet_embeddings_table = pd.DataFrame(test_resnet_embeddings).add_prefix("dim_")
test_resnet_embeddings_table["label"] = test_resnet_labels
test_resnet_embeddings_table.to_csv("resnet_embeddings_test.csv", index=False)

test_alexnet_embeddings, test_alexnet_labels = embeddings(test_dataloader, alexnet_model)
test_alexnet_embeddings_table = pd.DataFrame(test_alexnet_embeddings).add_prefix("dim_")
test_alexnet_embeddings_table["label"] = test_alexnet_labels
test_alexnet_embeddings_table.to_csv("alexnet_embeddings_test.csv", index=False)

test_vgg_embeddings, test_vgg_labels = embeddings(test_dataloader, vgg16_model)
test_vgg_embeddings_table = pd.DataFrame(test_vgg_embeddings).add_prefix("dim_")
test_vgg_embeddings_table["label"] = test_vgg_labels
test_vgg_embeddings_table.to_csv("vgg_embeddings_test.csv", index=False)

100%|██████████| 185/185 [00:41<00:00,  4.51it/s]
100%|██████████| 185/185 [00:37<00:00,  4.95it/s]
100%|██████████| 185/185 [00:42<00:00,  4.32it/s]


After running the above files, I downloaded all .csv files and then manually compressed them on my local machine. Finally, I uploaded them to GitHub.

In [None]:
files.download("vgg_embeddings_train.csv")
files.download("vgg_embeddings_val.csv")
files.download("alexnet_embeddings_val.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>