<a href="https://colab.research.google.com/github/ZsofiaK/masterthesis/blob/main/Implementation/Experimentation/Exp_clf_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification pipeline

## Importing data and modules.

In [6]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import zipfile

# Path to the zipped data in Drive
zip_file_path = '/content/drive/My Drive/UvA/M Thesis/Data/fish_mock_sample.zip'

# Directory to extract the contents
extract_dir = '/content/'

# Create the extract directory if it does not yet exist
import os
os.makedirs(extract_dir, exist_ok=True)

# Extract the contents of the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

In [7]:
# Importing any additional modules.

import shutil

module_files = ['frame_selection.py']

for module in module_files:
  # Path to the .py file in your Google Drive
  source_file_path = f'/content/drive/My Drive/UvA/M Thesis/Modules/{module}'

  # Destination path.
  destination_file_path = f'/content/{module}'

  # Copy the file from Google Drive to the content folder
  shutil.copy(source_file_path, destination_file_path)

  # Verify that the file has been copied
  print(f"File copied from '{source_file_path}' to '{destination_file_path}'.")

File copied from '/content/drive/My Drive/UvA/M Thesis/Modules/frame_selection.py' to '/content/frame_selection.py'.


## Classification pipeline

In [8]:
from frame_selection import count_frames, calculate_ssim, select_frames_ssim, select_frames_evenly
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from PIL import Image

In [9]:
# Loading the DINOv2 model
lvm = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vits14_pretrain.pth
100%|██████████| 84.2M/84.2M [00:00<00:00, 122MB/s]


In [10]:
from IPython.display import clear_output

In [32]:
# Loading data.

data_dir = "fish_mock_sample"

# Set the type of frame selection here.
frame_selection = select_frames_evenly

videos = []
labels = []

# Automatically discover subfolders and use their names as labels
label_folders = [f for f in os.listdir(data_dir) \
                 if os.path.isdir(os.path.join(data_dir, f))]

nr_labels = len(label_folders)
label_counter = 1

for label_counter, label in enumerate(label_folders):
    # Construct the path to the subfolder
    subfolder_path = os.path.join(data_dir, label)

    nr_videos = len(os.listdir(subfolder_path))

    for video_counter, video_filename in enumerate(os.listdir(subfolder_path)):
        clear_output(wait=True)
        print(f'Processing label {label_counter+1} / {nr_labels}')
        print(f'Processing video {video_counter+1} / {nr_videos}')

        video_path = os.path.join(subfolder_path, video_filename)

        video_output = frame_selection(video_path)

        videos.append(video_output)
        labels.append(label)

clear_output(wait=True)
print(f"Processed {len(videos)} videos.")

Processed 10 videos.


In [12]:
# Saving the selected frames of each video in a separate list.
# This is because frame selection methods return the frames AND the selected indices in a tuple.
# Only the frames are needed in this pipeline.

video_frames = [item[0] for item in videos]

In [21]:
# Defining functions for image handling and embedding.

transform = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor()
    ])

def generate_embeddings(frames, transform):
    embeddings = []

    for frame in frames:
      img = transform(Image.fromarray(frame))[:3].unsqueeze(0)

      with torch.no_grad():
        embedding = lvm(img)[0]

        embeddings.append(embedding.squeeze().numpy())

    return np.concatenate(embeddings)

In [22]:
# Generating embeddings for each selected frame of each video.

embedded_videos = []

for frames in video_frames:
  embedding = generate_embeddings(frames, transform)

  embedded_videos.append(embedding)

### Training a simple classifier on the embeddings.

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [28]:
# Split the data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(embedded_videos, labels, test_size=0.2, random_state=23)

In [29]:
# Initialize and train the logistic regression classifier.
classifier = LogisticRegression()
classifier.fit(X_train, y_train)

In [30]:
# Make predictions on the test set.
y_pred = classifier.predict(X_test)

In [31]:
# Calculate accuracy.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5
