<a href="https://colab.research.google.com/github/ZsofiaK/masterthesis/blob/main/Implementation/Pipeline/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification (binary action recognition)

In [15]:
# Experiment configuration: the dataset, frame selection, embedding and
# classifier evaluated by this run of the notebook.

# Dataset folder name (as stored on Drive) and the short tag used in output file names.
dataset_dir = 'Fish clips'
dataset_name = 'fishClips'

# Sub-folders of the dataset's "Embeddings" directory that the embeddings are read from.
frame_selection_method = 'motionAbsdiff_10'
embedding_method = 'default'

# Key of the classifier to pick from the model dictionary.
clf_name = 'SVC-Linear'

# Seed passed to the train/test split, for reproducibility.
seed = 23

In [2]:
# Maps the short dataset tag embedded in output file names to the dataset's
# folder name on Drive.
dataset_dict = {
    'fishClips': 'Fish clips',
}

In [3]:
# Mount Drive.
# Mounts Google Drive at /content/drive; later cells read the dataset from, and
# write the predictions to, paths below this mount point.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Copy dataset
# Copies the dataset folder from the mounted Drive to /content so that the
# following cells read from `data_dir` instead of from Drive.
import shutil

data_source = f"/content/drive/My Drive/UvA/M Thesis/Data/{dataset_dir}"
data_dir = f"/content/{dataset_dir}"

# Copy the folder to destination.
# dirs_exist_ok=True (Python 3.8+) makes the cell safe to re-run in the same
# session: without it copytree raises FileExistsError once `data_dir` exists.
# Files already present at the destination are overwritten by the copy.
shutil.copytree(data_source, data_dir, dirs_exist_ok=True)

'/content/Fish clips'

In [5]:
# Import libraries
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
from IPython.display import clear_output

In [6]:
# Load video embeddings and labels
# Reads the clip index (clips.csv) and, for every listed clip, loads the
# matching <video name>.npy embedding if it exists. Clips without an embedding
# file are recorded in `not_found_embeddings` instead of being loaded.
clips_csv_path = f'{data_dir}/clips.csv'
embeddings_dir = f'{data_dir}/Embeddings/{frame_selection_method}/{embedding_method}'

clips_df = pd.read_csv(clips_csv_path)
nr_clips = len(clips_df)

# Embeddings, labels and video names (the names are kept for saving predictions).
X, y, video_names = [], [], []

not_found_embeddings = []

progress = 0
for progress, (video_file, label) in enumerate(
        zip(clips_df['video'], clips_df['label']), start=1):
    video_name = video_file.replace('.mp4', '')
    embedding_path = os.path.join(embeddings_dir, f"{video_name}.npy")

    if not os.path.exists(embedding_path):
        not_found_embeddings.append(video_name)
    else:
        X.append(np.load(embedding_path))
        y.append(label)
        video_names.append(video_name)

    # Replace the previous progress read-out rather than appending a new one.
    clear_output(wait=True)
    print(f'Number of videos: {nr_clips}')
    print(f'Progress: {progress/nr_clips * 100:.2f}%')

Number of videos: 220
Progress: 100.00%


In [7]:
# Report which clips, if any, had no embedding file on disk.
if not_found_embeddings:
  print('Failed to find embeddings for:')
  print(*not_found_embeddings, sep='\n')   # one video name per line

else:
  print('Success! All embeddings read.')

Success! All embeddings read.


In [8]:
# Turn the collected Python lists of embeddings and labels into numpy arrays.
X, y = np.array(X), np.array(y)

In [9]:
# Handle Class Imbalance
# compute_class_weight returns one weight per entry of `classes`, in that order.
# The dict is therefore keyed by the actual label values rather than by position:
# the two only coincide when the labels happen to be 0..n-1, and a positional
# key would otherwise attach weights to labels that do not exist in `y`.
classes = np.unique(y)
class_weights = compute_class_weight('balanced', classes=classes, y=y)
class_weights_dict = dict(zip(classes.tolist(), class_weights))

In [13]:
# Candidate classifiers, looked up by name via `clf_name`.
# Logistic regression receives the precomputed class-weight dictionary; the
# linear SVC uses scikit-learn's built-in 'balanced' weighting.
logistic_regression = LogisticRegression(
    max_iter=1000,
    class_weight=class_weights_dict,
)
linear_svc = SVC(kernel='linear', class_weight='balanced')

models_dict = {
    'LogisticRegression': logistic_regression,
    'SVC-Linear': linear_svc,
}

In [11]:
# Train-Test Split
# Hold out 20% of the clips for testing, stratified on the label and seeded so
# the split is repeatable. The video names are split alongside the data so that
# predictions can be matched back to clips.
split_parts = train_test_split(
    X,
    y,
    video_names,
    test_size=0.2,
    stratify=y,
    random_state=seed,
)
X_train, X_test, y_train, y_test, video_names_train, video_names_test = split_parts

In [16]:
# Train model
# Look up the classifier selected by `clf_name` and fit it on the training split.
# `model` is the same instance that is stored in `models_dict`, so fitting it
# also changes that dictionary entry.
model = models_dict[clf_name]
model.fit(X_train, y_train)

In [17]:
# Make predictions
# Predict a label for every embedding in the held-out test split.
y_pred = model.predict(X_test)

In [18]:
# Save the test-set predictions to a CSV file in the local "Output" folder.
output_dir = 'Output'
os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

# The file name encodes dataset, frame selection, embedding and classifier, so
# each configuration writes its own file.
predictions_filename = f'pred_{dataset_name}_{frame_selection_method}_{embedding_method}_{clf_name}.csv'
predictions_csv_path = os.path.join(output_dir, predictions_filename)

# One row per test clip: the video name and its predicted label.
predictions_df = pd.DataFrame({'video': video_names_test, 'prediction': y_pred})
predictions_df.to_csv(predictions_csv_path, index=False)

In [19]:
# Producing classification report.
# NOTE(review): the display names are applied to the labels in sorted order, so
# this presumably assumes the lower label means "no attack" - confirm against clips.csv.
class_display_names = ['No attack', 'Attack']
report = classification_report(
    y_test,
    y_pred,
    target_names=class_display_names,
)

print(report)

              precision    recall  f1-score   support

   No attack       0.78      0.80      0.79        35
      Attack       0.12      0.11      0.12         9

    accuracy                           0.66        44
   macro avg       0.45      0.46      0.45        44
weighted avg       0.64      0.66      0.65        44



In [20]:
# Copy output to Drive.
# Every prediction file in the local output folder is copied to the
# "Predictions" folder of the dataset it belongs to. The dataset is identified
# by the second '_'-separated field of the file name ('pred_<dataset>_...').

for file in os.listdir(output_dir):
  source_dir = os.path.join(output_dir, file)

  # Skip anything that is not a regular file tagged with a known dataset
  # (e.g. an '.ipynb_checkpoints' folder or an unrelated file); such entries
  # would otherwise abort the loop with an IndexError, KeyError or
  # IsADirectoryError before the remaining files are copied.
  name_parts = file.split('_')
  if (not os.path.isfile(source_dir)
      or len(name_parts) < 2
      or name_parts[1] not in dataset_dict):
    print(f'Skipping {file}: not a prediction file for a known dataset.')
    continue

  dataset = name_parts[1]
  data_dir_name = dataset_dict[dataset]

  drive_dir = f"/content/drive/My Drive/UvA/M Thesis/Data/{data_dir_name}/Predictions"

  # Create the destination folder on first use; a no-op when it already exists.
  os.makedirs(drive_dir, exist_ok=True)

  destination = f'{drive_dir}/{file}'

  shutil.copy(source_dir, destination)