<a href="https://colab.research.google.com/github/ZsofiaK/masterthesis/blob/main/Pipeline/Implementation/Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Classification (binary action recognition)

## Setting up requirements

In [1]:
# Setting up dictionaries for later ease of use.
dataset_dict = {'fishClips' : 'Fish clips', 'AK-fish' : 'AK fish'}

input_sizes_dict = {'dinov2-vits14-clf' : 384, 'dinov2-vitg14-clf' : 1536,
                    'dinov2-vits14-reg-clf' : 384, 'dinov2-vitg14-reg-clf' : 1536}

In [2]:
# Set the dataset, model and embedding specifics for the classification.

dataset_name = 'fishClips'

dataset_dir = dataset_dict[dataset_name]

image_size = 448    # The size of the embedded images.

frame_selection_method = 'motionAbsdiff_10'

embedding_method = 'dinov2-vitg14-reg-clf'

clf_name = 'ShallowNetwork'

val_score = 'roc_auc'  # Score to use during cross-validation.

seed = 23   # For reproducability in pseudo-randomness.

nr_frames = int(frame_selection_method.split('_')[-1])

input_size = input_sizes_dict[embedding_method] * nr_frames   # Size of the input vectors (embeddings).

In [3]:
# Setting up parameters for cross validation.

# Number of folds to use.
cv_folds = 3

# Parameter grids to use for the models.
param_grid = { 'ShallowNetwork': {
    'layers': [0, 1, 2, 3],
    'dropout_rate': [0.0, 0.25, 0.5],
    'learning_rate': [0.01, 0.001, 0.0001],
    'pos_threshold' : [0.1, 0.15, 0.2, 0.5],  # Threshold for turning sigmoid prediction to binary label.
    'epochs': [50],
    'batch_size': [32]
}
}

In [4]:
# Setting up folder to save outputs.
import os

output_dir = 'Output'
os.makedirs(output_dir, exist_ok=True)

In [5]:
# Mount Drive.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Specify data source
data_dir = f"/content/drive/MyDrive/UvA/M Thesis/Data/{dataset_dir}"

## Loading data

In [7]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from IPython.display import clear_output
import shutil

In [8]:
# Load video embeddings and labels
embeddings_dir = f'{data_dir}/Embeddings/{embedding_method}/{image_size}'
clips_csv_path = f'{data_dir}/clips.csv'
frame_selection_path = f'{data_dir}/Selected frames/{dataset_name}_{frame_selection_method}.csv'

clips_df = pd.read_csv(clips_csv_path)
frames_df = pd.read_csv(frame_selection_path, index_col='video')

nr_clips = len(clips_df)
progress = 0

X_train = []  # Embeddings
X_test = []
y_train = []  # Labels
y_test = []

video_names_train = []  # Video names for saving predictions
video_names_test = []

not_found_embeddings = []

for index, row in clips_df.iterrows():
  skip_to_next = False

  video_name = row['video'].replace('.mp4', '')
  label = row['label']

  embedding_path = f'{embeddings_dir}/{video_name}'

  if video_name in video_names_train or video_name in video_names_test:
    continue      # Skip embeddings which have already been read.

  if not os.path.exists(embedding_path):
    not_found_embeddings.append((video_name, 'all'))
    continue

  frames = eval(frames_df['frames'][f'{video_name}.mp4'])

  embedding = []

  for frame_idx in frames:
    frame_embedding_path = f'{embedding_path}/{video_name}_{frame_idx}.npy'

    if not os.path.exists(frame_embedding_path):
      not_found_embeddings.append((video_name, frame_idx))

      skip_to_next = True   # Skip to next video

      break

    try:
      frame_embedding = np.load(frame_embedding_path)

    except:
      print(video_name, 'Unable to load an embedding.')
      break

    embedding.append(frame_embedding)

  if skip_to_next:
    continue

  np_embedding = np.concatenate(embedding)

  if row['type'] == 'train':
    X_train.append(np_embedding)
    y_train.append(label)
    video_names_train.append(video_name)

  elif row['type'] == 'test':
    X_test.append(np_embedding)
    y_test.append(label)
    video_names_test.append(video_name)

  progress += 1

  clear_output(wait=True)
  print(f'Number of videos: {nr_clips}')
  print(f'Progress: {progress/nr_clips * 100:.2f}%')

Number of videos: 220
Progress: 100.00%


In [9]:
# Check if all embeddings were successfully loaded.
if len(not_found_embeddings) > 0:
  print(f' WARNING: Failed to find embeddings for {len(not_found_embeddings)} videos.')

else:
  print('Success! All embeddings read.')

Success! All embeddings read.


In [10]:
# Check if all embeddings have the required input size and pad the ones which do not.
X_train_original = X_train.copy()
X_test_original = X_test.copy()
y_train_original = y_train.copy()
y_test_original = y_test.copy()
video_names_train_original = video_names_train.copy()
video_names_test_original = video_names_test.copy()

X_train = []
X_test = []
y_train = []
y_test = []
video_names_train = []
video_names_test = []

padded_train = 0
padded_test = 0

for i, array in enumerate(X_train_original):
  if len(array) == input_size:
    X_train.append(array)

  elif len(array) < input_size:
    difference = input_size - len(array)

    padding_size_beginning = difference // 2 + ((difference % 2) * 1)
    padding_size_end = difference // 2

    padded_array = np.pad(array, (padding_size_beginning, padding_size_end), \
                          mode='constant', constant_values=(0, 0))

    X_train.append(padded_array)

    padded_train += 1

  y_train.append(y_train_original[i])
  video_names_train.append(video_names_train_original[i])

for i, array in enumerate(X_test_original):
  if len(array) == input_size:
    X_test.append(array)

  elif len(array) < input_size:
    difference = input_size - len(array)

    padding_size_beginning = difference // 2 + ((difference % 2) * 1)
    padding_size_end = difference // 2

    padded_array = np.pad(array, (padding_size_beginning, padding_size_end), \
                           mode='constant', constant_values=(0, 0))

    X_test.append(padded_array)

    padded_test += 1

  y_test.append(y_test_original[i])
  video_names_test.append(video_names_test_original[i])

In [11]:
# Check if any embeddings had to be removed.
print(f'{padded_train} embeddings were padded in training set due to incorrect embedding size.')
print(f'{padded_test} embeddings were padded in test set due to incorrect embedding size.')

0 embeddings were padded in training set due to incorrect embedding size.
0 embeddings were padded in test set due to incorrect embedding size.


In [12]:
# Converting to numpy.
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

In [13]:
# Handle class imbalance through class weights
from sklearn.utils.class_weight import compute_class_weight

all_labels = np.concatenate((y_train, y_test))

class_weights = compute_class_weight('balanced', classes=np.unique(all_labels), y=all_labels)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

print('Class weights:')
print(class_weights_dict)

Class weights:
{0: 0.625, 1: 2.5}


## Cross-validate model

In [14]:
## FUNCTIONS FOR MODEL CROSS-VALIDATION.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import recall_score, accuracy_score, f1_score, roc_auc_score
from IPython.display import clear_output

def create_shallow_network(input_dim, hidden_layers, dropout_rate, learning_rate):
    model = Sequential()

    # Dense input layer with ReLu
    model.add(Dense(10, input_dim=input_dim, activation='relu'))

    # Dense hidden layers with ReLu and dropout
    for _ in range(hidden_layers):
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(dropout_rate))

    # Dense output layer with sigmoid activation
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=learning_rate)

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

def crossval_shallow_network(n_splits, input_dim, X, y, params_grid, \
                             val_score = 'accuracy', verbose=False):

  kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

  # Set up best result tracker for grid search loop
  best_score = 0

  best_params = {}

  # Calculate number of runs for progress monitoring.
  total_runs = 1

  for params in params_grid.values():
    total_runs *= len(params)

  total_runs *= n_splits

  progress = 0

  # Grid search loop
  for hidden_layers in params_grid['layers']:
      for dropout_rate in params_grid['dropout_rate']:
          for learning_rate in params_grid['learning_rate']:

            scores = {threshold : [] for threshold in params_grid['pos_threshold']}

            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                # Create and fit the model
                model = create_shallow_network(input_dim, hidden_layers, \
                                                dropout_rate, learning_rate)

                model.fit(X_train, y_train, epochs=10, verbose=0, class_weight=class_weights_dict)

                # Sigmoid output
                y_pred_raw = model.predict(X_test)

                # Predicted labels based on sigmoid output
                for threshold in params_grid['pos_threshold']:
                  y_pred = (y_pred_raw > threshold).astype(int).squeeze()

                  # Calculate performance based on selected score
                  if val_score == 'accuracy':
                    score = accuracy_score(y_test, y_pred)

                  elif val_score == 'recall':
                    score = recall_score(y_test, y_pred)

                  elif val_score == 'f1_score':
                    score = f1_score(y_test, y_pred)

                  elif val_score == 'roc_auc':
                    score = roc_auc_score(y_test, y_pred_raw)

                  else:
                    print(f'ERROR: unexpected validation score {val_score}.')
                    print('Select one of: accuracy, recall, f1_score')

                  scores[threshold].append(score)

                  progress += 1

                  clear_output(wait=True)
                  print(f'Cross-validation progress: {progress / total_runs * 100:.2f}%')

                # Best average score across all folds
                best_threshold = max(scores, key=lambda thr: np.mean(scores[thr]))
                best_average_score = np.mean(scores[best_threshold])

                # Check if current model settings beat the current best
                if best_average_score > best_score:
                    best_score = best_average_score
                    best_params = {'hidden_layers': hidden_layers,
                                  'dropout_rate': dropout_rate,
                                  'learning_rate': learning_rate,
                                  'pos_threshold': best_threshold}

  # Print best parameters and their score
  if verbose:
    print(f"\nBest Score: {best_score:.4f}")
    print("Best Parameters:", best_params)

  return best_params

In [15]:
# Setting up model dictionary for ease of reuse.
# Only models which do not need cross-validation are included in this.
# Models with cross-validation are handled on an individual basis.

models_dict = {
    'LogisticRegression' : LogisticRegression(max_iter=1000, class_weight=class_weights_dict),
    'SVC-Linear' : SVC(class_weight='balanced', kernel='linear')
    }

# Models which do not need cross-validation.
no_cross_val = list(models_dict.keys())

In [16]:
# Selecting model (with potential cross validation).
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# If model requires cross-validation.
if clf_name not in no_cross_val:
  if clf_name == 'ShallowNetwork':
    best_params = crossval_shallow_network(cv_folds, input_size, X_train, \
                                           y_train, param_grid['ShallowNetwork'],\
                                           val_score = val_score, verbose=True)

    model = create_shallow_network(input_size, best_params['hidden_layers'], \
                                 best_params['dropout_rate'], best_params['learning_rate'])

# If model does not require cross-validation.
else:
  model = models_dict[clf_name]

Cross-validation progress: 100.00%

Best Score: 0.5111
Best Parameters: {'hidden_layers': 3, 'dropout_rate': 0.5, 'learning_rate': 0.0001, 'pos_threshold': 0.1}


In [17]:
# Train model on the training set.
model.fit(X_train, y_train, class_weight=class_weights_dict)



<keras.src.callbacks.History at 0x7f78e12752d0>

In [18]:
# Make predictions.
y_pred_raw = model.predict(X_test)



In [19]:
# Choose best threshold in case of networks validated on ROC AUC.
# Threshold is selected to maximize F1 score.
from sklearn.metrics import precision_recall_curve

if clf_name == 'ShallowNetwork' and val_score == 'roc_auc':
  precision, recall, thresholds = precision_recall_curve(y_test, y_pred_raw)

  # Calculate F1 Scores for different thresholds
  f1_scores = 2 * recall * precision / (recall + precision)

  f1_scores = f1_scores[:-1]    # Drop last score as that corresponds to -inf threshold.

  f1_scores_nonnull = f1_scores[~np.isnan(f1_scores)]

  thresholds_nonnull = thresholds[~np.isnan(f1_scores)]

  optimal_idx = np.argmax(f1_scores_nonnull)

  pos_threshold = thresholds_nonnull[optimal_idx]

  best_params['pos_threshold'] = pos_threshold

  print('Optimal threshold:', pos_threshold)

Optimal threshold: 0.493469


  f1_scores = 2 * recall * precision / (recall + precision)


In [20]:
  # Saving best hyperparameters.
  best_params_df = pd.DataFrame([best_params])

  params_output_path = os.path.join(output_dir, 'Parameters')

  os.makedirs(params_output_path, exist_ok=True)

  params_csv_path = os.path.join(output_dir, f'Parameters/params_{dataset_name}_{frame_selection_method}_{embedding_method}_{image_size}_{clf_name}_{val_score.replace("_", "-")}.csv')

  best_params_df.to_csv(params_csv_path, index=False)

## Predictions and testing

## Save predictions for each instance of the test set.

In [21]:
# Prepare DataFrame with video names and predictions.
predictions_df = pd.DataFrame({
    'video': video_names_test,
    'prediction': y_pred_raw.squeeze(),
    'label' : y_test
})

# Create predictions output directory.
pred_output_path = os.path.join(output_dir, 'Predictions')
os.makedirs(pred_output_path, exist_ok=True)

# Save predictions.
predictions_csv_path = os.path.join(pred_output_path, f'pred_{dataset_name}_{frame_selection_method}_{embedding_method}_{image_size}_{clf_name}_{val_score.replace("_", "-")}.csv')
predictions_df.to_csv(predictions_csv_path, index=False)

In [22]:
# Copy prediction files to Drive.
for file in os.listdir(f'{output_dir}/Predictions'):
  source_file = os.path.join(f'{output_dir}/Predictions', file)

  if file.startswith('pred'):   # Only consider prediction outputs.
    drive_output_dir = f"/content/drive/My Drive/UvA/M Thesis/Data/Results/Predictions"

    if not os.path.exists(drive_output_dir):
      os.makedirs(drive_output_dir)

    destination = f'{drive_output_dir}/{file}'

    # Overwrite existing file.
    if os.path.exists(destination):
      os.remove(destination)

    shutil.copy(source_file, destination)

for file in os.listdir(f'{output_dir}/Parameters'):
  source_file = os.path.join(f'{output_dir}/Parameters', file)

  if file.startswith('params'):   # Only consider parameter outputs.
    drive_output_dir = f"/content/drive/My Drive/UvA/M Thesis/Data/Results/Parameters"

    if not os.path.exists(drive_output_dir):
      os.makedirs(drive_output_dir)

    destination = f'{drive_output_dir}/{file}'

    # Overwrite existing file.
    if os.path.exists(destination):
      os.remove(destination)

    shutil.copy(source_file, destination)

## Classification report on the complete test set

In [23]:
# Turn sigmoid predictions into binary ones in case of a network.
if clf_name == 'ShallowNetwork':
  y_pred = (y_pred_raw > best_params['pos_threshold']).astype(int).squeeze()

else:
  y_pred = y_pred_raw

In [24]:
# Producing classification report.
report = classification_report(y_test, y_pred, target_names=['No attack', 'Attack'])

print(report)

              precision    recall  f1-score   support

   No attack       0.81      0.81      0.81        36
      Attack       0.22      0.22      0.22         9

    accuracy                           0.69        45
   macro avg       0.51      0.51      0.51        45
weighted avg       0.69      0.69      0.69        45

