<a href="https://colab.research.google.com/github/ZsofiaK/masterthesis/blob/main/Implementation/Pipeline/Frame_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Saving selected frame indices

This is an auxiliary notebook that saves the selected frame indices for each video, so that they can be read later in the classification pipeline.

In [1]:
# User settings: edit these three values to choose what this notebook produces.

# Key of the frame selection method to use (looked up in `selection_methods`).
selection_name = 'motionAbsdiff'

# Number of frames requested from the selection method for each video.
nr_frames = 10

# Key of the dataset to process (looked up in `dataset_directories`).
dataset_name = 'fishClips'

In [2]:
# Mount Google Drive at /content/drive so that the modules and datasets stored
# there can be reached through ordinary file paths in the cells below.

from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Bring the helper modules from Drive into /content so they can be imported.

import shutil

# File names of the modules to fetch from the Drive "Modules" folder.
module_files = ['frame_selection.py']

for module in module_files:
  # Source: the .py file on Drive; destination: the same file name in /content.
  shutil.copy(
      f'/content/drive/My Drive/UvA/M Thesis/Modules/{module}',
      f'/content/{module}',
  )

In [4]:
import frame_selection
from frame_selection import count_frames

# Lookup tables keyed by the short names chosen in the settings cell.

# Selection name -> function from `frame_selection` implementing that method.
selection_methods = {
    'evenly': frame_selection.select_frames_evenly,
    'motionAbsdiff': frame_selection.select_frames_motion_absdiff,
}

# Dataset name -> name of the dataset's folder (on Drive and under /content).
dataset_directories = {'fishClips': 'Fish clips'}

In [5]:
# Copy the dataset from Drive to the local Colab disk.

# Dataset folder on Drive (source) and its local copy under /content (destination).
data_source = f"/content/drive/My Drive/UvA/M Thesis/Data/{dataset_directories[dataset_name]}"
data_dir = f"/content/{dataset_directories[dataset_name]}"

# dirs_exist_ok=True keeps this cell re-runnable: without it, copytree raises
# FileExistsError when the local copy already exists from an earlier run.
# Left as the last expression so the destination path is shown as cell output.
shutil.copytree(data_source, data_dir, dirs_exist_ok=True)

'/content/Fish clips'

In [6]:
# `os` and `clear_output` are imported here because the counting branch below
# uses them; relying on imports from later cells fails on a fresh kernel.
import os
from pathlib import Path

import pandas as pd
from IPython.display import clear_output

# Load the cached per-video frame counts if the CSV exists. Otherwise count the
# frames of every .mp4 clip, save the result locally and copy it to Drive.

frame_counts_path = f'{data_dir}/frame_counts.csv'

if Path(frame_counts_path).exists():
  frame_counts = pd.read_csv(frame_counts_path)

else:
  folder_path = f'{data_dir}/Clips'

  files = os.listdir(folder_path)

  # Counts every directory entry (not only .mp4 files); used for the progress display.
  nr_files = len(files)

  # One record per video: {"video": file name, "total_frames": frame count}.
  data = []

  for i, file in enumerate(files):
    if file.lower().endswith('.mp4'):
      video_path = os.path.join(folder_path, file)

      total_frames = count_frames(video_path)

      data.append({"video": file, "total_frames" : total_frames})

      # Replace the previous progress message instead of appending a new one.
      clear_output(wait=True)
      print(f'Number of videos: {nr_files}')
      print(f'Progress: {(i + 1)/nr_files * 100:.2f}%')

  # Convert the data into a pandas DataFrame
  frame_counts = pd.DataFrame(data)

  # Save the DataFrame to a CSV file (same path that the cache check above reads).
  csv_file_path = frame_counts_path

  frame_counts.to_csv(csv_file_path, index=False)

  # Copy the file to the dataset folder on Drive so later sessions can reuse it.
  shutil.copy(csv_file_path, f'{data_source}/frame_counts.csv')

In [7]:
import os

# Resolve the configured selection name to the function that implements it.
selection = selection_methods[selection_name]

# Base name of the output file: <dataset>_<selection>_<number of frames>.
output_name = f'{dataset_name}_{selection_name}_{nr_frames}'

# Local folder in which output files are collected; created if it is missing.
output_folder = '/content/Output'

os.makedirs(output_folder, exist_ok=True)

In [8]:
import pandas as pd
from IPython.display import clear_output

# Run the chosen selection method on every .mp4 clip and save the selected
# frames, together with each video's total frame count, to one CSV file.

folder_path = f'{data_dir}/Clips'

# Video file name -> total frame count, built once from the frame count table.
# Filtering the DataFrame per video would yield a pandas Series instead of a
# number, which would then be passed to the selection function and written to
# the CSV as the Series' text representation.
frame_count_by_video = dict(zip(frame_counts['video'], frame_counts['total_frames']))

# Initialize a list to hold the data
data = []

files = os.listdir(folder_path)

# Counts every directory entry (not only .mp4 files); used for the progress display.
nr_files = len(files)

for i, file in enumerate(files):
    if file.lower().endswith('.mp4'):
        video_path = os.path.join(folder_path, file)

        if file in frame_count_by_video:
            # Plain Python int rather than a NumPy scalar.
            total_frames = int(frame_count_by_video[file])
        else:
            # The cached frame count table has no entry for this video
            # (e.g. the clip was added after the table was saved): count now.
            total_frames = count_frames(video_path)

        frames = selection(video_path, total_frames, nr_frames)

        data.append({"video": file, "total_frames" : total_frames, "frames": frames})

        # Replace the previous progress message instead of appending a new one.
        clear_output(wait=True)
        print(f'Number of videos: {nr_files}')
        print(f'Progress: {(i + 1)/nr_files * 100:.2f}%')

# Convert the data into a pandas DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
csv_file_path = f'{output_folder}/{output_name}.csv'

df.to_csv(csv_file_path, index=False)


Number of videos: 220
Progress: 100.00%


In [9]:
# Copy all output files to the appropriate Drive folders

for file in os.listdir(output_folder):
  output_file_path = os.path.join(output_folder, file)

  # The dataset name is the part of the file name before the first underscore.
  data_name = file.split('_')[0]

  data_dir_name = dataset_directories[data_name]

  # Despite its name, this is the destination *directory* on Drive.
  destination_file_path = f'/content/drive/My Drive/UvA/M Thesis/Data/{data_dir_name}/Selected frames'

  # Make sure the directory exists. If it does not, shutil.copy would create a
  # single file called "Selected frames" and overwrite it for every output.
  os.makedirs(destination_file_path, exist_ok=True)

  shutil.copy(output_file_path, destination_file_path)