### Import libraries

In [None]:
from my_utils import emotion_dict, sentiment_dict, film_frames_dict

import os
from tqdm import tqdm
import cv2
import numpy as np
import pandas as pd
import librosa
import librosa.display

In [None]:
# Mount Google Drive and work from the project folder, so that the relative
# paths defined below resolve against it.
from google.colab import drive
drive.mount('/content/drive')

folder = '/content/drive/My Drive/Colab Notebooks/Dissertation'
os.chdir(folder)

# inputs
video_dir = '../../Cropped_Videos'  # folder where film clips are stored
audio_dir = '../../Audio'  # folder where film audio files are stored
# outputs
output_dir = './Datasets'
results_dir = './Results'

film_keys = list(emotion_dict)
datasets = ['frame_level', 'video_level']
results = ['eda', 'stats_analysis', 'modelling']
feature_group = ['rgb_hsv', 'audio', 'optical_flow']

# create the top-level folders for saving feature sets and results
for top_level_dir in (output_dir, results_dir):
    os.makedirs(top_level_dir, exist_ok=True)

# one sub-folder per dataset granularity (frame_level / video_level) under both
# Datasets and Results, plus one Results sub-folder per analysis type
for dataset in datasets:
    dataset_features_dir = os.path.join(output_dir, dataset)
    dataset_results_dir = os.path.join(results_dir, dataset)
    os.makedirs(dataset_features_dir, exist_ok=True)
    os.makedirs(dataset_results_dir, exist_ok=True)
    for r_type in results:
        os.makedirs(os.path.join(dataset_results_dir, r_type), exist_ok=True)

# 1. Feature Extraction

#### Function to merge the per-video feature files into a single file per feature group (for both frame-level features and video-level features)

* *Frame-level features*: result in 1 row of features per image frame
* *Video-level features*: result in 1 row of features per video, obtained by averaging the values of all frame-level features

In [None]:
# Function to merge all features from all videos in the dataset for a given feature group

def merge_dataset(input_dir, feature_group):
    """Merge all per-video feature CSVs of one feature group into a single CSV.

    Every file in ``input_dir`` whose name ends with ``_{feature_group}_df.csv``
    is read and concatenated. The rows are sorted by ``video_id`` (and by
    ``frame_id`` when that column is present) and written to
    ``features_{feature_group}_df.csv`` inside ``input_dir``, overwriting any
    previous merged file.

    Args:
        input_dir: Folder that holds the per-video CSV files; the merged file
            is written to the same folder.
        feature_group: Feature group name used in the file names
            (e.g. ``'rgb_hsv'``, ``'audio'``, ``'optical_flow'``).

    Raises:
        ValueError: If no per-video file for ``feature_group`` is found.
    """
    suffix = f"_{feature_group}_df.csv"
    output_name = f"features_{feature_group}_df.csv"
    output_file = os.path.join(input_dir, output_name)  # file name for saving the dataframe with all extracted features

    # The merged file's own name also ends with the suffix. It must be skipped,
    # otherwise re-running the merge reads the previous output back in and
    # duplicates every row.
    files = sorted(f for f in os.listdir(input_dir)
                   if f.endswith(suffix) and f != output_name)
    if not files:
        raise ValueError(f"No '*{suffix}' files found in {input_dir}; nothing to merge")

    merged_df = pd.concat([pd.read_csv(os.path.join(input_dir, f)) for f in files],
                          ignore_index=True)

    # frame-level files carry a frame_id column, video-level files do not
    sort_columns = ["video_id", "frame_id"] if "frame_id" in merged_df.columns else ["video_id"]
    merged_df = merged_df.sort_values(by=sort_columns, ascending=True)

    merged_df.to_csv(output_file, index=False, mode='w')
    print(f"Merged {feature_group} features saved to {output_file}")


### 1.1 RGB_HSV features

In [None]:
def extract_rgb_hsv_statistics(frame):
    """Compute per-channel summary statistics of a frame in RGB and HSV space.

    Args:
        frame: Image frame in RGB channel order (it is converted to HSV with
            ``cv2.COLOR_RGB2HSV``).

    Returns:
        dict mapping ``"<channel>_mean"``, ``"<channel>_median"`` and
        ``"<channel>_std"`` to the statistic taken over all pixels of that
        channel, for the channels R, G, B, H, S and V (in that order).
    """
    channel_labels = ['R', 'G', 'B', 'H', 'S', 'V']

    hsv_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
    # the three RGB planes followed by the three HSV planes
    planes = list(cv2.split(frame)) + list(cv2.split(hsv_frame))

    stats = {}
    for position, plane in enumerate(planes):
        label = channel_labels[position]
        stats.update({
            f"{label}_mean": np.mean(plane),
            f"{label}_median": np.median(plane),
            f"{label}_std": np.std(plane),
        })

    return stats

def extract_colourfulness(frame):
    """Return a scalar colourfulness score for a three-channel frame.

    The score combines the spread and the mean of two opponent-colour
    signals, ``|R - G|`` and ``|0.5 * (R + G) - B|``:
    ``sqrt(var_rg + var_yb) + 0.3 * sqrt(mean_rg**2 + mean_yb**2)``.

    Args:
        frame: Image frame whose channels are split as (R, G, B).

    Returns:
        The colourfulness value as a float.
    """
    # work in floating point so the channel differences cannot wrap around
    red, green, blue = cv2.split(frame.astype("float"))

    red_green = np.abs(red - green)
    yellow_blue = np.abs(0.5 * (red + green) - blue)

    spread_term = np.sqrt(np.var(red_green) + np.var(yellow_blue))
    mean_term = np.sqrt((np.mean(red_green) ** 2) + (np.mean(yellow_blue) ** 2))

    return spread_term + 0.3 * mean_term


def extract_rgb_hsv_features(video_path, frame_interval=0, output_dir=output_dir):
    """Extract colour features from sampled frames of a video and save them as CSV.

    For each sampled frame the RGB/HSV channel statistics and the colourfulness
    score are computed. Two files are written:

    * ``<output_dir>/frame_level/<video_id>_frames_rgb_hsv_df.csv`` with one
      row per sampled frame;
    * ``<output_dir>/video_level/<video_id>_video_rgb_hsv_df.csv`` with one
      row holding the mean of every feature over the sampled frames.

    Args:
        video_path: Path to the video; the file name must start with the
            integer video id followed by ``_`` (e.g. ``12_cropped.mp4``).
        frame_interval: Number of frames between two sampled frames. With the
            default (any value <= 0) the interval is the video's frame rate,
            i.e. one frame per second. If the frame rate cannot be read
            either, every frame is sampled.
        output_dir: Root folder containing the ``frame_level`` and
            ``video_level`` sub-folders.

    Raises:
        ValueError: If the file name does not start with an integer id.
        KeyError: If the id is missing from ``emotion_dict``/``sentiment_dict``.
    """
    # obtain the video id based on the file name
    video_id = int(os.path.splitext(os.path.basename(video_path))[0].split('_')[0])
    emotion = emotion_dict[video_id]
    sentiment = sentiment_dict[video_id]

    features_list = []
    cap = cv2.VideoCapture(video_path)
    try:
        if frame_interval <= 0:
            # Round rather than truncate the frame rate, so that e.g. 29.97 fps
            # samples every 30th frame instead of every 29th.
            frame_interval = round(cap.get(cv2.CAP_PROP_FPS))

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # capture only one frame per interval (every frame if the interval is unknown)
            if frame_interval <= 0 or frame_idx % frame_interval == 0:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # video id, frame id, emotion, sentiment, followed by the features
                frame_level = {"video_id": video_id, "frame_id": len(features_list),
                               "emotion": emotion,
                               "sentiment": sentiment}
                frame_level.update(extract_rgb_hsv_statistics(frame))
                frame_level['colourfulness'] = extract_colourfulness(frame)
                features_list.append(frame_level)

            frame_idx += 1
    finally:
        # release the capture even if feature extraction raises
        cap.release()

    if not features_list:
        # An empty frame would have no columns to drop or group by below.
        print(f"Failed to read the video: {video_path}")
        return

    # save all frame features into a single CSV file per video
    video_df = pd.DataFrame(features_list)

    video_filename = os.path.join(output_dir, f"frame_level/{video_id}_frames_rgb_hsv_df.csv")
    video_df.to_csv(video_filename, index=False)

    # save video-level features: the mean of all the features for each video
    video_level = (video_df.drop(columns=["frame_id"])
                   .groupby(["video_id", "emotion", "sentiment"])
                   .mean()
                   .reset_index())
    video_level_filename = os.path.join(output_dir, f"video_level/{video_id}_video_rgb_hsv_df.csv")
    video_level.to_csv(video_level_filename, index=False)

In [None]:
# Extract rgb_hsv features from every film clip.
# The extraction is computationally expensive and slow, so tqdm is used to
# display a progress bar.
video_extensions = (".mp4", ".mpeg", ".mpg", ".mov")  # file formats to look for

for vid in tqdm(film_keys, desc="Processing videos"):
    for ext in video_extensions:
        video_path = os.path.join(video_dir, f"{vid}_cropped{ext}")
        if not os.path.exists(video_path):
            continue  # no clip stored in this format
        extract_rgb_hsv_features(video_path)

print("Completed colour feature extraction for all videos")

In [None]:
# Combine the per-video rgb_hsv files into one file per granularity:
# first the frame-level features, then the video-level features.
for level in ("frame_level", "video_level"):
    input_dir = os.path.join(output_dir, level)
    merge_dataset(input_dir, 'rgb_hsv')

### 1.2 Audio features

In [None]:
def extract_audio_features(audio_path, output_dir=output_dir):
    """Extract audio features per one-second segment and save them as CSV.

    The audio is loaded with ``librosa.load`` and analysed with a frame size
    and hop length equal to the sampling rate, so each feature vector
    describes one second of audio. Two files are written:

    * ``<output_dir>/frame_level/<id>_frames_audio_df.csv`` with one row per
      one-second segment;
    * ``<output_dir>/video_level/<id>_video_audio_df.csv`` with one row
      holding the mean of every feature over all segments.

    Columns are ``video_id``, ``frame_id``, ``emotion``, ``sentiment``, the
    scalar features (spectral centroid/bandwidth, RMS energy, zero-crossing
    rate, amplitude envelope, and the per-segment means of chroma, MFCCs and
    their first/second deltas), followed by the individual chroma, MFCC,
    delta-MFCC and delta2-MFCC coefficients.

    Args:
        audio_path: Path to the audio file; the file name must start with the
            integer video id followed by ``_`` (e.g. ``12_audio.mp3``).
        output_dir: Root folder containing the ``frame_level`` and
            ``video_level`` sub-folders.

    Raises:
        ValueError: If the file name does not start with an integer id.
        KeyError: If the id is missing from ``emotion_dict``/``sentiment_dict``.

    NOTE(review): ``librosa.feature.delta`` is called with its default window
    width, so very short clips (only a few one-second frames) may be
    rejected by librosa - confirm against the shortest clip in the dataset.
    """
    # obtain the audio id based on the file name
    audio_id = int(os.path.splitext(os.path.basename(audio_path))[0].split('_')[0])

    # load audio file
    signal, sampling_rate = librosa.load(audio_path)

    print("Sampling rate:", sampling_rate)

    # values ensure that one-second segments of audio are extracted;
    # plain ints are required for slicing and range() below
    FRAME_SIZE = int(round(sampling_rate))
    HOP_LENGTH = FRAME_SIZE

    # extract audio features from each one-second segment
    spectral_centroid = librosa.feature.spectral_centroid(y=signal, sr=sampling_rate, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=sampling_rate, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
    rms_energy = librosa.feature.rms(y=signal, frame_length=FRAME_SIZE, hop_length=HOP_LENGTH)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y=signal, frame_length=FRAME_SIZE, hop_length=HOP_LENGTH)
    # sr and n_fft are passed explicitly so chroma is computed over the same
    # one-second window as the other spectral features (librosa's default
    # n_fft would only cover a short excerpt of each second)
    chroma = librosa.feature.chroma_stft(y=signal, sr=sampling_rate, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
    mfccs = librosa.feature.mfcc(y=signal, n_mfcc=13, sr=sampling_rate, n_fft=FRAME_SIZE, hop_length=HOP_LENGTH)
    # deltas are taken on the full MFCC sequence, before any truncation
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)

    # amplitude envelope: maximum sample value of each one-second segment
    amplitude_envelope = np.array(
        [signal[start:start + FRAME_SIZE].max() for start in range(0, len(signal), HOP_LENGTH)]
    ).reshape(1, -1)

    # ensure consistent feature length: truncate all features to the shortest one
    per_frame = [spectral_centroid, spectral_bandwidth, rms_energy, zero_crossing_rate,
                 amplitude_envelope, chroma, mfccs, delta_mfccs, delta2_mfccs]
    min_length = min(block.shape[1] for block in per_frame)
    (spectral_centroid, spectral_bandwidth, rms_energy, zero_crossing_rate,
     amplitude_envelope, chroma, mfccs, delta_mfccs, delta2_mfccs) = (
        block[:, :min_length] for block in per_frame)

    def _segment_mean(coefficients):
        # average over the coefficient axis -> one value per one-second segment
        return np.mean(coefficients, axis=0).reshape(1, -1)

    # stack features into a final feature vector for each sample (rows = segments)
    features = np.vstack([spectral_centroid, spectral_bandwidth, rms_energy,
                          zero_crossing_rate, amplitude_envelope,
                          _segment_mean(chroma), _segment_mean(mfccs),
                          _segment_mean(delta_mfccs), _segment_mean(delta2_mfccs),
                          chroma, mfccs, delta_mfccs, delta2_mfccs]).T

    # define feature names, in the same order as the stacked rows above
    feature_names = ["spectral_centroid", "spectral_bandwidth", "rms_energy",
                     "zero_crossing_rate", "amplitude_envelope", "chroma_mean",
                     "mfccs_mean", "delta_mfccs_mean", "delta2_mfccs_mean"]
    for prefix, coefficients in (("chroma", chroma), ("mfccs", mfccs),
                                 ("delta_mfccs", delta_mfccs),
                                 ("delta2_mfccs", delta2_mfccs)):
        feature_names.extend(f"{prefix}_{i}" for i in range(coefficients.shape[0]))

    # creating a data frame for audio features
    feature_df = pd.DataFrame(features, columns=feature_names)

    # identifying columns; the scalar values are broadcast to every segment
    id_df = pd.DataFrame({"video_id": audio_id,
                          "frame_id": np.arange(features.shape[0]),
                          "emotion": emotion_dict[audio_id],
                          "sentiment": sentiment_dict[audio_id]})

    video_df = pd.concat([id_df, feature_df], axis=1)

    video_filename = os.path.join(output_dir, f"frame_level/{audio_id}_frames_audio_df.csv")
    video_df.to_csv(video_filename, index=False)

    # Save video-level summary: the mean of all the features for each video
    video_level = (video_df.drop(columns=["frame_id"])
                   .groupby(["video_id", "emotion", "sentiment"])
                   .mean()
                   .reset_index())
    video_level_filename = os.path.join(output_dir, f"video_level/{audio_id}_video_audio_df.csv")
    video_level.to_csv(video_level_filename, index=False)


In [None]:
# Extract audio features for every film that has an audio file
for vid in tqdm(film_keys, desc="Processing audio"):
    audio_path = os.path.join(audio_dir, f"{vid}_audio.mp3")
    if not os.path.exists(audio_path):
        continue  # no audio file stored for this film
    extract_audio_features(audio_path)

# Combine the per-video audio files into one file per granularity:
# first the video-level features, then the frame-level features
for level in ("video_level", "frame_level"):
    input_dir = os.path.join(output_dir, level)
    merge_dataset(input_dir, 'audio')

print("Completed audio feature extraction for all videos")

### 1.3 Motion (Optical Flow) features


In [None]:
# extract optical flow features using Farneback's method

def extract_optical_flow_features(video_path, frame_interval=0, output_dir=output_dir):
    """Extract dense optical-flow features from a video and save them as CSV.

    Frames are sampled at a fixed interval and Farneback dense optical flow is
    computed between each sampled frame and the previously sampled one. For
    every pair the mean flow magnitude and mean flow angle are stored.
    Two files are written:

    * ``<output_dir>/frame_level/<video_id>_optical_flow_df.csv`` with one row
      per compared frame pair;
    * ``<output_dir>/video_level/<video_id>_optical_flow_df.csv`` with one row
      holding the mean of both features over all pairs.

    Args:
        video_path: Path to the video; the file name must start with the
            integer video id followed by ``_`` (e.g. ``12_cropped.mp4``).
        frame_interval: Number of frames between two compared frames. With the
            default (any value <= 0) the interval is the video's frame rate,
            i.e. frames one second apart are compared. If the frame rate
            cannot be read either, consecutive frames are compared.
        output_dir: Root folder for the ``frame_level`` and ``video_level``
            sub-folders (created if missing).

    Raises:
        ValueError: If the file name does not start with an integer id.
        KeyError: If the id is missing from ``emotion_dict``/``sentiment_dict``.
    """
    video_id = int(os.path.splitext(os.path.basename(video_path))[0].split('_')[0])
    emotion = emotion_dict[video_id]
    sentiment = sentiment_dict[video_id]

    features_list = []
    cap = cv2.VideoCapture(video_path)
    try:
        if frame_interval <= 0:
            # Round rather than truncate the frame rate, so that e.g. 29.97 fps
            # compares frames 30 apart instead of 29.
            frame_interval = round(cap.get(cv2.CAP_PROP_FPS))

        # read the first frame; it is the reference for the first comparison
        ret, prev_frame = cap.read()
        if not ret:
            print(f"Failed to read the video: {video_path}")
            return

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        # Frame 0 has already been consumed, so the next frame read is frame 1.
        # Counting from 1 makes the first comparison span a full interval, like
        # every later one, instead of two consecutive frames.
        frame_idx = 1

        while cap.isOpened():

            ret, frame = cap.read()
            if not ret:
                break

            # ensures that frames are being compared at fixed (1 second) intervals
            if frame_interval <= 0 or frame_idx % frame_interval == 0:

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Calculate dense optical flow
                flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                    pyr_scale=0.5, levels=3, winsize=15,
                                                    iterations=3, poly_n=5, poly_sigma=1.2, flags=0)

                # Compute magnitude and angle of the flow
                magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

                # NOTE(review): avg_angle is a plain arithmetic mean of the
                # per-pixel angles and ignores wrap-around; consider whether a
                # circular mean is more appropriate.
                features_list.append({'video_id': video_id,
                                      'frame_id': len(features_list),
                                      "emotion": emotion,
                                      "sentiment": sentiment,
                                      'avg_magnitude': np.mean(magnitude),
                                      'avg_angle': np.mean(angle)})

                prev_gray = gray  # set previous frame as the current frame

            frame_idx += 1
    finally:
        # release the capture on every exit path, including the early return
        cap.release()

    if not features_list:
        # An empty frame would have no columns to drop or group by below.
        print(f"Video too short to compute optical flow: {video_path}")
        return

    # save features to a CSV file
    df = pd.DataFrame(features_list)

    # save frame-level features
    os.makedirs(os.path.join(output_dir, "frame_level"), exist_ok=True)
    output_file = os.path.join(output_dir, f"frame_level/{video_id}_optical_flow_df.csv")
    df.to_csv(output_file, index=False)

    # save video-level features: the mean of all the features for each video
    video_summary = (df.drop(columns=["frame_id"])
                     .groupby(["video_id", "emotion", "sentiment"])
                     .mean()
                     .reset_index())
    os.makedirs(os.path.join(output_dir, "video_level"), exist_ok=True)
    summary_file = os.path.join(output_dir, f"video_level/{video_id}_optical_flow_df.csv")
    video_summary.to_csv(summary_file, index=False)


In [None]:
# Extract optical flow features from every film clip
clip_extensions = (".mp4", ".mpeg", ".mpg", ".mov")  # file formats to look for

for vid in tqdm(film_keys, desc="Processing videos"):
    for ext in clip_extensions:
        video_path = os.path.join(video_dir, f"{vid}_cropped{ext}")
        if not os.path.exists(video_path):
            continue  # no clip stored in this format
        extract_optical_flow_features(video_path)

# Combine the per-video optical flow files into one file per granularity:
# first the frame-level features, then the video-level features
for level in ("frame_level", "video_level"):
    input_dir = os.path.join(output_dir, level)
    merge_dataset(input_dir, 'optical_flow')

print("Completed optical flow feature extraction for all videos")