In [6]:
import mediapipe as mp
import dlib
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
import os

In [7]:
# Loading Facial Features
#
# For every CSV in `folder_path`, average all numeric columns and keep one
# summary row per file (keyed by the file name without extension). All rows
# are written to `output_csv` in a single pass at the end.

# Path to folder with CSVs
folder_path = 'C:\\cv_project\\code\\aggregated\\Facial_Features'
output_csv = 'C:\\cv_project\\code\\output_csv\\aggregated_video_features.csv'

# Get sorted list of CSV filenames
csv_files = sorted([f for f in os.listdir(folder_path) if f.endswith('.csv')])

# One dict of mean features per video; collected here instead of appending to
# the CSV row by row so that
#   * re-running the cell overwrites the file rather than duplicating rows, and
#   * files with differing column sets/orders are aligned by column name.
video_rows = []

for filename in csv_files:
    file_path = os.path.join(folder_path, filename)
    df = pd.read_csv(file_path)

    # Clean column names: strip spaces and drop unnamed (empty) columns
    df.columns = df.columns.str.strip()
    df = df.loc[:, ~df.columns.str.contains('^Unnamed', case=False)]

    # Select only numeric columns
    numeric_df = df.select_dtypes(include='number')

    # Compute average of features
    mean_features = numeric_df.mean().to_dict()

    # Add video name (file name without the .csv extension)
    video_name = os.path.splitext(filename)[0]
    mean_features['video'] = video_name

    video_rows.append(mean_features)

    print(f"Processed: {filename}")

if video_rows:
    # Build the full table once and reorder to keep 'video' first
    result_df = pd.DataFrame(video_rows)
    cols = ['video'] + [col for col in result_df.columns if col != 'video']
    result_df = result_df[cols]

    # Single write in the default 'w' mode: any output of a previous run is replaced
    result_df.to_csv(output_csv, index=False)

    print(f"\n✅ Final aggregated CSV saved to: {output_csv}")
else:
    # Nothing to aggregate; leave any existing output file untouched
    print(f"\n⚠️ No CSV files found in: {folder_path}")


Processed: P1.csv
Processed: P10.csv
Processed: P11.csv
Processed: P12.csv
Processed: P13.csv
Processed: P14.csv
Processed: P15.csv
Processed: P16.csv
Processed: P17.csv
Processed: P20.csv
Processed: P21.csv
Processed: P22.csv
Processed: P24.csv
Processed: P25.csv
Processed: P27.csv
Processed: P29.csv
Processed: P3.csv
Processed: P30.csv
Processed: P31.csv
Processed: P32.csv
Processed: P33.csv
Processed: P34.csv
Processed: P35.csv
Processed: P37.csv
Processed: P4.csv
Processed: P42.csv
Processed: P43.csv
Processed: P44.csv
Processed: P45.csv
Processed: P47.csv
Processed: P48.csv
Processed: P49.csv
Processed: P5.csv
Processed: P50.csv
Processed: P52.csv
Processed: P53.csv
Processed: P55.csv
Processed: P56.csv
Processed: P57.csv
Processed: P58.csv
Processed: P59.csv
Processed: P6.csv
Processed: P60.csv
Processed: P61.csv
Processed: P62.csv
Processed: P63.csv
Processed: P64.csv
Processed: P65.csv
Processed: P66.csv
Processed: P67.csv
Processed: P69.csv
Processed: P7.csv
Processed: P70.csv

In [8]:
# Read back the aggregated features that were written to `output_csv`
aggregated_df = pd.read_csv(output_csv)

# Order the rows by video name (plain string ordering of the 'video' column)
sorted_df = aggregated_df.sort_values('video')

# Overwrite the same CSV with the ordered rows, without the index column
sorted_df.to_csv(output_csv, index=False)

print(f"✅ Aggregated CSV sorted by 'video' column and saved to: {output_csv}")

✅ Aggregated CSV sorted by 'video' column and saved to: C:\cv_project\code\output_csv\aggregated_video_features.csv


### Smile Data

In [None]:
# For every .txt file in `folder_path`, average the first three
# space-separated columns and keep one summary row per file. All rows are
# written to `output_csv` in a single pass at the end.

# Path to folder with txt files
folder_path = 'C:\\cv_project\\code\\aggregated\\smile_data'
output_csv = 'C:\\cv_project\\code\\output_csv\\smile_data.csv'

# Names assigned to the first three columns of each file.
# NOTE(review): the files carry no header, so this column order is taken from
# the original `usecols` list — confirm against the data format.
smile_columns = ['smile_intensity', 'head_nod', 'head_shake']

# Sorted list of .txt files
txt_files = sorted([f for f in os.listdir(folder_path) if f.endswith('.txt')])

# One dict of column means per file; collected here and written once so that
# re-running the cell overwrites the CSV instead of appending duplicate rows.
smile_rows = []

for filename in txt_files:
    file_path = os.path.join(folder_path, filename)

    # Read space-separated values, no header, only first 3 columns.
    # Columns are selected by position: with header=None there are no column
    # labels, so selecting by name would fail for every file.
    try:
        df = pd.read_csv(file_path, sep=' ', header=None, usecols=[0, 1, 2])
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest
        print(f"⚠️ Skipping {filename}: {e}")
        continue
    df.columns = smile_columns

    # Compute mean of each column
    means = df.mean().to_dict()
    # Extract only the last part (e.g., P72) from the file name
    means['video'] = filename.split('-')[-1].replace('.txt', '')

    smile_rows.append(means)

    print(f"Processed: {filename}")

if smile_rows:
    # Build the full table once and reorder to keep 'video' first
    result_df = pd.DataFrame(smile_rows)
    cols = ['video'] + [col for col in result_df.columns if col != 'video']
    result_df = result_df[cols]

    # Single write in the default 'w' mode: any output of a previous run is replaced
    result_df.to_csv(output_csv, index=False)

    print(f"\n✅ Final aggregated TXT data saved to: {output_csv}")
else:
    # Nothing was aggregated; leave any existing output file untouched
    print(f"\n⚠️ No readable .txt files found in: {folder_path}")

Processed: Smoothed-features-P1.txt
Processed: Smoothed-features-P10.txt
Processed: Smoothed-features-P11.txt
Processed: Smoothed-features-P12.txt
Processed: Smoothed-features-P13.txt
Processed: Smoothed-features-P14.txt
Processed: Smoothed-features-P15.txt
Processed: Smoothed-features-P16.txt
Processed: Smoothed-features-P17.txt
Processed: Smoothed-features-P20.txt
Processed: Smoothed-features-P21.txt
Processed: Smoothed-features-P22.txt
Processed: Smoothed-features-P24.txt
Processed: Smoothed-features-P25.txt
Processed: Smoothed-features-P27.txt
Processed: Smoothed-features-P29.txt
Processed: Smoothed-features-P3.txt
Processed: Smoothed-features-P30.txt
Processed: Smoothed-features-P31.txt
Processed: Smoothed-features-P32.txt
Processed: Smoothed-features-P33.txt
Processed: Smoothed-features-P34.txt
Processed: Smoothed-features-P35.txt
Processed: Smoothed-features-P37.txt
Processed: Smoothed-features-P4.txt
Processed: Smoothed-features-P42.txt
Processed: Smoothed-features-P43.txt
Proc

In [11]:
# Reload the aggregates from `output_csv` so this cell does not depend on
# whatever frame `aggregated_df` happened to hold in the kernel (earlier in the
# notebook that name is bound to the facial-features table, whose 'video'
# values never carry this prefix).
# NOTE(review): assumes `output_csv` still points at the smile-data CSV at this
# point in the notebook — confirm.
aggregated_df = pd.read_csv(output_csv)

# Update the 'video' column to remove 'Smoothed-features-' prefix
# (literal match, not a regex; values without the prefix are left unchanged)
aggregated_df['video'] = aggregated_df['video'].str.replace('Smoothed-features-', '', regex=False)

print(aggregated_df['video'])

0      P1
1     P10
2     P11
3     P12
4     P13
     ... 
64    P83
65    P84
66    P85
67    P86
68    P89
Name: video, Length: 69, dtype: object
