In [2]:
import os
import pandas as pd

RAW_BASE_PATH = '../../data/raw'
TARGET_SUFFIX = '_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv'

def preview_csv_data_shapes(base_path):
    for user_folder in os.listdir(base_path):
        user_path = os.path.join(base_path, user_folder)

        if not os.path.isdir(user_path) or not user_folder.endswith('_P'):
            continue

        video_dir = os.path.join(user_path, 'video')
        if not os.path.isdir(video_dir):
            continue

        for fname in os.listdir(video_dir):
            if fname.endswith(TARGET_SUFFIX):
                file_path = os.path.join(video_dir, fname)
                try:
                    df = pd.read_csv(file_path)
                    print(f"\nData shape from {file_path}: {df.shape}")
                except Exception as e:
                    print(f"Error loading {file_path}: {e}")
                break

preview_csv_data_shapes(RAW_BASE_PATH)



Data shape from ../../data/raw/302_P/video/302_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv: (7588, 102)

Data shape from ../../data/raw/301_P/video/301_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv: (8240, 102)


In [5]:
import os
import pandas as pd

RAW_BASE_PATH = '../../data/raw'
SAVE_BASE_PATH = '../../data/interim/video_features'
TARGET_SUFFIX_CSV = '_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv'

def process_and_save_csv_features(base_path, save_base_path):
    for user_folder in os.listdir(base_path):
        user_path = os.path.join(base_path, user_folder)

        if not os.path.isdir(user_path) or not user_folder.endswith('_P'):
            continue

        video_dir = os.path.join(user_path, 'video')
        if not os.path.isdir(video_dir):
            continue

        for fname in os.listdir(video_dir):
            if fname.endswith(TARGET_SUFFIX_CSV):
                file_path = os.path.join(video_dir, fname)
                try:
                    df = pd.read_csv(file_path)

                    # Drop first column
                    df = df.iloc[:, 1:]

                    # Rename second column to 'time' and the rest to video02_fX
                    if df.shape[1] > 0:
                        new_columns = ['time'] + [f'video02_f{i+1}' for i in range(df.shape[1] - 1)]
                        df.columns = new_columns
                    else:
                        print(f"⚠️ Warning: No columns left after dropping first column in {file_path}")

                    save_user_dir = os.path.join(save_base_path, user_folder)
                    os.makedirs(save_user_dir, exist_ok=True)
                    save_path = os.path.join(save_user_dir, 'processed_video_features02.parquet')

                    df.to_parquet(save_path, index=False)
                    print(f"✅ Processed and saved: {save_path}")

                except Exception as e:
                    print(f"❌ Error processing {file_path}: {e}")
                break

process_and_save_csv_features(RAW_BASE_PATH, SAVE_BASE_PATH)


✅ Processed and saved: ../../data/interim/video_features/302_P/processed_video_features02.parquet
✅ Processed and saved: ../../data/interim/video_features/301_P/processed_video_features02.parquet


In [6]:
import os
import pandas as pd

SAVE_BASE_PATH = '../../data/interim/video_features'

def load_and_display_saved_audio_features(save_base_path):
    for user_folder in os.listdir(save_base_path):
        user_path = os.path.join(save_base_path, user_folder)
        if not os.path.isdir(user_path):
            continue

        parquet_file = os.path.join(user_path, 'processed_video_features02.parquet')
        if os.path.isfile(parquet_file):
            try:
                df = pd.read_parquet(parquet_file)
                print(f"\n📊 User {user_folder} - processed_audio_features.parquet")
                display(df.head())  # display first few rows as table
            except Exception as e:
                print(f"❌ Error loading {parquet_file}: {e}")

load_and_display_saved_audio_features(SAVE_BASE_PATH)



📊 User 302_P - processed_audio_features.parquet


Unnamed: 0,time,video02_f1,video02_f2,video02_f3,video02_f4,video02_f5,video02_f6,video02_f7,video02_f8,video02_f9,...,video02_f91,video02_f92,video02_f93,video02_f94,video02_f95,video02_f96,video02_f97,video02_f98,video02_f99,video02_f100
0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0



📊 User 301_P - processed_audio_features.parquet


Unnamed: 0,time,video02_f1,video02_f2,video02_f3,video02_f4,video02_f5,video02_f6,video02_f7,video02_f8,video02_f9,...,video02_f91,video02_f92,video02_f93,video02_f94,video02_f95,video02_f96,video02_f97,video02_f98,video02_f99,video02_f100
0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
import os
import pandas as pd

SAVE_BASE_PATH = '../../data/interim/video_features'

def print_pca_shapes(base_path):
    for user_folder in os.listdir(base_path):
        user_path = os.path.join(base_path, user_folder)
        if not os.path.isdir(user_path):
            continue

        parquet_file = os.path.join(user_path, 'processed_video_features02.parquet')
        if os.path.isfile(parquet_file):
            try:
                df = pd.read_parquet(parquet_file)
                print(f"User {user_folder} PCA data shape: {df.shape}")
            except Exception as e:
                print(f"Error loading {parquet_file}: {e}")

print_pca_shapes(SAVE_BASE_PATH)


User 302_P PCA data shape: (7588, 101)
User 301_P PCA data shape: (8240, 101)


In [8]:
import os
import pandas as pd

SAVE_BASE_PATH = '../../data/interim/video_features'

def load_and_check_nulls(save_base_path):
    for user_folder in os.listdir(save_base_path):
        user_path = os.path.join(save_base_path, user_folder)
        if not os.path.isdir(user_path):
            continue

        parquet_file = os.path.join(user_path, 'processed_video_features02.parquet')
        if os.path.isfile(parquet_file):
            try:
                df = pd.read_parquet(parquet_file)
                has_nulls = df.isnull().values.any()
                print(f"\n📊 User {user_folder} - processed_audio_features.parquet")
                print(f"Contains null values? {'Yes' if has_nulls else 'No'}")
            except Exception as e:
                print(f"❌ Error loading {parquet_file}: {e}")

load_and_check_nulls(SAVE_BASE_PATH)



📊 User 302_P - processed_audio_features.parquet
Contains null values? No

📊 User 301_P - processed_audio_features.parquet
Contains null values? No
