In [1]:
import os
import pandas as pd

# Root directory scanned for per-user folders (only names ending in '_P' are processed).
RAW_BASE_PATH = '../../data/raw'
# Root directory under which one output folder per user is created.
SAVE_BASE_PATH = '../../data/interim/audio_features'
# Filename suffix that selects the feature CSV inside each user's 'audio' directory.
TARGET_SUFFIX = '_BoAW_openSMILE_2.3.0_MFCC.csv'

def process_and_save_audio_features(base_path, save_base_path):
    """Convert each user's audio-feature CSV into a parquet file.

    Every sub-directory of ``base_path`` whose name ends with ``_P`` and that
    contains an ``audio`` directory is searched for files whose name ends with
    the module-level ``TARGET_SUFFIX``. Exactly one file per user is converted:
    the first match in sorted filename order, so the choice does not depend on
    the order in which the filesystem happens to list entries.

    The CSV is read without a header row, column 0 is discarded, and the
    remaining columns are renamed to ``time``, ``audio02_f1``, ``audio02_f2``,
    ... before being written to
    ``<save_base_path>/<user_folder>/processed_audio_features02.parquet``.

    Args:
        base_path: Directory containing the per-user raw folders.
        save_base_path: Directory under which the per-user output folders are
            created (missing directories are created as needed).

    Returns:
        None. Progress and per-file errors are reported on stdout; a failure
        for one user does not stop the remaining users from being processed.
    """
    # Sorted so that users are handled in a reproducible order.
    for user_folder in sorted(os.listdir(base_path)):
        user_path = os.path.join(base_path, user_folder)

        if not os.path.isdir(user_path) or not user_folder.endswith('_P'):
            continue

        audio_dir = os.path.join(user_path, 'audio')
        if not os.path.isdir(audio_dir):
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # "first matching file wins" rule deterministic.
        matches = sorted(
            fname for fname in os.listdir(audio_dir)
            if fname.endswith(TARGET_SUFFIX)
        )
        if not matches:
            continue

        # Only the first match is converted; any further matches are ignored,
        # as the single output filename per user can hold just one of them.
        file_path = os.path.join(audio_dir, matches[0])
        try:
            # The CSV has no header row, so columns are numbered 0..N.
            raw = pd.read_csv(file_path, header=None)

            # Column 0 is not kept in the output.
            features = raw.drop(columns=[0])

            # First remaining column -> 'time'; the rest -> audio02_f1, audio02_f2, ...
            features.columns = ['time'] + [
                f'audio02_f{i}' for i in range(1, features.shape[1])
            ]

            # Prepare save path
            save_user_dir = os.path.join(save_base_path, user_folder)
            os.makedirs(save_user_dir, exist_ok=True)
            save_path = os.path.join(save_user_dir, 'processed_audio_features02.parquet')

            # Save to parquet
            features.to_parquet(save_path, index=False)
            print(f"✅ Processed and saved: {save_path}")

        except Exception as e:
            # Best effort: report the failure and carry on with the next user.
            print(f"❌ Error processing {file_path}: {e}")

# Convert the raw per-user feature CSVs under RAW_BASE_PATH into parquet files under SAVE_BASE_PATH.
process_and_save_audio_features(RAW_BASE_PATH, SAVE_BASE_PATH)


✅ Processed and saved: ../../data/interim/audio_features/302_P/processed_audio_features02.parquet
✅ Processed and saved: ../../data/interim/audio_features/301_P/processed_audio_features02.parquet


In [2]:
import os
import pandas as pd

# Same value as the output root used by the processing cell; the two must stay in sync.
SAVE_BASE_PATH = '../../data/interim/audio_features'

def load_and_display_saved_audio_features(save_base_path):
    """Show the first rows of every saved audio-feature parquet file.

    Each sub-directory of ``save_base_path`` is checked for a file named
    ``processed_audio_features02.parquet``. When it exists, the file is read,
    a heading naming the user folder and the file is printed, and
    ``df.head()`` is rendered with ``display``.

    Args:
        save_base_path: Directory containing one sub-directory per user.

    Returns:
        None. Output goes to stdout / the notebook's rich display.

    Note:
        ``display`` is not imported in this cell; it is assumed to be supplied
        by the notebook environment. Any exception raised while reading or
        displaying a file is caught and reported as a load error.
    """
    # Single source of truth so the printed heading always names the file that is read.
    parquet_name = 'processed_audio_features02.parquet'

    for user_folder in os.listdir(save_base_path):
        user_path = os.path.join(save_base_path, user_folder)
        if not os.path.isdir(user_path):
            continue

        parquet_file = os.path.join(user_path, parquet_name)
        if os.path.isfile(parquet_file):
            try:
                df = pd.read_parquet(parquet_file)
                print(f"\n📊 User {user_folder} - {parquet_name}")
                display(df.head())  # display first few rows as table
            except Exception as e:
                print(f"❌ Error loading {parquet_file}: {e}")

# Preview the first rows of each user's saved parquet file.
load_and_display_saved_audio_features(SAVE_BASE_PATH)



📊 User 302_P - processed_audio_features.parquet


Unnamed: 0,time,audio02_f1,audio02_f2,audio02_f3,audio02_f4,audio02_f5,audio02_f6,audio02_f7,audio02_f8,audio02_f9,...,audio02_f91,audio02_f92,audio02_f93,audio02_f94,audio02_f95,audio02_f96,audio02_f97,audio02_f98,audio02_f99,audio02_f100
0,0.0,0.0,0.0,0.0,0.0,0.0,0.60206,0.0,1.30103,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.1,0.0,0.0,0.0,0.0,0.0,0.69897,0.0,1.30103,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.2,0.0,0.0,0.0,0.0,0.0,0.69897,0.0,1.342423,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.3,0.0,0.0,0.0,0.0,0.0,0.90309,0.0,1.380211,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.4,0.0,0.0,0.0,0.0,0.0,0.90309,0.0,1.39794,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0



📊 User 301_P - processed_audio_features.parquet


Unnamed: 0,time,audio02_f1,audio02_f2,audio02_f3,audio02_f4,audio02_f5,audio02_f6,audio02_f7,audio02_f8,audio02_f9,...,audio02_f91,audio02_f92,audio02_f93,audio02_f94,audio02_f95,audio02_f96,audio02_f97,audio02_f98,audio02_f99,audio02_f100
0,0.0,0.0,0.0,0.0,0.0,0.0,0.90309,0.0,1.69897,0.0,...,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.477121,0.30103
1,0.1,0.0,0.0,0.0,0.0,0.0,0.954242,0.0,1.69897,0.0,...,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.477121,0.30103
2,0.2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.69897,0.0,...,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.477121,0.477121
3,0.3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.69897,0.0,...,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.477121,0.477121
4,0.4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.716003,0.0,...,0.0,0.0,0.0,0.0,0.30103,0.0,0.30103,0.0,0.477121,0.477121


In [7]:
import os
import pandas as pd

# Same value as the output root used by the processing cell; the two must stay in sync.
SAVE_BASE_PATH = '../../data/interim/audio_features'

def load_and_display_saved_audio_features(save_base_path):
    """Print the shape of every saved audio-feature parquet file.

    Walks the immediate sub-directories of ``save_base_path`` and, for each
    one holding ``processed_audio_features02.parquet``, reads the file and
    prints the user folder name together with the DataFrame's ``shape``.
    Read failures are reported on stdout instead of being raised.

    Note:
        This definition reuses the name of the display helper defined in an
        earlier cell, so running this cell rebinds that name to this version.

    Args:
        save_base_path: Directory containing one sub-directory per user.

    Returns:
        None.
    """
    for entry in os.listdir(save_base_path):
        folder = os.path.join(save_base_path, entry)
        if os.path.isdir(folder):
            target = os.path.join(folder, 'processed_audio_features02.parquet')
            # Folders without the expected file are silently skipped.
            if not os.path.isfile(target):
                continue
            try:
                frame = pd.read_parquet(target)
                print(f"\n📊 User {entry} - processed_audio_features02.parquet shape: {frame.shape}")
            except Exception as err:
                print(f"❌ Error loading {target}: {err}")

# Report the (rows, columns) shape of each user's saved parquet file.
load_and_display_saved_audio_features(SAVE_BASE_PATH)



📊 User 302_P - processed_audio_features02.parquet shape: (7575, 101)

📊 User 301_P - processed_audio_features02.parquet shape: (8239, 101)


In [9]:
import os
import pandas as pd

# Same value as the output root used by the processing cell; the two must stay in sync.
SAVE_BASE_PATH = '../../data/interim/audio_features'

def load_and_check_nulls(save_base_path):
    """Report whether each saved audio-feature parquet file contains nulls.

    For every sub-directory of ``save_base_path`` that holds
    ``processed_audio_features02.parquet``, the file is read and a two-line
    summary is printed: the user folder name, then ``Yes`` or ``No`` depending
    on whether any cell of the DataFrame is null. Read failures are reported
    on stdout instead of being raised.

    Args:
        save_base_path: Directory containing one sub-directory per user.

    Returns:
        None.
    """
    for entry in os.listdir(save_base_path):
        folder = os.path.join(save_base_path, entry)
        target = os.path.join(folder, 'processed_audio_features02.parquet')
        # Skip plain files at the top level and folders lacking the parquet file.
        if not (os.path.isdir(folder) and os.path.isfile(target)):
            continue
        try:
            frame = pd.read_parquet(target)
            if frame.isna().values.any():
                verdict = 'Yes'
            else:
                verdict = 'No'
            print(f"\n📊 User {entry} - processed_audio_features02.parquet")
            print(f"Contains null values? {verdict}")
        except Exception as err:
            print(f"❌ Error loading {target}: {err}")

# Check each user's saved parquet file for null values.
load_and_check_nulls(SAVE_BASE_PATH)



📊 User 302_P - processed_audio_features02.parquet
Contains null values? No

📊 User 301_P - processed_audio_features02.parquet
Contains null values? No
