In [1]:
# Show which Python interpreter backs this kernel, to confirm the notebook
# runs inside the intended environment.
# NOTE(review): the printed path is machine-specific (it contains a local
# user directory) - consider clearing this output before sharing the notebook.
import sys
print(sys.executable)


C:\Users\sharm\.conda\envs\trisense\python.exe


In [2]:
# Record the installed moviepy version; a later cell imports VideoFileClip
# from `moviepy.editor`.
# NOTE(review): `moviepy.editor` is presumably the 1.x import path - confirm
# it still exists before upgrading moviepy.
import moviepy
print(moviepy.__version__)


1.0.3


In [3]:
import pandas as pd
import os
import cv2  # This is opencv-python
import librosa
import soundfile as sf
from moviepy.editor import VideoFileClip
from tqdm.auto import tqdm  # A nice progress bar!

# Where the raw MELD download lives and where this notebook writes its results
RAW_DATA_PATH = '../data/MELD_raw/'
PROC_DATA_PATH = '../data/MELD_processed/'

# One output folder per (modality, split) pair; folders that already exist are kept
for _modality in ('audio', 'video_frames'):
    for _split in ('train', 'dev', 'test'):
        os.makedirs(os.path.join(PROC_DATA_PATH, _modality, _split), exist_ok=True)

print("Imports complete and directories created.")

Imports complete and directories created.


In [4]:
# Define the base path to the MELD.Raw folder
# (We assume MELD.Raw is inside your data/MELD_raw/ folder)
BASE_MELD_PATH = os.path.join(RAW_DATA_PATH, 'MELD.Raw')

# Load the annotation CSVs. The train CSV is read from the 'train' sub-folder,
# while the dev and test CSVs are read directly from MELD.Raw.
# Only the reads sit inside `try`, so the FileNotFoundError handler cannot
# accidentally mask an unrelated failure in the reporting code.
try:
    df_train = pd.read_csv(os.path.join(BASE_MELD_PATH, 'train', 'train_sent_emo.csv'))
    df_dev = pd.read_csv(os.path.join(BASE_MELD_PATH, 'dev_sent_emo.csv'))
    df_test = pd.read_csv(os.path.join(BASE_MELD_PATH, 'test_sent_emo.csv'))

except FileNotFoundError as e:
    print(f"Error: {e}")
    print("\n! Please make sure your 'MELD.Raw' folder is inside 'data/MELD_raw/'")
    print("Your full path should look like: trisense_project/data/MELD_raw/MELD.Raw/")

else:
    print("Successfully loaded all CSVs.")
    print("Training data shape:", df_train.shape)
    print("Dev data shape:", df_dev.shape)
    print("Test data shape:", df_test.shape)

    # A bare `df_train.head()` nested in a block is not the cell's last
    # top-level expression, so Jupyter would render nothing for it.
    # `display` is the built-in that IPython/Jupyter injects into every kernel.
    display(df_train.head())

Successfully loaded all CSVs.
Training data shape: (9989, 11)
Dev data shape: (1109, 11)
Test data shape: (2610, 11)


In [5]:
!pip install mtcnn tensorflow --upgrade




In [6]:
import numpy as np
from mtcnn import MTCNN

# Instantiate the detector so import or setup problems surface right here
detector = MTCNN()
print("✅ MTCNN imported and initialized correctly!")

# Smoke test: run detection on an all-black 100x100 RGB image
dummy_img = np.zeros(shape=(100, 100, 3), dtype=np.uint8)
faces = detector.detect_faces(dummy_img)
print("Face detection test completed. Faces found:", len(faces))


✅ MTCNN imported and initialized correctly!
Face detection test completed. Faces found: 0


In [7]:
from mtcnn import MTCNN
from PIL import Image
import os
import cv2
from tqdm.auto import tqdm

# Name of the folder holding each split's video clips, i.e. the last path
# component of MELD.Raw/<split>/<folder>/
split_to_video_folder = dict(
    train='train_splits',
    dev='dev_splits_complete',
    test='output_repeated_splits_test',
)

# Face detector used by crop_faces_from_video() below
detector = MTCNN()

# Root folder for the cropped faces, plus one sub-folder per split
FACE_SAVE_PATH = os.path.join(PROC_DATA_PATH, 'faces')
os.makedirs(FACE_SAVE_PATH, exist_ok=True)
for split in split_to_video_folder:
    os.makedirs(os.path.join(FACE_SAVE_PATH, split), exist_ok=True)

# --- OPTIMIZED FUNCTION ---
def _clip_box_to_frame(box, frame_w, frame_h):
    """Clip an (x, y, w, h) box to the frame; return (x1, y1, x2, y2) or None if nothing is left."""
    x, y, w, h = box
    # Compute the far corner from the *unclamped* origin so that clamping a
    # negative x/y shrinks the box instead of shifting it.
    x1, y1 = max(0, x), max(0, y)
    x2, y2 = min(frame_w, x + w), min(frame_h, y + h)
    if x2 <= x1 or y2 <= y1:
        return None
    return x1, y1, x2, y2


def crop_faces_from_video(video_path, output_dir, frames_per_second=2,
                          min_confidence=0.9, face_size=(224, 224)):
    """
    Extract faces from a video using MTCNN and save cropped face images.

    Only every ``int(fps / frames_per_second)``-th frame is run through the
    module-level ``detector``; the frames in between are skipped without
    being decoded.

    Args:
        video_path: Path of the video file to read.
        output_dir: Existing directory that receives the face crops.
        frames_per_second: How many frames per second of video to analyse.
        min_confidence: Detections below this confidence are ignored.
        face_size: (width, height) every crop is resized to before saving.

    Returns:
        Number of face images written. Errors while processing the video are
        printed, not raised, and the count reached so far is returned.

    Raises:
        ValueError: If ``frames_per_second`` is not positive.

    Output files are named ``<clip>_frame_<NNNN>_face<i>.jpg``. The clip name
    is part of the file name because callers write many clips into the same
    directory; without it each clip would overwrite the previous clip's images.
    """
    if frames_per_second <= 0:
        raise ValueError("frames_per_second must be positive")

    clip_name = os.path.splitext(os.path.basename(video_path))[0]
    saved_count = 0
    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise IOError("could not open video")

        # Fall back to 30 fps when the container reports nothing usable
        # (None, 0, negative, NaN or inf all fail the range check).
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps is None or not (0 < fps < float('inf')):
            fps = 30
        frame_skip = max(1, int(fps / frames_per_second))

        frame_num = 0
        # grab() advances to the next frame; retrieve() decodes it, which is
        # done only for the frames that are actually analysed.
        while cap.grab():
            if frame_num % frame_skip == 0:
                success, frame = cap.retrieve()
                if success:
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frame_h, frame_w = rgb.shape[:2]

                    for i, det in enumerate(detector.detect_faces(rgb)):
                        if det['confidence'] < min_confidence:
                            continue
                        # The original code already clamped negative x/y, so
                        # boxes are assumed to possibly extend past the frame.
                        clipped = _clip_box_to_frame(det['box'], frame_w, frame_h)
                        if clipped is None:
                            continue  # nothing left to crop
                        x1, y1, x2, y2 = clipped

                        face_img = Image.fromarray(rgb[y1:y2, x1:x2]).resize(face_size)
                        save_name = f"{clip_name}_frame_{frame_num:04d}_face{i}.jpg"
                        face_img.save(os.path.join(output_dir, save_name))
                        saved_count += 1

            frame_num += 1
    except Exception as e:
        print(f"Error processing video {video_path}: {e}")
    finally:
        # Always free the capture handle, including on errors
        if cap is not None:
            cap.release()
    return saved_count
# --- END OPTIMIZED FUNCTION ---

def process_split_faces(df, split='train'):
    """
    Process all MELD videos in a given split to extract faces organized by emotion.

    For every row of ``df`` the clip ``dia<Dialogue_ID>_utt<Utterance_ID>.mp4``
    is looked up in the split's video folder and passed to
    ``crop_faces_from_video``; the crops go to ``FACE_SAVE_PATH/<split>/<Emotion>/``.

    Args:
        df: DataFrame with 'Emotion', 'Dialogue_ID' and 'Utterance_ID' columns.
        split: Key of ``split_to_video_folder`` ('train', 'dev' or 'test').

    Clips that are not found on disk are skipped, and a summary of how many
    were skipped is printed at the end so a wrong folder does not pass for a
    successful run.
    """
    # Get the correct sub-folder name for the videos
    video_subfolder = split_to_video_folder[split]
    split_video_path = os.path.join(BASE_MELD_PATH, split, video_subfolder)

    split_face_path = os.path.join(FACE_SAVE_PATH, split)

    print(f"Processing {split} set... searching for videos in: {split_video_path}")

    # Without the folder every row would be skipped one by one; say so up front.
    if not os.path.isdir(split_video_path):
        print(f"Warning: video folder not found: {split_video_path}. No {split} videos processed.")
        return

    missing_videos = []
    for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Processing {split} set"):
        emotion = row['Emotion']
        video_filename = f"dia{row['Dialogue_ID']}_utt{row['Utterance_ID']}.mp4"

        video_path = os.path.join(split_video_path, video_filename)

        if not os.path.exists(video_path):
            missing_videos.append(video_filename)
            continue

        emotion_dir = os.path.join(split_face_path, emotion)
        os.makedirs(emotion_dir, exist_ok=True)

        # Call the face extraction function
        crop_faces_from_video(video_path, emotion_dir)

    if missing_videos:
        preview = ", ".join(missing_videos[:5])
        print(f"Warning: {len(missing_videos)} of {len(df)} {split} videos were not found "
              f"and were skipped (e.g. {preview}).")


# --- Run face extraction ---
# (Make sure Cells 3 and 4 have been run to define df_train, etc.)
process_split_faces(df_train, 'train')
process_split_faces(df_dev, 'dev')
process_split_faces(df_test, 'test')

# Print the folder from FACE_SAVE_PATH rather than a hand-typed path, so the
# message always names the directory that was actually written to.
print(f"✅ FER face extraction complete! Check '{FACE_SAVE_PATH}' for results.")

Processing train set... searching for videos in: ../data/MELD_raw/MELD.Raw\train\train_splits


Processing train set:   0%|          | 0/9989 [00:00<?, ?it/s]

Processing dev set... searching for videos in: ../data/MELD_raw/MELD.Raw\dev\dev_splits_complete


Processing dev set:   0%|          | 0/1109 [00:00<?, ?it/s]

Processing test set... searching for videos in: ../data/MELD_raw/MELD.Raw\test\output_repeated_splits_test


Processing test set:   0%|          | 0/2610 [00:00<?, ?it/s]

✅ FER face extraction complete! Check '../data/MMELD_processed/faces/' for results.


In [8]:
import soundfile as sf
from moviepy.editor import VideoFileClip
import librosa
from tqdm.auto import tqdm
import os
import pandas as pd

# Declare the data locations again so this cell still works when it is run
# right after a kernel restart
RAW_DATA_PATH = '../data/MELD_raw/'
PROC_DATA_PATH = '../data/MELD_processed/'

# Same for the MELD root folder and the per-split video folder names
BASE_MELD_PATH = os.path.join(RAW_DATA_PATH, 'MELD.Raw')
split_to_video_folder = dict(
    train='train_splits',
    dev='dev_splits_complete',
    test='output_repeated_splits_test',
)

print("Starting Audio (SER) and Text (TER) processing...")

def process_audio_and_text(df, data_split, target_sr=16000):
    """
    Process one data split (train, dev, or test).

    For each row of ``df`` this function:
    1. Extracts the clip's audio, resamples it to ``target_sr`` and saves it
       as ``<PROC_DATA_PATH>/audio/<data_split>/dia<D>_utt<U>.wav``.
    2. Collects the utterance text, emotion and saved audio path.

    Rows that fail (missing video, no audio track, decoding error, ...) are
    reported with a one-line warning and left out of the result.

    Args:
        df: DataFrame with 'Dialogue_ID', 'Utterance_ID', 'Utterance' and
            'Emotion' columns.
        data_split: Key of ``split_to_video_folder`` ('train', 'dev' or 'test').
        target_sr: Sample rate of the saved audio in Hz (default 16000, chosen
            for Wav2Vec2).

    Returns:
        DataFrame with columns 'text', 'emotion' and 'audio_path'; the same
        table is written to ``<PROC_DATA_PATH>/<data_split>_text.csv``.
    """

    print(f"Processing {data_split} data...")

    # Get the correct sub-folder name for the videos
    video_subfolder = split_to_video_folder[data_split]
    video_dir = os.path.join(BASE_MELD_PATH, data_split, video_subfolder)

    # Create the output folder here as well, so this function does not depend
    # on the directory-setup cell having been run in the current kernel.
    audio_dir = os.path.join(PROC_DATA_PATH, 'audio', data_split)
    os.makedirs(audio_dir, exist_ok=True)

    # moviepy writes the raw audio track here before librosa resamples it
    temp_audio_path = os.path.join(PROC_DATA_PATH, 'temp_audio.wav')

    # We will also collect the text data into a list
    text_data_list = []

    try:
        # Use tqdm to show a progress bar
        for index, row in tqdm(df.iterrows(), total=df.shape[0]):
            # Placeholder so the warning below can always name the row, even
            # when building the real file name is what failed.
            file_name_base = f"row {index}"
            try:
                file_name_base = f"dia{row['Dialogue_ID']}_utt{row['Utterance_ID']}"
                raw_video_path = os.path.join(video_dir, f"{file_name_base}.mp4")
                audio_save_path = os.path.join(audio_dir, f"{file_name_base}.wav")

                # Check up front: letting moviepy hit a missing file produces
                # a very long ffmpeg dump instead of a readable warning.
                if not os.path.exists(raw_video_path):
                    raise FileNotFoundError(f"video file not found: {raw_video_path}")

                # --- 1. Audio Preprocessing (SER) ---
                with VideoFileClip(raw_video_path) as video_clip:
                    audio_clip = video_clip.audio
                    if audio_clip is None:
                        raise ValueError("video has no audio track")
                    audio_clip.write_audiofile(temp_audio_path, logger=None, verbose=False)

                # Load with librosa to resample, then save the result
                y, sr = librosa.load(temp_audio_path, sr=target_sr)
                sf.write(audio_save_path, y, sr)

                # --- 2. Text Preprocessing (TER) ---
                text_data_list.append({
                    'text': row['Utterance'],
                    'emotion': row['Emotion'],
                    'audio_path': audio_save_path,  # We save the new audio path
                })

            except Exception as e:
                # We'll skip errors, but print them to know
                print(f"Warning: Error processing {file_name_base}: {e}. Skipping file.")
    finally:
        # Clean up the temporary audio file even if the loop is interrupted
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

    # Save the collected text data as a new, clean CSV. Naming the columns
    # keeps the header stable even when no row could be processed.
    text_df = pd.DataFrame(text_data_list, columns=['text', 'emotion', 'audio_path'])
    text_csv_path = os.path.join(PROC_DATA_PATH, f'{data_split}_text.csv')
    text_df.to_csv(text_csv_path, index=False)

    print(f"Finished processing {data_split}. Text data saved to {text_csv_path}")
    return text_df

# --- Run the Processing ---
# We use the DataFrames (df_train, df_dev, df_test) that are still in memory from the CSV-loading cell (In [4]).
# If you get a 'NameError', re-run that cell first.

# Process the three splits in order and keep one result table per split
df_train_proc, df_dev_proc, df_test_proc = [
    process_audio_and_text(split_df, split_name)
    for split_df, split_name in (
        (df_train, 'train'),
        (df_dev, 'dev'),
        (df_test, 'test'),
    )
]

print("\n--- All Preprocessing Complete! ---")
print("Your datasets are ready in '../data/MELD_processed/'")

Starting Audio (SER) and Text (TER) processing...
Processing train data...


 12%|████▊                                    | 1165/9989 [05:20<47:11,  3.12it/s]

Here are the file infos returned by ffmpeg:

ffmpeg version 7.1-essentials_build-www.gyan.dev Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 14.2.0 (Rev1, Built by MSYS2 project)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libgme --enable-libopenmpt --ena

100%|█████████████████████████████████████████| 9989/9989 [41:49<00:00,  3.98it/s]


Finished processing train. Text data saved to ../data/MELD_processed/train_text.csv
Processing dev data...


 98%|████████████████████████████████████████ | 1084/1109 [04:24<00:06,  4.06it/s]

Please check that you entered the correct path.. Skipping file.


100%|█████████████████████████████████████████| 1109/1109 [04:30<00:00,  4.11it/s]


Finished processing dev. Text data saved to ../data/MELD_processed/dev_text.csv
Processing test data...


100%|█████████████████████████████████████████| 2610/2610 [10:28<00:00,  4.15it/s]

Finished processing test. Text data saved to ../data/MELD_processed/test_text.csv

--- All Preprocessing Complete! ---
Your datasets are ready in '../data/MELD_processed/'



