# Splitting audio and video files at utterance level

In [9]:
import pandas as pd
df = pd.read_csv("EDA_Dataset.csv")
print(df.columns.tolist())
%pwd


['Unnamed: 0', 'utt_id', 'EDA', 'filename', 'session_number', 'speaker', 'start', 'end', 'utterance', 'original_order']


'/home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac'

In [11]:
import pandas as pd
from pydub import AudioSegment
import os
from tqdm import tqdm

# === Load metadata ===
final_df = pd.read_csv("EDA_Dataset.csv")

root_audio = "/home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release"
save_dir = "/home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/utterance_dac_segmented/audio"

# === Loop through utterances ===
for _, row in tqdm(final_df.iterrows(), total=len(final_df), desc="Segmenting audio"):
    session = f"Session{int(row['session_number'])}"
    base_path = f"{root_audio}/{session}/dialog/wav/{row['filename']}.wav"
    
    # Skip if source file missing
    if not os.path.exists(base_path):
        print(f"Missing source file: {base_path}")
        continue

    start_ms = int(row['start'] * 1000)
    end_ms = int(row['end'] * 1000)
    
    # Save to structured folder
    output_dir = f"{save_dir}/{session}/{row['filename']}"
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/{row['filename']}_utt{int(row['utt_id']):03d}.wav"
    
    try:
        audio = AudioSegment.from_wav(base_path)
        utterance = audio[start_ms:end_ms]
        utterance.export(output_path, format="wav")
    except Exception as e:
        print(f"Error segmenting {base_path}: {e}")


Segmenting audio: 100%|██████████████████████████████████████████████████████████████████████| 10039/10039 [01:12<00:00, 138.39it/s]


In [3]:
from moviepy.editor import VideoFileClip
print("✅ MoviePy works!")


  from pkg_resources import resource_filename


✅ MoviePy works!


In [None]:
import moviepy
import moviepy.video.compositing.concatenate as concat
from moviepy.editor import VideoFileClip  # reimport after patch
import os
from tqdm import tqdm
import pandas as pd

final_df = pd.read_csv("EDA_Dataset.csv")

root_video = "/home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release"
save_dir = "/home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/utterance_dac_segmented/video"

for _, row in tqdm(final_df.iterrows(), total=len(final_df), desc="Segmenting video"):
    session = f"Session{int(row['session_number'])}"
    base_path = f"{root_video}/{session}/dialog/avi/DivX/{row['filename']}.avi"

    if not os.path.exists(base_path):
        print(f"Missing source file: {base_path}")
        continue

    start, end = row['start'], row['end']

    output_dir = f"{save_dir}/{session}/{row['filename']}"
    os.makedirs(output_dir, exist_ok=True)
    output_path = f"{output_dir}/{row['filename']}_utt{int(row['utt_id']):03d}.avi"

    try:
        with VideoFileClip(base_path) as video:
            # Crop logic
            left_marker = row['filename'][5]  # 'M' or 'F' after 'Ses01'
            speaker = row['speaker'].upper()  # ensure consistent case
            w, h = video.size

            if left_marker == speaker:
                # Speaker is on the left half
                crop_x1, crop_x2 = 0, w / 2
            else:
                # Speaker is on the right half
                crop_x1, crop_x2 = w / 2, w

            # Extract utterance segment and crop
            segment = video.subclip(start, end).crop(x1=crop_x1, x2=crop_x2, y1=0, y2=h)
            segment.write_videofile(output_path, codec="libx264", audio_codec="aac", verbose=False, logger=None)

    except Exception as e:
        print(f"Error processing {base_path}: {e}")


Segmenting video:   5%|███▊                                                                   | 539/10039 [13:27<2:25:30,  1.09it/s]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_impro04.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_impro04.avi, Accessing time t=190.20-190.24 seconds, with clip duration=190 seconds, 


Segmenting video:   7%|████▊                                                                  | 677/10039 [16:48<2:58:29,  1.14s/it]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_script02_1.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_script02_1.avi, Accessing time t=331.34-331.39 seconds, with clip duration=331 seconds, 


Segmenting video:   7%|█████▏                                                                 | 725/10039 [17:45<2:27:53,  1.05it/s]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_impro02.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_impro02.avi, Accessing time t=219.04-219.08 seconds, with clip duration=219 seconds, 


Segmenting video:   8%|█████▋                                                                 | 796/10039 [19:10<2:37:01,  1.02s/it]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_impro04.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_impro04.avi, Accessing time t=200.88-200.93 seconds, with clip duration=200 seconds, 


Segmenting video:  10%|███████▏                                                              | 1023/10039 [24:17<4:20:45,  1.74s/it]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_script02_1.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01M_script02_1.avi, Accessing time t=404.41-404.45 seconds, with clip duration=404 seconds, 


Segmenting video:  14%|█████████▌                                                            | 1376/10039 [33:19<3:08:53,  1.31s/it]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_script03_1.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session1/dialog/avi/DivX/Ses01F_script03_1.avi, Accessing time t=279.90-279.94 seconds, with clip duration=279 seconds, 


Segmenting video:  19%|█████████████▎                                                        | 1918/10039 [46:03<1:54:20,  1.18it/s]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02F_script01_2.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02F_script01_2.avi, Accessing time t=123.62-123.66 seconds, with clip duration=123 seconds, 


Segmenting video:  21%|██████████████▊                                                       | 2131/10039 [50:59<1:46:22,  1.24it/s]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02M_script02_2.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02M_script02_2.avi, Accessing time t=414.15-414.20 seconds, with clip duration=414 seconds, 


Segmenting video:  23%|████████████████                                                      | 2303/10039 [55:05<2:08:05,  1.01it/s]

Error processing /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02F_script01_1.avi: Error in file /home1/anurades/multimodal-scripted-vs-improvised/multimodal-dac/IEMOCAP_full_release/Session2/dialog/avi/DivX/Ses02F_script01_1.avi, Accessing time t=355.89-355.93 seconds, with clip duration=355 seconds, 


Segmenting video:  23%|████████████████▏                                                     | 2325/10039 [55:35<3:14:56,  1.52s/it]