# Splitting improv vs scripted

In [118]:
import re 
import os
import pandas as pd
import numpy as np
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
import evaluate
from collections import Counter
from sklearn.model_selection import train_test_split


# 1. Access the corresponding .txt, .wav, and .avi files for each EDA label
# Extract the conversation filename and speaker information from the dataset.
eda_df = pd.read_csv("eda_iemocap_no_utts_dataset.csv")
eda_df = eda_df[["speaker", "utt_id", "EDA"]]

# Capture groups: (1) conversation filename, (2) session number,
# (3) speaker sex. The pattern expects values that start with b'Ses...
speaker_id_pattern = re.compile(r"b'(Ses(\d+)[MF]_.+\d+.*)_([MF])")

filename_ids = []
speaker_M_F = []
session_numbers = []
for raw_speaker in eda_df["speaker"]:
    match = speaker_id_pattern.search(str(raw_speaker))
    if match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            f"Unrecognised speaker id in EDA dataset: {raw_speaker!r}"
        )
    filename_ids.append(match.group(1))
    session_numbers.append(int(match.group(2)))
    speaker_M_F.append(match.group(3))

eda_df = eda_df.drop(columns=["speaker"])
eda_df["filename"] = filename_ids
eda_df["filename"] = eda_df["filename"].astype(str)
eda_df["session_number"] = session_numbers
eda_df["session_number"] = eda_df["session_number"].astype(int)
eda_df["speaker"] = speaker_M_F
eda_df["speaker"] = eda_df["speaker"].astype(str)
eda_df["utt_id"] = eda_df["utt_id"].astype(int)

# Access transcript files based on filename.
# Capture groups: (1) speaker sex, (2) utterance id, (3) start, (4) end.
transcript_line_pattern = re.compile(r"(F|M)(\d+)\s\[(\d+\.\d+)-(\d+\.\d+)\]")

utt_df = []
root_dir = "IEMOCAP_full_release/"
for i in range(1, 6):
    directory = os.path.join(root_dir, f"Session{i}/dialog/transcriptions/")
    for entry in os.scandir(directory):
        if not entry.is_file() or not entry.name.endswith(".txt"):
            continue
        if entry.name.startswith("._"):
            # Meta files with "._" prepended to the transcript file name;
            # they are not transcripts, so skip them up front.
            continue

        filename = entry.name[:-4]  # drop the ".txt" extension
        try:
            with open(entry.path, "r") as file:
                lines = file.readlines()
        except (OSError, UnicodeDecodeError) as err:
            # Stay best-effort, but report the skipped file rather than
            # hiding the failure.
            print(f"Skipping unreadable transcript {entry.path}: {err}")
            continue

        for order, line in enumerate(lines):
            # Split on the FIRST colon only, so colons inside the utterance
            # text are preserved instead of truncating the utterance.
            speaker_info, separator, utterance = line.partition(":")
            if not separator:
                # A line without a colon is skipped on its own; it must not
                # discard the rest of the transcript.
                continue
            match = transcript_line_pattern.search(speaker_info)
            if match is None:
                continue
            speaker_f_m, utt_id, start, end = match.groups()
            utt_df.append({
                "utt_id": int(utt_id),
                "filename": str(filename),
                "improv_script_id": str(filename[7:]),
                "start": float(start),
                "end": float(end),
                "speaker": str(speaker_f_m),
                "utterance": utterance.strip(),
                "session_number": int(i),
                # Index of the line within its transcript file (skipped
                # lines still consume an index).
                "original_order": order,
            })
utt_df = pd.DataFrame(utt_df)

# Combine the EDA labels and the utterances together.
final_df = pd.merge(eda_df, utt_df, on=["utt_id", "session_number", "filename", "speaker"])
final_df

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,improv_script_id,start,end,utterance,original_order
0,0,sd,Ses01M_impro07,1,M,impro07,2.6812,7.9800,Check this out. You know how I've told you I'...,0
1,0,b,Ses01M_impro07,1,F,impro07,7.6300,8.5700,Yeah.,1
2,1,sd,Ses01M_impro07,1,M,impro07,8.2200,14.7500,"Well, this is totally random, I got this full ...",2
3,1,qy,Ses01M_impro07,1,F,impro07,13.9500,21.1200,[LAUGHTER]. For softball? That's unbelievable....,3
4,2,qy,Ses01M_impro07,1,M,impro07,15.5400,20.6700,For softball. They're going to pay me to go t...,4
...,...,...,...,...,...,...,...,...,...,...
10034,24,qy,Ses05F_script01_3,5,F,script01_3,404.1923,406.6600,Do you still feel like that?,63
10035,39,sd,Ses05F_script01_3,5,M,script01_3,407.9600,410.7823,"I I want you now, Annie.",64
10036,25,qy,Ses05F_script01_3,5,F,script01_3,410.4317,427.7079,Because you can't feel like that anymore Chris...,65
10037,40,sd,Ses05F_script01_3,5,M,script01_3,426.7965,431.8242,"Oh Annie. Annie, I am going to make you a for...",66


In [125]:
def impro_splits():    
    # session 1
    df_impro_session_1_impro_1_M = final_df[(final_df['filename'].str.contains('M_impro01')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_1_F = final_df[(final_df['filename'].str.contains('F_impro01')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_2_M = final_df[(final_df['filename'].str.contains('M_impro02')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_2_F = final_df[(final_df['filename'].str.contains('F_impro02')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_3_M = final_df[(final_df['filename'].str.contains('M_impro03')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_3_F = final_df[(final_df['filename'].str.contains('F_impro03')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_4_M = final_df[(final_df['filename'].str.contains('M_impro04')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_4_F = final_df[(final_df['filename'].str.contains('F_impro04')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_5_M = final_df[(final_df['filename'].str.contains('M_impro05')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_5_F = final_df[(final_df['filename'].str.contains('F_impro05')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_6_M = final_df[(final_df['filename'].str.contains('M_impro06')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_6_F = final_df[(final_df['filename'].str.contains('F_impro06')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_7_M = final_df[(final_df['filename'].str.contains('M_impro07')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_7_F = final_df[(final_df['filename'].str.contains('F_impro07')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_8_M = final_df[(final_df['filename'].str.contains('M_impro08')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])
    df_impro_session_1_impro_8_F = final_df[(final_df['filename'].str.contains('F_impro08')) & (final_df['session_number'] == 1)].sort_values(by=['filename', 'original_order'])

    # session 2
    df_impro_session_2_impro_1_M = final_df[(final_df['filename'].str.contains('M_impro01')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_1_F = final_df[(final_df['filename'].str.contains('F_impro01')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_2_M = final_df[(final_df['filename'].str.contains('M_impro02')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_2_F = final_df[(final_df['filename'].str.contains('F_impro02')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_3_M = final_df[(final_df['filename'].str.contains('M_impro03')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_3_F = final_df[(final_df['filename'].str.contains('F_impro03')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_4_M = final_df[(final_df['filename'].str.contains('M_impro04')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_4_F = final_df[(final_df['filename'].str.contains('F_impro04')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_5_M = final_df[(final_df['filename'].str.contains('M_impro05')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_5_F = final_df[(final_df['filename'].str.contains('F_impro05')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_6_M = final_df[(final_df['filename'].str.contains('M_impro06')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_6_F = final_df[(final_df['filename'].str.contains('F_impro06')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_7_M = final_df[(final_df['filename'].str.contains('M_impro07')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_7_F = final_df[(final_df['filename'].str.contains('F_impro07')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_8_M = final_df[(final_df['filename'].str.contains('M_impro08')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])
    df_impro_session_2_impro_8_F = final_df[(final_df['filename'].str.contains('F_impro08')) & (final_df['session_number'] == 2)].sort_values(by=['filename', 'original_order'])

    # session 3
    df_impro_session_3_impro_1_M = final_df[(final_df['filename'].str.contains('M_impro01')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_1_F = final_df[(final_df['filename'].str.contains('F_impro01')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_2_M = final_df[(final_df['filename'].str.contains('M_impro02')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_2_F = final_df[(final_df['filename'].str.contains('F_impro02')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_3_M = final_df[(final_df['filename'].str.contains('M_impro03')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_3_F = final_df[(final_df['filename'].str.contains('F_impro03')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_4_M = final_df[(final_df['filename'].str.contains('M_impro04')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_4_F = final_df[(final_df['filename'].str.contains('F_impro04')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_5_M = final_df[(final_df['filename'].str.contains('M_impro05')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_5_F = final_df[(final_df['filename'].str.contains('F_impro05')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_6_M = final_df[(final_df['filename'].str.contains('M_impro06')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_6_F = final_df[(final_df['filename'].str.contains('F_impro06')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_7_M = final_df[(final_df['filename'].str.contains('M_impro07')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_7_F = final_df[(final_df['filename'].str.contains('F_impro07')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_8_M = final_df[(final_df['filename'].str.contains('M_impro08')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])
    df_impro_session_3_impro_8_F = final_df[(final_df['filename'].str.contains('F_impro08')) & (final_df['session_number'] == 3)].sort_values(by=['filename', 'original_order'])

    # session 4
    df_impro_session_4_impro_1_M = final_df[(final_df['filename'].str.contains('M_impro01')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_1_F = final_df[(final_df['filename'].str.contains('F_impro01')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_2_M = final_df[(final_df['filename'].str.contains('M_impro02')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_2_F = final_df[(final_df['filename'].str.contains('F_impro02')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_3_M = final_df[(final_df['filename'].str.contains('M_impro03')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_3_F = final_df[(final_df['filename'].str.contains('F_impro03')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_4_M = final_df[(final_df['filename'].str.contains('M_impro04')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_4_F = final_df[(final_df['filename'].str.contains('F_impro04')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_5_M = final_df[(final_df['filename'].str.contains('M_impro05')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_5_F = final_df[(final_df['filename'].str.contains('F_impro05')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_6_M = final_df[(final_df['filename'].str.contains('M_impro06')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_6_F = final_df[(final_df['filename'].str.contains('F_impro06')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_7_M = final_df[(final_df['filename'].str.contains('M_impro07')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_7_F = final_df[(final_df['filename'].str.contains('F_impro07')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_8_M = final_df[(final_df['filename'].str.contains('M_impro08')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])
    df_impro_session_4_impro_8_F = final_df[(final_df['filename'].str.contains('F_impro08')) & (final_df['session_number'] == 4)].sort_values(by=['filename', 'original_order'])

    # session 5
    df_impro_session_5_impro_1_M = final_df[(final_df['filename'].str.contains('M_impro01')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_1_F = final_df[(final_df['filename'].str.contains('F_impro01')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_2_M = final_df[(final_df['filename'].str.contains('M_impro02')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_2_F = final_df[(final_df['filename'].str.contains('F_impro02')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_3_M = final_df[(final_df['filename'].str.contains('M_impro03')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_3_F = final_df[(final_df['filename'].str.contains('F_impro03')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_4_M = final_df[(final_df['filename'].str.contains('M_impro04')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_4_F = final_df[(final_df['filename'].str.contains('F_impro04')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_5_M = final_df[(final_df['filename'].str.contains('M_impro05')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_5_F = final_df[(final_df['filename'].str.contains('F_impro05')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_6_M = final_df[(final_df['filename'].str.contains('M_impro06')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_6_F = final_df[(final_df['filename'].str.contains('F_impro06')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_7_M = final_df[(final_df['filename'].str.contains('M_impro07')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_7_F = final_df[(final_df['filename'].str.contains('F_impro07')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_8_M = final_df[(final_df['filename'].str.contains('M_impro08')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])
    df_impro_session_5_impro_8_F = final_df[(final_df['filename'].str.contains('F_impro08')) & (final_df['session_number'] == 5)].sort_values(by=['filename', 'original_order'])

    # need to this for each impro across sections because although they are the same impro, the lines are not memorized perfectly and so there are some length differences
    min_impro_1 = min([len(df_impro_session_1_impro_1_F), len(df_impro_session_1_impro_1_M),\
                        len(df_impro_session_2_impro_1_F), len(df_impro_session_2_impro_1_M),\
                        len(df_impro_session_3_impro_1_F), len(df_impro_session_3_impro_1_M),\
                        len(df_impro_session_4_impro_1_F), len(df_impro_session_4_impro_1_M),\
                        len(df_impro_session_5_impro_1_F), len(df_impro_session_5_impro_1_M)])

    min_impro_2 = min([len(df_impro_session_1_impro_2_F), len(df_impro_session_1_impro_2_M),\
                        len(df_impro_session_2_impro_2_F), len(df_impro_session_2_impro_2_M),\
                        len(df_impro_session_3_impro_2_F), len(df_impro_session_3_impro_2_M),\
                        len(df_impro_session_4_impro_2_F), len(df_impro_session_4_impro_2_M),\
                        len(df_impro_session_5_impro_2_F), len(df_impro_session_5_impro_2_M)])

    min_impro_3 = min([len(df_impro_session_1_impro_3_F), len(df_impro_session_1_impro_3_M),\
                        len(df_impro_session_2_impro_3_F), len(df_impro_session_2_impro_3_M),\
                        len(df_impro_session_3_impro_3_F), len(df_impro_session_3_impro_3_M),\
                        len(df_impro_session_4_impro_3_F), len(df_impro_session_4_impro_3_M),\
                        len(df_impro_session_5_impro_3_F), len(df_impro_session_5_impro_3_M)])

    min_impro_4 = min([len(df_impro_session_1_impro_4_F), len(df_impro_session_1_impro_4_M),\
                        len(df_impro_session_2_impro_4_F), len(df_impro_session_2_impro_4_M),\
                        len(df_impro_session_3_impro_4_F), len(df_impro_session_3_impro_4_M),\
                        len(df_impro_session_4_impro_4_F), len(df_impro_session_4_impro_4_M),\
                        len(df_impro_session_5_impro_4_F), len(df_impro_session_5_impro_4_M)])

    min_impro_5 = min([len(df_impro_session_1_impro_5_F), len(df_impro_session_1_impro_5_M),\
                            len(df_impro_session_2_impro_5_F), len(df_impro_session_2_impro_5_M),\
                            len(df_impro_session_3_impro_5_F), len(df_impro_session_3_impro_5_M),\
                            len(df_impro_session_4_impro_5_F), len(df_impro_session_4_impro_5_M),\
                            len(df_impro_session_5_impro_5_F), len(df_impro_session_5_impro_5_M)])

    min_impro_6 = min([len(df_impro_session_1_impro_6_F), len(df_impro_session_1_impro_6_M),\
                            len(df_impro_session_2_impro_6_F), len(df_impro_session_2_impro_6_M),\
                            len(df_impro_session_3_impro_6_F), len(df_impro_session_3_impro_6_M),\
                            len(df_impro_session_4_impro_6_F), len(df_impro_session_4_impro_6_M),\
                            len(df_impro_session_5_impro_6_F), len(df_impro_session_5_impro_6_M)])

    min_impro_7 = min([len(df_impro_session_1_impro_7_F), len(df_impro_session_1_impro_7_M),\
                            len(df_impro_session_2_impro_7_F), len(df_impro_session_2_impro_7_M),\
                            len(df_impro_session_3_impro_7_F), len(df_impro_session_3_impro_7_M),\
                            len(df_impro_session_4_impro_7_F), len(df_impro_session_4_impro_7_M),\
                            len(df_impro_session_5_impro_7_F), len(df_impro_session_5_impro_7_M)])

    min_impro_8 = min([len(df_impro_session_1_impro_8_F), len(df_impro_session_1_impro_8_M),\
                            len(df_impro_session_2_impro_8_F), len(df_impro_session_2_impro_8_M),\
                            len(df_impro_session_3_impro_8_F), len(df_impro_session_3_impro_8_M),\
                            len(df_impro_session_4_impro_8_F), len(df_impro_session_4_impro_8_M),\
                            len(df_impro_session_5_impro_8_F), len(df_impro_session_5_impro_8_M)])

    train_impro_1 = int(min_impro_1*0.8)
    val_impro_1 = (min_impro_1-train_impro_1)//2

    train_impro_2 = int(min_impro_2*0.8)
    val_impro_2 = (min_impro_2-train_impro_2)//2

    train_impro_3 = int(min_impro_3*0.8)
    val_impro_3 = (min_impro_3-train_impro_3)//2

    train_impro_4 = int(min_impro_4*0.8)
    val_impro_4 = (min_impro_4-train_impro_4)//2

    train_impro_5 = int(min_impro_5*0.8)
    val_impro_5 = (min_impro_5-train_impro_5)//2

    train_impro_6 = int(min_impro_6*0.8)
    val_impro_6 = (min_impro_6-train_impro_6)//2

    train_impro_7 = int(min_impro_7*0.8)
    val_impro_7 = (min_impro_7-train_impro_7)//2

    train_impro_8 = int(min_impro_8*0.8)
    val_impro_8 = (min_impro_8-train_impro_8)//2


    #df_impro_session_1_impro_1_F.sample(frac=1) # going to ignore this for now and not shuffle within each impro because that means merging sessions... to confusing and not perfect matching
    df_impro_train = pd.concat([df_impro_session_1_impro_1_F[:train_impro_1],
                                df_impro_session_1_impro_1_M[:train_impro_1],
                                df_impro_session_2_impro_1_F[:train_impro_1], 
                                df_impro_session_2_impro_1_M[:train_impro_1], 
                                df_impro_session_3_impro_1_F[:train_impro_1],
                                df_impro_session_3_impro_1_M[:train_impro_1],
                                df_impro_session_4_impro_1_F[:train_impro_1],
                                df_impro_session_4_impro_1_M[:train_impro_1],
                                df_impro_session_5_impro_1_F[:train_impro_1],
                                df_impro_session_5_impro_1_M[:train_impro_1],
                                df_impro_session_1_impro_2_F[:train_impro_2],
                                df_impro_session_1_impro_2_M[:train_impro_2],
                                df_impro_session_2_impro_2_F[:train_impro_2], 
                                df_impro_session_2_impro_2_M[:train_impro_2], 
                                df_impro_session_3_impro_2_F[:train_impro_2],
                                df_impro_session_3_impro_2_M[:train_impro_2],
                                df_impro_session_4_impro_2_F[:train_impro_2],
                                df_impro_session_4_impro_2_M[:train_impro_2],
                                df_impro_session_5_impro_2_F[:train_impro_2],
                                df_impro_session_5_impro_2_M[:train_impro_2],
                                df_impro_session_1_impro_3_F[:train_impro_3],
                                df_impro_session_1_impro_3_M[:train_impro_3],
                                df_impro_session_2_impro_3_F[:train_impro_3], 
                                df_impro_session_2_impro_3_M[:train_impro_3], 
                                df_impro_session_3_impro_3_F[:train_impro_3],
                                df_impro_session_3_impro_3_M[:train_impro_3],
                                df_impro_session_4_impro_3_F[:train_impro_3],
                                df_impro_session_4_impro_3_M[:train_impro_3],
                                df_impro_session_5_impro_3_F[:train_impro_3],
                                df_impro_session_5_impro_3_M[:train_impro_3],
                df_impro_session_1_impro_4_F[:train_impro_4],
                                df_impro_session_1_impro_4_M[:train_impro_4],
                                df_impro_session_2_impro_4_F[:train_impro_4], 
                                df_impro_session_2_impro_4_M[:train_impro_4], 
                                df_impro_session_3_impro_4_F[:train_impro_4],
                                df_impro_session_3_impro_4_M[:train_impro_4],
                                df_impro_session_4_impro_4_F[:train_impro_4],
                                df_impro_session_4_impro_4_M[:train_impro_4],
                                df_impro_session_5_impro_4_F[:train_impro_4],
                                df_impro_session_5_impro_4_M[:train_impro_4],
                                df_impro_session_1_impro_5_F[:train_impro_5],
                                df_impro_session_1_impro_5_M[:train_impro_5],
                                df_impro_session_2_impro_5_F[:train_impro_5], 
                                df_impro_session_2_impro_5_M[:train_impro_5], 
                                df_impro_session_3_impro_5_F[:train_impro_5],
                                df_impro_session_3_impro_5_M[:train_impro_5],
                                df_impro_session_4_impro_5_F[:train_impro_5],
                                df_impro_session_4_impro_5_M[:train_impro_5],
                                df_impro_session_5_impro_5_F[:train_impro_5],
                                df_impro_session_5_impro_5_M[:train_impro_5],
                                df_impro_session_1_impro_6_F[:train_impro_6],
                                df_impro_session_1_impro_6_M[:train_impro_6],
                                df_impro_session_2_impro_6_F[:train_impro_6], 
                                df_impro_session_2_impro_6_M[:train_impro_6], 
                                df_impro_session_3_impro_6_F[:train_impro_6],
                                df_impro_session_3_impro_6_M[:train_impro_6],
                                df_impro_session_4_impro_6_F[:train_impro_6],
                                df_impro_session_4_impro_6_M[:train_impro_6],
                                df_impro_session_5_impro_6_F[:train_impro_6],
                                df_impro_session_5_impro_6_M[:train_impro_6],
                                df_impro_session_1_impro_7_F[:train_impro_7],
                                df_impro_session_1_impro_7_M[:train_impro_7],
                                df_impro_session_2_impro_7_F[:train_impro_7], 
                                df_impro_session_2_impro_7_M[:train_impro_7], 
                                df_impro_session_3_impro_7_F[:train_impro_7],
                                df_impro_session_3_impro_7_M[:train_impro_7],
                                df_impro_session_4_impro_7_F[:train_impro_7],
                                df_impro_session_4_impro_7_M[:train_impro_7],
                                df_impro_session_5_impro_7_F[:train_impro_7],
                                df_impro_session_5_impro_7_M[:train_impro_7],
                                df_impro_session_1_impro_8_F[:train_impro_8],
                                df_impro_session_1_impro_8_M[:train_impro_8],
                                df_impro_session_2_impro_8_F[:train_impro_8], 
                                df_impro_session_2_impro_8_M[:train_impro_8], 
                                df_impro_session_3_impro_8_F[:train_impro_8],
                                df_impro_session_3_impro_8_M[:train_impro_8],
                                df_impro_session_4_impro_8_F[:train_impro_8],
                                df_impro_session_4_impro_8_M[:train_impro_8],
                                df_impro_session_5_impro_8_F[:train_impro_8],
                                df_impro_session_5_impro_8_M[:train_impro_8]])  
    df_impro_val = pd.concat([df_impro_session_1_impro_1_F[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_1_impro_1_M[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_2_impro_1_F[train_impro_1: train_impro_1+val_impro_1], 
                                df_impro_session_2_impro_1_M[train_impro_1: train_impro_1+val_impro_1], 
                                df_impro_session_3_impro_1_F[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_3_impro_1_M[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_4_impro_1_F[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_4_impro_1_M[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_5_impro_1_F[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_5_impro_1_M[train_impro_1: train_impro_1+val_impro_1],
                                df_impro_session_1_impro_2_F[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_1_impro_2_M[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_2_impro_2_F[train_impro_2: train_impro_2+val_impro_2], 
                                df_impro_session_2_impro_2_M[train_impro_2: train_impro_2+val_impro_2], 
                                df_impro_session_3_impro_2_F[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_3_impro_2_M[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_4_impro_2_F[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_4_impro_2_M[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_5_impro_2_F[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_5_impro_2_M[train_impro_2: train_impro_2+val_impro_2],
                                df_impro_session_1_impro_3_F[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_1_impro_3_M[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_2_impro_3_F[train_impro_3: train_impro_3+val_impro_3], 
                                df_impro_session_2_impro_3_M[train_impro_3: train_impro_3+val_impro_3], 
                                df_impro_session_3_impro_3_F[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_3_impro_3_M[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_4_impro_3_F[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_4_impro_3_M[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_5_impro_3_F[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_5_impro_3_M[train_impro_3: train_impro_3+val_impro_3],
                                df_impro_session_1_impro_4_F[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_1_impro_4_M[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_2_impro_4_F[train_impro_4: train_impro_4+val_impro_4], 
                                df_impro_session_2_impro_4_M[train_impro_4: train_impro_4+val_impro_4], 
                                df_impro_session_3_impro_4_F[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_3_impro_4_M[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_4_impro_4_F[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_4_impro_4_M[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_5_impro_4_F[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_5_impro_4_M[train_impro_4: train_impro_4+val_impro_4],
                                df_impro_session_1_impro_5_F[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_1_impro_5_M[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_2_impro_5_F[train_impro_5: train_impro_5+val_impro_5], 
                                df_impro_session_2_impro_5_M[train_impro_5: train_impro_5+val_impro_5], 
                                df_impro_session_3_impro_5_F[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_3_impro_5_M[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_4_impro_5_F[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_4_impro_5_M[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_5_impro_5_F[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_5_impro_5_M[train_impro_5: train_impro_5+val_impro_5],
                                df_impro_session_1_impro_6_F[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_1_impro_6_M[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_2_impro_6_F[train_impro_6: train_impro_6+val_impro_6], 
                                df_impro_session_2_impro_6_M[train_impro_6: train_impro_6+val_impro_6], 
                                df_impro_session_3_impro_6_F[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_3_impro_6_M[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_4_impro_6_F[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_4_impro_6_M[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_5_impro_6_F[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_5_impro_6_M[train_impro_6: train_impro_6+val_impro_6],
                                df_impro_session_1_impro_7_F[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_1_impro_7_M[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_2_impro_7_F[train_impro_7: train_impro_7+val_impro_7], 
                                df_impro_session_2_impro_7_M[train_impro_7: train_impro_7+val_impro_7], 
                                df_impro_session_3_impro_7_F[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_3_impro_7_M[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_4_impro_7_F[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_4_impro_7_M[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_5_impro_7_F[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_5_impro_7_M[train_impro_7: train_impro_7+val_impro_7],
                                df_impro_session_1_impro_8_F[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_1_impro_8_M[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_2_impro_8_F[train_impro_8: train_impro_8+val_impro_8], 
                                df_impro_session_2_impro_8_M[train_impro_8: train_impro_8+val_impro_8], 
                                df_impro_session_3_impro_8_F[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_3_impro_8_M[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_4_impro_8_F[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_4_impro_8_M[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_5_impro_8_F[train_impro_8: train_impro_8+val_impro_8],
                                df_impro_session_5_impro_8_M[train_impro_8: train_impro_8+val_impro_8]]) 
    df_impro_test = pd.concat([df_impro_session_1_impro_1_F[train_impro_1+val_impro_1:],
                                df_impro_session_1_impro_1_M[train_impro_1+val_impro_1:],
                                df_impro_session_2_impro_1_F[train_impro_1+val_impro_1:], 
                                df_impro_session_2_impro_1_M[train_impro_1+val_impro_1:], 
                                df_impro_session_3_impro_1_F[train_impro_1+val_impro_1:],
                                df_impro_session_3_impro_1_M[train_impro_1+val_impro_1:],
                                df_impro_session_4_impro_1_F[train_impro_1+val_impro_1:],
                                df_impro_session_5_impro_1_F[train_impro_1+val_impro_1:],
                                df_impro_session_5_impro_1_M[train_impro_1+val_impro_1:],
                                df_impro_session_1_impro_2_F[train_impro_2+val_impro_2:],
                                df_impro_session_1_impro_2_M[train_impro_2+val_impro_2:],
                                df_impro_session_2_impro_2_F[train_impro_2+val_impro_2:], 
                                df_impro_session_2_impro_2_M[train_impro_2+val_impro_2:], 
                                df_impro_session_3_impro_2_F[train_impro_2+val_impro_2:],
                                df_impro_session_3_impro_2_M[train_impro_2+val_impro_2:],
                                df_impro_session_4_impro_2_F[train_impro_2+val_impro_2:],
                                df_impro_session_4_impro_2_M[train_impro_2+val_impro_2:],
                                df_impro_session_5_impro_2_F[train_impro_2+val_impro_2:],
                                df_impro_session_5_impro_2_M[train_impro_2+val_impro_2:],
                                df_impro_session_1_impro_3_F[train_impro_3+val_impro_3:],
                                df_impro_session_1_impro_3_M[train_impro_3+val_impro_3:],
                                df_impro_session_2_impro_3_F[train_impro_3+val_impro_3:], 
                                df_impro_session_2_impro_3_M[train_impro_3+val_impro_3:], 
                                df_impro_session_3_impro_3_F[train_impro_3+val_impro_3:],
                                df_impro_session_3_impro_3_M[train_impro_3+val_impro_3:],
                                df_impro_session_4_impro_3_F[train_impro_3+val_impro_3:],
                                df_impro_session_4_impro_3_M[train_impro_3+val_impro_3:],
                                df_impro_session_5_impro_3_F[train_impro_3+val_impro_3:],
                                df_impro_session_5_impro_3_M[train_impro_3+val_impro_3:],
                                df_impro_session_1_impro_4_F[train_impro_4+val_impro_4:],
                                df_impro_session_1_impro_4_M[train_impro_4+val_impro_4:],
                                df_impro_session_2_impro_4_F[train_impro_4+val_impro_4:], 
                                df_impro_session_2_impro_4_M[train_impro_4+val_impro_4:], 
                                df_impro_session_3_impro_4_F[train_impro_4+val_impro_4:],
                                df_impro_session_3_impro_4_M[train_impro_4+val_impro_4:],
                                df_impro_session_4_impro_4_F[train_impro_4+val_impro_4:],
                                df_impro_session_4_impro_4_M[train_impro_4+val_impro_4:],
                                df_impro_session_5_impro_4_F[train_impro_4+val_impro_4:],
                                df_impro_session_5_impro_4_M[train_impro_4+val_impro_4:],
                                df_impro_session_1_impro_5_F[train_impro_5+val_impro_5:],
                                df_impro_session_1_impro_5_M[train_impro_5+val_impro_5:],
                                df_impro_session_2_impro_5_F[train_impro_5+val_impro_5:], 
                                df_impro_session_2_impro_5_M[train_impro_5+val_impro_5:], 
                                df_impro_session_3_impro_5_F[train_impro_5+val_impro_5:],
                                df_impro_session_3_impro_5_M[train_impro_5+val_impro_5:],
                                df_impro_session_4_impro_5_F[train_impro_5+val_impro_5:],
                                df_impro_session_4_impro_5_M[train_impro_5+val_impro_5:],
                                df_impro_session_5_impro_5_F[train_impro_5+val_impro_5:],
                                df_impro_session_5_impro_5_M[train_impro_5+val_impro_5:],
                                df_impro_session_1_impro_6_F[train_impro_6+val_impro_6:],
                                df_impro_session_1_impro_6_M[train_impro_6+val_impro_6:],
                                df_impro_session_2_impro_6_F[train_impro_6+val_impro_6:], 
                                df_impro_session_2_impro_6_M[train_impro_6+val_impro_6:], 
                                df_impro_session_3_impro_6_F[train_impro_6+val_impro_6:],
                                df_impro_session_3_impro_6_M[train_impro_6+val_impro_6:],
                                df_impro_session_4_impro_6_F[train_impro_6+val_impro_6:],
                                df_impro_session_4_impro_6_M[train_impro_6+val_impro_6:],
                                df_impro_session_5_impro_6_F[train_impro_6+val_impro_6:],
                                df_impro_session_5_impro_6_M[train_impro_6+val_impro_6:],
                                df_impro_session_1_impro_7_F[train_impro_7+val_impro_7:],
                                df_impro_session_1_impro_7_M[train_impro_7+val_impro_7:],
                                df_impro_session_2_impro_7_F[train_impro_7+val_impro_7:], 
                                df_impro_session_2_impro_7_M[train_impro_7+val_impro_7:], 
                                df_impro_session_3_impro_7_F[train_impro_7+val_impro_7:],
                                df_impro_session_3_impro_7_M[train_impro_7+val_impro_7:],
                                df_impro_session_4_impro_7_F[train_impro_7+val_impro_7:],
                                df_impro_session_4_impro_7_M[train_impro_7+val_impro_7:],
                                df_impro_session_5_impro_7_F[train_impro_7+val_impro_7:],
                                df_impro_session_5_impro_7_M[train_impro_7+val_impro_7:],
                                df_impro_session_1_impro_8_F[train_impro_8+val_impro_8:],
                                df_impro_session_1_impro_8_M[train_impro_8+val_impro_8:],
                                df_impro_session_2_impro_8_F[train_impro_8+val_impro_8:], 
                                df_impro_session_2_impro_8_M[train_impro_8+val_impro_8:], 
                                df_impro_session_3_impro_8_F[train_impro_8+val_impro_8:],
                                df_impro_session_3_impro_8_M[train_impro_8+val_impro_8:],
                                df_impro_session_4_impro_8_F[train_impro_8+val_impro_8:],
                                df_impro_session_4_impro_8_M[train_impro_8+val_impro_8:],
                                df_impro_session_5_impro_8_F[train_impro_8+val_impro_8:],
                                df_impro_session_5_impro_8_M[train_impro_8+val_impro_8:]]) 
    return df_impro_train, df_impro_val, df_impro_test

# Unpack the three frames returned by impro_splits() (train, validation, test),
# then evaluate the number of rows in the validation frame as the cell's result.
df_impro_train, df_impro_val, df_impro_test = impro_splits() 
len(df_impro_val)                                    

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,improv_script_id,start,end,utterance,original_order
88,9,qy,Ses01F_impro01,1,M,impro01,65.5100,73.0000,"A birth certificate, a passport...a student ID...",19
89,10,qw,Ses01F_impro01,1,F,impro01,66.4200,69.3400,Who the hell has a birth certificate?,20
905,9,qy,Ses01M_impro01,1,F,impro01,52.3400,55.1200,"Well, do you have your passport?",19
906,10,sd,Ses01M_impro01,1,M,impro01,54.4900,56.7100,"No, I don't have a passport.",20
2322,9,sv,Ses02F_impro01,2,M,impro01,72.2875,79.2332,I think you're just gonna--you're gonna have t...,19
...,...,...,...,...,...,...,...,...,...,...
8066,15,sv,Ses05F_impro07,5,F,impro07,75.2700,76.6600,awesome.,32
8941,14,sd,Ses05M_impro07,5,F,impro07,57.6600,63.5900,"Not on campus, but um- I probably won't live t...",29
8942,15,sd,Ses05M_impro07,5,M,impro07,62.7600,64.3900,We are gonna party all the time.,30
8943,15,qw,Ses05M_impro07,5,F,impro07,63.9800,66.8800,Yeah. Totally- Well where are you going to live?,31


In [116]:
# Show every row of final_df whose filename contains the substring "impro".
final_df[(final_df["filename"].str.contains("impro"))]


Unnamed: 0,utt_id,EDA,filename,session_number,speaker,improv_script_id,start,end,utterance,original_order
0,0,sd,Ses01M_impro07,1,M,impro07,2.6812,7.98,Check this out. You know how I've told you I'...,0
1,0,b,Ses01M_impro07,1,F,impro07,7.6300,8.57,Yeah.,1
2,1,sd,Ses01M_impro07,1,M,impro07,8.2200,14.75,"Well, this is totally random, I got this full ...",2
3,1,qy,Ses01M_impro07,1,F,impro07,13.9500,21.12,[LAUGHTER]. For softball? That's unbelievable....,3
4,2,qy,Ses01M_impro07,1,M,impro07,15.5400,20.67,For softball. They're going to pay me to go t...,4
...,...,...,...,...,...,...,...,...,...,...
9966,24,aa,Ses05F_impro06,5,M,impro06,207.0900,209.07,Absolutely.,51
9967,27,sd,Ses05F_impro06,5,F,impro06,208.7300,210.52,[BREATHING],52
9968,25,qy,Ses05F_impro06,5,M,impro06,210.4800,221.85,"Do you know what, if it was me that died, I'd ...",53
9969,28,xx,Ses05F_impro06,5,F,impro06,215.3500,217.15,[LAUGHTER],54


In [None]:
def improv_splits(df=None):
    """Split the scripted dialogues into train, validation and test frames.

    Despite its name, this function selects the ``script01``..``script03``
    rows (not the ``impro`` ones); the name is kept so existing callers keep
    working.

    For every script (1-3), session (1-5) and filename variant (``F`` / ``M``,
    the letter directly before ``_script`` in the filename) the matching rows
    are sorted by ``filename`` and ``original_order``. The split sizes are
    computed per script from the *shortest* of its ten groups, because the
    same script is not recited identically in every recording and the groups
    therefore differ in length:

    * train: the first ``int(shortest * 0.8)`` rows of each group,
    * val:   the next ``(shortest - train) // 2`` rows of each group,
    * test:  every remaining row of each group (so longer groups contribute
      more test rows).

    Rows are deliberately not shuffled; each group keeps its sorted order.

    Fix over the previous version: the session-4 ``M_script01`` group was
    missing from the test split, so its tail rows were silently dropped.

    Args:
        df: Frame with ``filename``, ``session_number`` and ``original_order``
            columns. Defaults to the module-level ``final_df``.

    Returns:
        A ``(train, val, test)`` tuple of DataFrames, each concatenated in
        script -> session -> F/M order.
    """
    if df is None:
        df = final_df

    train_parts, val_parts, test_parts = [], [], []

    for script in range(1, 4):
        # Collect the ten groups of this script in a fixed order so that the
        # concatenated output is laid out session by session, F before M.
        groups = []
        for session in range(1, 6):
            for variant in ("F", "M"):
                mask = (
                    df["filename"].str.contains(f"{variant}_script{script:02d}")
                    & (df["session_number"] == session)
                )
                groups.append(
                    df[mask].sort_values(by=["filename", "original_order"])
                )

        # Size the splits from the shortest group so that every group
        # contributes the same number of train and validation rows.
        shortest = min(len(group) for group in groups)
        n_train = int(shortest * 0.8)
        n_val = (shortest - n_train) // 2
        val_end = n_train + n_val

        for group in groups:
            train_parts.append(group.iloc[:n_train])
            val_parts.append(group.iloc[n_train:val_end])
            test_parts.append(group.iloc[val_end:])

    df_scripted_train = pd.concat(train_parts)
    df_scripted_val = pd.concat(val_parts)
    df_scripted_test = pd.concat(test_parts)

    return df_scripted_train, df_scripted_val, df_scripted_test
    


In [117]:
def scripted_splits(source_df=None):
    """Split the scripted rows into train, validation and test frames.

    Rows are grouped by script (``script01``-``script03``), session (1-5) and
    the ``F``/``M`` marker that precedes ``_script`` in the filename, which
    gives 30 groups.  Each group is sorted by ``filename`` and
    ``original_order`` and is then cut by position instead of being shuffled,
    because every session contains the same scripts and the split has to be
    made separately for each of them.

    The cut points are computed per script from the *shortest* of its ten
    groups, since the sessions do not reproduce a script with exactly the same
    number of rows.  The first 80% of that length goes to train, half of the
    remainder goes to validation, and everything after that point (including
    the rows a longer group has beyond the shortest length) goes to test.

    Args:
        source_df: DataFrame with ``filename``, ``session_number`` and
            ``original_order`` columns.  Defaults to the module-level
            ``final_df``.

    Returns:
        Tuple ``(df_scripted_train, df_scripted_val, df_scripted_test)``.
        Every row of every group lands in exactly one of the three frames.
    """
    if source_df is None:
        source_df = final_df

    sessions = range(1, 6)
    scripts = (1, 2, 3)
    markers = ("F", "M")

    train_parts, val_parts, test_parts = [], [], []
    for script in scripts:
        # Same order as the concatenation has always used: session 1 F,
        # session 1 M, session 2 F, ... session 5 M.
        groups = []
        for session in sessions:
            for marker in markers:
                in_group = (
                    source_df["filename"].str.contains(f"{marker}_script{script:02d}")
                    & (source_df["session_number"] == session)
                )
                groups.append(
                    source_df[in_group].sort_values(by=["filename", "original_order"])
                )

        # The lines are not memorized perfectly, so the groups of one script
        # differ in length; the shortest one defines the shared cut points.
        shortest = min(len(group) for group in groups)
        n_train = int(shortest * 0.8)
        n_val = (shortest - n_train) // 2

        # All three slices are taken in the same pass so that no group can be
        # left out of one of the splits.
        for group in groups:
            train_parts.append(group.iloc[:n_train])
            val_parts.append(group.iloc[n_train:n_train + n_val])
            test_parts.append(group.iloc[n_train + n_val:])

    df_scripted_train = pd.concat(train_parts)
    df_scripted_val = pd.concat(val_parts)
    df_scripted_test = pd.concat(test_parts)

    return df_scripted_train, df_scripted_val, df_scripted_test

# Materialize the three scripted partitions as notebook-level frames.
(
    df_scripted_train,
    df_scripted_val,
    df_scripted_test,
) = scripted_splits()


In [112]:
# Share of 5227 rows held by each of three parts; the three counts add up to 5227.
# Presumably these are the train/val/test sizes of the scripted split -- confirm.
3770/5227, 470/5227, 987/5227, 

(0.7212550220011479, 0.0899177348383394, 0.18882724316051272)

In [96]:
# scripted = 5,255

(149, 19, 108, 13, 120, 15)

In [75]:
# Rows left out of 187 once 149 and 19 have been taken.
# Presumably 149/19 are the script-1 train/val cut sizes and 187 the shortest
# script-1 group -- confirm.
187-(149+19)

19

In [39]:
# Show every row of df_scripted whose filename contains "M_script01".
df_scripted.loc[df_scripted["filename"].str.contains("M_script01")]

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,start,end,utterance,original_order
99,0,sd,Ses01M_script01_3,1,F,8.2904,11.9425,The only one I know still love his parents.,0
100,0,qy,Ses01M_script01_3,1,M,11.8600,15.1286,"Yeah, that kind of went out of style didn't it?",1
101,1,sv,Ses01M_script01_3,1,F,14.5812,20.0400,Oh it's not bad thing it's good thing.,2
102,2,sv,Ses01M_script01_3,1,F,20.7769,27.6000,"You know it's nice here, the air is sweet.",3
103,1,qy,Ses01M_script01_3,1,M,28.1000,30.5850,You're not sorry you came?,4
...,...,...,...,...,...,...,...,...,...
8907,36,qy,Ses05M_script01_1b,5,F,365.5200,367.8500,"Don't think like that, do you hear me?",76
8908,40,sd,Ses05M_script01_1b,5,M,367.0300,369.6377,"Well, I am thinking like that.",77
8909,37,qy,Ses05M_script01_1b,5,F,370.8700,373.0100,"I don't understand you, do I?",78
8910,41,sv,Ses05M_script01_1b,5,M,372.5900,377.3700,"No, you don't. I'm a pretty tough guy.",79


In [None]:
# All rows whose filename contains "script01", regardless of session.
df_train_scripted = final_df[final_df["filename"].str.contains("script01")]
# NOTE(review): this displays script_data_script_1_F, not the df_train_scripted
# frame assigned on the previous line -- confirm which one was meant.
script_data_script_1_F

In [34]:
# Session 1 rows whose filename contains "F_script01"; the bare name on the
# last line makes the notebook display the frame.
script_data_script_1_F = final_df.loc[
    final_df["filename"].str.contains("F_script01")
    & final_df["session_number"].eq(1)
]
script_data_script_1_F

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,start,end,utterance,original_order
365,0,qw,Ses01F_script01_1,1,F,6.2100,9.3200,What's he going to say?,0
366,1,sv,Ses01F_script01_1,1,F,9.3500,12.8955,Maybe we should tell him before he sees it.,1
367,0,sd,Ses01F_script01_1,1,M,11.9892,15.1219,He saw it.,2
368,2,sd,Ses01F_script01_1,1,F,14.3063,19.5526,How could he see it? I was the first one up; ...,3
369,1,sd,Ses01F_script01_1,1,M,19.0456,22.3962,He was out here when it broke.,4
...,...,...,...,...,...,...,...,...,...
1177,37,sd,Ses01F_script01_3,1,M,419.4437,422.5035,I just want you to know Annie.,71
1178,33,sv,Ses01F_script01_3,1,F,422.4670,432.4477,Because you mustn't feel that way. Everything...,72
1179,34,sv,Ses01F_script01_3,1,F,432.5570,450.4700,"Even me. And your money? Chris, there's noth...",73
1180,38,sd,Ses01F_script01_3,1,M,451.0400,460.2800,"Annie, Annie. I am going to make a fortune fo...",74


In [35]:
# Session 2 rows whose filename contains "F_script01"; the bare name on the
# last line makes the notebook display the frame.
script_data_script_1_F = final_df.loc[
    final_df["filename"].str.contains("F_script01")
    & final_df["session_number"].eq(2)
]
script_data_script_1_F

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,start,end,utterance,original_order
1880,0,qw,Ses02F_script01_2,2,F,5.84,8.12,Why did he invite her here?,0
1881,0,qw,Ses02F_script01_2,2,M,8.08,9.72,Why does that bother you?,1
1882,1,sd,Ses02F_script01_2,2,F,9.30,13.51,She's been in New York for three and half year...,2
1883,1,sd,Ses02F_script01_2,2,M,12.53,15.66,"Well, maybe he just wanted to see her again.",3
1884,2,sd,Ses02F_script01_2,2,F,14.96,18.76,Nobody comes seven hundred miles just to see.,4
...,...,...,...,...,...,...,...,...,...
3468,35,sd,Ses02F_script01_3,2,M,334.61,336.60,I want you now Annie.,63
3469,27,sv,Ses02F_script01_3,2,F,336.91,344.54,'Cause you mustn't feel that way. Because you...,64
3470,28,sv,Ses02F_script01_3,2,F,344.60,353.22,"I mean, and your money, there's nothing wrong ...",65
3471,36,sd,Ses02F_script01_3,2,M,353.64,357.87,"I know Annie. Annie, I'm gonna make a fortune...",66


In [33]:
# First row (original_order == 0) of every session 5 file whose filename
# contains "M_script01"; the bare name displays the result.
script_data_script_1_M = final_df.loc[
    final_df["filename"].str.contains("M_script01")
    & final_df["session_number"].eq(5)
    & final_df["original_order"].eq(0)
]
script_data_script_1_M
# improv_data = final_df[final_df["filename"].str.contains("impro")]

# len(script_data), len(improv_data), len(script_data)+len(improv_data)

Unnamed: 0,utt_id,EDA,filename,session_number,speaker,start,end,utterance,original_order
7999,0,qw,Ses05M_script01_2,5,F,4.9869,8.24,Why did you invite her here?,0
8393,0,sd,Ses05M_script01_3,5,F,3.34,6.29,You're the only one I know who loves his parents.,0
8545,0,qh,Ses05M_script01_1,5,F,6.07,10.29,What's he going to say? Maybe we should tell ...,0
8831,0,xx,Ses05M_script01_1b,5,F,3.1063,8.86,What's he going to say? Maybe we ought to tel...,0


# Clipping videos

In [1]:
import os
import pandas as pd
import re
from moviepy.editor import VideoFileClip, concatenate_videoclips

In [2]:
# Cut one dialog video into a separate .mp4 per transcript line that carries a
# "<F|M><digits> [<start>-<end>]" header, and collect the parsed fields in `df`.
session = "Ses01F_impro03"
f = f"IEMOCAP_full_release/Session1/dialog/transcriptions/{session}.txt"
video = f"IEMOCAP_full_release/Session1/dialog/avi/DivX/{session}.avi"

with open(f, "r") as file:
    lines = file.readlines()

d = {"start": [], "end": [], "speaker": [], "utterance": []}
pattern = r"(F|M)\d+\s\[(\d+\.\d+)-(\d+\.\d+)\]"

# Open the source video once and release it when the loop is done, instead of
# creating a new, never-closed VideoFileClip for every utterance.
with VideoFileClip(video) as source_clip:
    for i, line in enumerate(lines):
        # partition() copes with lines that have no ":" (they simply fail the
        # header match below) and keeps colons that occur inside the utterance.
        speaker_info, utterance = line.partition(":")[::2]
        match = re.search(pattern, speaker_info)
        if match is None:
            continue
        start = match.group(2)
        end = match.group(3)
        d["start"].append(start)
        d["end"].append(end)
        d["speaker"].append(match.group(1))
        d["utterance"].append(utterance.strip())

        # `i` is the transcript line index, so skipped lines leave gaps in the
        # numbering of the output files.
        clip = source_clip.subclip(float(start), float(end))
        clip.write_videofile(f"{session}_{i}.mp4")

df = pd.DataFrame(d)
# df.to_csv(f"IEMOCAP_full_release/Session1/dialog/transcriptions/{session}.csv")

Moviepy - Building video Ses01F_impro03_0.mp4.
MoviePy - Writing audio in Ses01F_impro03_0TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_0.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_0.mp4




Moviepy - Building video Ses01F_impro03_1.mp4.
MoviePy - Writing audio in Ses01F_impro03_1TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_1.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_1.mp4
Moviepy - Building video Ses01F_impro03_2.mp4.
MoviePy - Writing audio in Ses01F_impro03_2TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_2.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_2.mp4




Moviepy - Building video Ses01F_impro03_3.mp4.
MoviePy - Writing audio in Ses01F_impro03_3TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_3.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_3.mp4
Moviepy - Building video Ses01F_impro03_4.mp4.
MoviePy - Writing audio in Ses01F_impro03_4TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_4.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_4.mp4
Moviepy - Building video Ses01F_impro03_5.mp4.
MoviePy - Writing audio in Ses01F_impro03_5TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_5.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_5.mp4
Moviepy - Building video Ses01F_impro03_6.mp4.
MoviePy - Writing audio in Ses01F_impro03_6TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_6.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_6.mp4
Moviepy - Building video Ses01F_impro03_7.mp4.
MoviePy - Writing audio in Ses01F_impro03_7TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_7.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_7.mp4
Moviepy - Building video Ses01F_impro03_8.mp4.
MoviePy - Writing audio in Ses01F_impro03_8TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_8.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_8.mp4
Moviepy - Building video Ses01F_impro03_9.mp4.
MoviePy - Writing audio in Ses01F_impro03_9TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_9.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_9.mp4
Moviepy - Building video Ses01F_impro03_10.mp4.
MoviePy - Writing audio in Ses01F_impro03_10TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_10.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_10.mp4
Moviepy - Building video Ses01F_impro03_11.mp4.
MoviePy - Writing audio in Ses01F_impro03_11TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_11.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_11.mp4
Moviepy - Building video Ses01F_impro03_12.mp4.
MoviePy - Writing audio in Ses01F_impro03_12TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_12.mp4



                                                               

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_12.mp4
Moviepy - Building video Ses01F_impro03_13.mp4.
MoviePy - Writing audio in Ses01F_impro03_13TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_13.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_13.mp4
Moviepy - Building video Ses01F_impro03_14.mp4.
MoviePy - Writing audio in Ses01F_impro03_14TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_14.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_14.mp4
Moviepy - Building video Ses01F_impro03_15.mp4.
MoviePy - Writing audio in Ses01F_impro03_15TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_15.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_15.mp4
Moviepy - Building video Ses01F_impro03_16.mp4.
MoviePy - Writing audio in Ses01F_impro03_16TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_16.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_16.mp4
Moviepy - Building video Ses01F_impro03_17.mp4.
MoviePy - Writing audio in Ses01F_impro03_17TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_17.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_17.mp4




Moviepy - Building video Ses01F_impro03_18.mp4.
MoviePy - Writing audio in Ses01F_impro03_18TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_18.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_18.mp4




Moviepy - Building video Ses01F_impro03_19.mp4.
MoviePy - Writing audio in Ses01F_impro03_19TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_19.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_19.mp4
Moviepy - Building video Ses01F_impro03_20.mp4.
MoviePy - Writing audio in Ses01F_impro03_20TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_20.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_20.mp4
Moviepy - Building video Ses01F_impro03_21.mp4.
MoviePy - Writing audio in Ses01F_impro03_21TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_21.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_21.mp4
Moviepy - Building video Ses01F_impro03_22.mp4.
MoviePy - Writing audio in Ses01F_impro03_22TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_22.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_22.mp4
Moviepy - Building video Ses01F_impro03_23.mp4.
MoviePy - Writing audio in Ses01F_impro03_23TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_23.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_23.mp4
Moviepy - Building video Ses01F_impro03_25.mp4.
MoviePy - Writing audio in Ses01F_impro03_25TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_25.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_25.mp4
Moviepy - Building video Ses01F_impro03_27.mp4.
MoviePy - Writing audio in Ses01F_impro03_27TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_27.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_27.mp4
Moviepy - Building video Ses01F_impro03_28.mp4.
MoviePy - Writing audio in Ses01F_impro03_28TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_28.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_28.mp4
Moviepy - Building video Ses01F_impro03_29.mp4.
MoviePy - Writing audio in Ses01F_impro03_29TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_29.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_29.mp4
Moviepy - Building video Ses01F_impro03_30.mp4.
MoviePy - Writing audio in Ses01F_impro03_30TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_30.mp4



                                                               

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_30.mp4
Moviepy - Building video Ses01F_impro03_31.mp4.
MoviePy - Writing audio in Ses01F_impro03_31TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_31.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_31.mp4
Moviepy - Building video Ses01F_impro03_32.mp4.
MoviePy - Writing audio in Ses01F_impro03_32TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_32.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_32.mp4
Moviepy - Building video Ses01F_impro03_33.mp4.
MoviePy - Writing audio in Ses01F_impro03_33TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_33.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_33.mp4
Moviepy - Building video Ses01F_impro03_34.mp4.
MoviePy - Writing audio in Ses01F_impro03_34TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_34.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_34.mp4




Moviepy - Building video Ses01F_impro03_35.mp4.
MoviePy - Writing audio in Ses01F_impro03_35TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_35.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_35.mp4




Moviepy - Building video Ses01F_impro03_36.mp4.
MoviePy - Writing audio in Ses01F_impro03_36TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_36.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_36.mp4
Moviepy - Building video Ses01F_impro03_37.mp4.
MoviePy - Writing audio in Ses01F_impro03_37TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_37.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_37.mp4
Moviepy - Building video Ses01F_impro03_38.mp4.
MoviePy - Writing audio in Ses01F_impro03_38TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_38.mp4



                                                             

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_38.mp4
Moviepy - Building video Ses01F_impro03_39.mp4.
MoviePy - Writing audio in Ses01F_impro03_39TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_39.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_39.mp4




Moviepy - Building video Ses01F_impro03_40.mp4.
MoviePy - Writing audio in Ses01F_impro03_40TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_40.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_40.mp4




Moviepy - Building video Ses01F_impro03_41.mp4.
MoviePy - Writing audio in Ses01F_impro03_41TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_41.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_41.mp4
Moviepy - Building video Ses01F_impro03_42.mp4.
MoviePy - Writing audio in Ses01F_impro03_42TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_42.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_42.mp4
Moviepy - Building video Ses01F_impro03_43.mp4.
MoviePy - Writing audio in Ses01F_impro03_43TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_43.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_43.mp4
Moviepy - Building video Ses01F_impro03_44.mp4.
MoviePy - Writing audio in Ses01F_impro03_44TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_44.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_44.mp4
Moviepy - Building video Ses01F_impro03_45.mp4.
MoviePy - Writing audio in Ses01F_impro03_45TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_45.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_45.mp4
Moviepy - Building video Ses01F_impro03_46.mp4.
MoviePy - Writing audio in Ses01F_impro03_46TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_46.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_46.mp4
Moviepy - Building video Ses01F_impro03_47.mp4.
MoviePy - Writing audio in Ses01F_impro03_47TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_47.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_47.mp4




Moviepy - Building video Ses01F_impro03_48.mp4.
MoviePy - Writing audio in Ses01F_impro03_48TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_48.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_48.mp4
Moviepy - Building video Ses01F_impro03_49.mp4.
MoviePy - Writing audio in Ses01F_impro03_49TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_49.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_49.mp4
Moviepy - Building video Ses01F_impro03_50.mp4.
MoviePy - Writing audio in Ses01F_impro03_50TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_50.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_50.mp4
Moviepy - Building video Ses01F_impro03_51.mp4.
MoviePy - Writing audio in Ses01F_impro03_51TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_51.mp4



                                                              

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_51.mp4
Moviepy - Building video Ses01F_impro03_52.mp4.
MoviePy - Writing audio in Ses01F_impro03_52TEMP_MPY_wvf_snd.mp3


                                                       

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_52.mp4



                                                   

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_52.mp4
Moviepy - Building video Ses01F_impro03_53.mp4.
MoviePy - Writing audio in Ses01F_impro03_53TEMP_MPY_wvf_snd.mp3


                                                        

MoviePy - Done.
Moviepy - Writing video Ses01F_impro03_53.mp4



                                                               

Moviepy - Done !
Moviepy - video ready Ses01F_impro03_53.mp4
