In [14]:
import os, shutil
import pandas as pd

In [2]:
# Create the base folder that will hold the spectrograms for the train,
# validation and test sets.

base_dir = 'spectrograms_cnn_base'

# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError when the folder is left over from a previous run.
os.makedirs(base_dir, exist_ok=True)






In [3]:
# Create the train-set folder inside the base folder.

train_dir = os.path.join(base_dir, 'train')
# makedirs(exist_ok=True) instead of mkdir: re-running the cell no longer
# raises FileExistsError, and a missing base folder is created on the way.
os.makedirs(train_dir, exist_ok=True)



In [4]:
# Create the test-set folder inside the base folder.
test_dir = os.path.join(base_dir, 'test')
# makedirs(exist_ok=True) instead of mkdir: re-running the cell no longer
# raises FileExistsError, and a missing base folder is created on the way.
os.makedirs(test_dir, exist_ok=True)



In [5]:
# Create the validation-set folder inside the base folder.
validation_dir = os.path.join(base_dir, 'validation')
# makedirs(exist_ok=True) instead of mkdir: re-running the cell no longer
# raises FileExistsError, and a missing base folder is created on the way.
os.makedirs(validation_dir, exist_ok=True)



In [6]:
# Create one sub-folder per emotion inside the train-set folder.

emotions = ['ANG', 'DIS', 'FEA', 'HAP', 'SAD', 'NEU']

# Build the paths from the label list itself instead of a hard-coded range(6),
# so the folders stay in step with `emotions` if a label is added or removed.
train_emos = [os.path.join(train_dir, emotion) for emotion in emotions]

for emotion_dir in train_emos:
    # exist_ok=True makes the cell safe to re-run; os.mkdir raised
    # FileExistsError as soon as the folders existed.
    os.makedirs(emotion_dir, exist_ok=True)
    




In [7]:
# Create one sub-folder per emotion inside the test-set folder.
# Paths follow the `emotions` label list rather than a hard-coded range(6).
test_emos = [os.path.join(test_dir, emotion) for emotion in emotions]

for emotion_dir in test_emos:
    # exist_ok=True makes the cell safe to re-run; os.mkdir raised
    # FileExistsError as soon as the folders existed.
    os.makedirs(emotion_dir, exist_ok=True)



In [8]:
# Create one sub-folder per emotion inside the validation-set folder.
# Paths follow the `emotions` label list rather than a hard-coded range(6).

val_emos = [os.path.join(validation_dir, emotion) for emotion in emotions]

for emotion_dir in val_emos:
    # exist_ok=True makes the cell safe to re-run; os.mkdir raised
    # FileExistsError as soon as the folders existed.
    os.makedirs(emotion_dir, exist_ok=True)





In [34]:
# Split the existing (large) train set into a smaller train set and a
# validation set, stratified by emotion.

from sklearn.model_selection import train_test_split

# os.listdir returns every directory entry, so keep only the .png spectrograms:
# a stray entry (e.g. .DS_Store or .ipynb_checkpoints) would otherwise crash the
# emotion parsing below with IndexError or add a bogus class to the strata.
# Sorting keeps the input order, and therefore the seeded split, reproducible.
train_files_list = sorted(
    name for name in os.listdir('Spectrograms_final/Train')
    if name.lower().endswith('.png')
)

# File names look like '1011_TIE_SAD_XX.png'; the third '_'-separated field is
# the emotion code.
train_files_emotions = [name.split('_')[2] for name in train_files_list]

train_files_df = pd.DataFrame({'FileName': train_files_list, 'Emotion': train_files_emotions})

# 80/20 split with a fixed seed; stratifying on the emotion column keeps the
# class proportions (near-)equal in both parts.
tt_files_df, val_files_df = train_test_split(train_files_df,
                                             test_size = 0.2,
                                             random_state = 123,
                                             shuffle = True,
                                             stratify = train_files_df.Emotion.values)













In [40]:
# Share of each emotion in the reduced train split (fractions, not raw counts).
tt_files_df['Emotion'].value_counts(normalize=True)

NEU    0.178718
ANG    0.164359
DIS    0.164359
HAP    0.164359
SAD    0.164103
FEA    0.164103
Name: Emotion, dtype: float64

In [41]:
# Share of each emotion in the validation split (fractions, not raw counts).
val_files_df['Emotion'].value_counts(normalize=True)

NEU    0.179303
FEA    0.164959
SAD    0.163934
ANG    0.163934
DIS    0.163934
HAP    0.163934
Name: Emotion, dtype: float64

In [44]:
# Preview the first five rows of the reduced train split.
tt_files_df.head(5)

Unnamed: 0,FileName,Emotion
583,1011_TIE_SAD_XX.png,SAD
869,1017_DFA_SAD_XX.png,SAD
2295,1045_IWL_SAD_XX.png,SAD
4718,1088_TAI_ANG_XX.png,ANG
4498,1085_ITS_SAD_XX.png,SAD


In [53]:
# Copy every file listed in tt_files_df into the train sub-folder that matches
# its emotion.
tt_files_list = tt_files_df['FileName'].tolist()

print(tt_files_list[:20])

train_src_root = 'Spectrograms_final/Train/'
train_dest_root = 'spectrograms_cnn_base/train/'

for png_name in tt_files_list:
    # The third '_'-separated field of the file name is the emotion code,
    # which is also the name of the destination sub-folder.
    emotion_code = png_name.split('_')[2]
    shutil.copy(train_src_root + png_name, train_dest_root + emotion_code)

['1011_TIE_SAD_XX.png', '1017_DFA_SAD_XX.png', '1045_IWL_SAD_XX.png', '1088_TAI_ANG_XX.png', '1085_ITS_SAD_XX.png', '1016_WSI_FEA_XX.png', '1052_TSI_ANG_XX.png', '1072_TAI_ANG_XX.png', '1017_WSI_DIS_XX.png', '1020_IWW_NEU_XX.png', '1042_TSI_HAP_XX.png', '1050_ITH_NEU_XX.png', '1033_TIE_ANG_XX.png', '1037_IWL_SAD_XX.png', '1026_ITH_SAD_XX.png', '1071_ITH_FEA_XX.png', '1082_IWW_FEA_XX.png', '1014_ITH_NEU_XX.png', '1048_IWW_DIS_XX.png', '1082_MTI_NEU_XX.png']


In [54]:
# Copy every file listed in val_files_df into the validation sub-folder that
# matches its emotion. The sources still live in the original Train folder.
val_files_list = val_files_df['FileName'].tolist()

print(val_files_list[:20])

val_src_root = 'Spectrograms_final/Train/'
val_dest_root = 'spectrograms_cnn_base/validation/'

for png_name in val_files_list:
    # The third '_'-separated field of the file name is the emotion code,
    # which is also the name of the destination sub-folder.
    emotion_code = png_name.split('_')[2]
    shutil.copy(val_src_root + png_name, val_dest_root + emotion_code)

['1085_TIE_FEA_XX.png', '1065_ITS_FEA_XX.png', '1035_TAI_SAD_XX.png', '1012_ITS_FEA_XX.png', '1061_DFA_ANG_XX.png', '1057_WSI_SAD_XX.png', '1023_MTI_ANG_XX.png', '1005_WSI_NEU_XX.png', '1076_TSI_DIS_XX.png', '1061_IEO_NEU_XX.png', '1010_IWW_NEU_XX.png', '1044_IOM_DIS_XX.png', '1052_DFA_NEU_XX.png', '1083_IWW_NEU_XX.png', '1091_MTI_FEA_XX.png', '1041_IOM_ANG_XX.png', '1091_ITH_NEU_XX.png', '1082_MTI_FEA_XX.png', '1015_MTI_NEU_XX.png', '1010_ITS_NEU_XX.png']


In [55]:

# Copy the test spectrograms into the test sub-folder that matches their emotion.

# os.listdir returns every directory entry, so keep only the .png spectrograms:
# a stray entry (e.g. .DS_Store or .ipynb_checkpoints) would otherwise raise
# IndexError in the emotion parsing below or make shutil.copy fail on a folder.
test_files_list = [name for name in os.listdir('Spectrograms_final/Test')
                   if name.lower().endswith('.png')]

print(test_files_list[0:20])

for filename in test_files_list:
    # The third '_'-separated field of the file name is the emotion code,
    # which is also the name of the destination sub-folder.
    emo = filename.split('_')[2]
    src = 'Spectrograms_final/Test/' + filename
    dest = 'spectrograms_cnn_base/test/' + emo
    shutil.copy(src, dest)




['1008_MTI_HAP_XX.png', '1029_MTI_FEA_XX.png', '1070_TSI_DIS_XX.png', '1034_DFA_DIS_XX.png', '1043_ITH_ANG_XX.png', '1034_WSI_DIS_XX.png', '1029_DFA_FEA_XX.png', '1008_DFA_HAP_XX.png', '1039_ITH_DIS_XX.png', '1089_TIE_NEU_XX.png', '1029_WSI_FEA_XX.png', '1034_MTI_DIS_XX.png', '1013_IOM_DIS_XX.png', '1006_IOM_DIS_XX.png', '1080_IOM_FEA_XX.png', '1068_TAI_HAP_XX.png', '1049_TAI_FEA_XX.png', '1049_ITS_DIS_XX.png', '1080_TSI_SAD_XX.png', '1066_IWL_DIS_XX.png']
