# to-do list
1) extract feature
2) label: positive [exc, hap, sur] ~21%, neutral [neu, fru] ~42%, negative [fea, ang, sad, dis] ~38% -> one CSV file
3) process dims (double), bias (remove)
- add noise
- process real data

In [3]:
# !pip install torchaudio
# !pip install torch
# !pip install soundfile
#!pip install PySoundFile
# print(torchaudio.__version__)
# print(torch.__version__)
#!pip install numpy

In [9]:
import os, shutil
import torchaudio
from io import StringIO
import torch
import sndhdr
import soundfile
import csv,glob
import pandas as pd
import numpy as np

## 1) EXTRACT FEATURE

In [62]:
def link_to_data():
    """Return the absolute path of the directory that contains the data folder.

    The relative path ``../IEMOCAP_release_full`` is resolved against the
    current working directory, and the parent directory of that resolved
    path is returned.
    """
    # Resolve the dataset location relative to where the notebook runs.
    dataset_dir = os.path.abspath(os.path.join('..', 'IEMOCAP_release_full'))

    # Keep only the containing directory; callers append the folder name.
    parent_dir, _ = os.path.split(dataset_dir)
    return parent_dir

In [63]:
# Read the file links (paths) of one session
def read_link_wav(type = ('sentences', 'dialog'), sessions = [1,2,3,4,5]):
    """Map each dialog folder of a session to the paths of the files inside it.

    Args:
        type: Sub-folder of the session to scan, given as a single string.
            ``'sentences'`` is scanned; ``'dialog'`` is not supported and
            yields ``None``. The tuple default only lists the two names and
            cannot itself be joined into a path.
        sessions: Session number. It is appended to ``'Session'`` to build the
            folder name, so a single value such as ``1`` is expected.

    Returns:
        ``None`` when ``type == 'dialog'``. Otherwise a dict whose keys are
        the names of the dialog folders under ``Session<N>/<type>/wav`` and
        whose values are lists with the full path of every entry in that
        folder, in sorted order.

    Raises:
        TypeError: If ``type`` is not a string (path joining fails).
        OSError: If the ``wav`` folder does not exist or cannot be listed.
    """
    # Dialog-level audio is not handled; bail out before touching the disk.
    if type == 'dialog':
        return None

    source = link_to_data()
    wav_root = os.path.join(source, 'IEMOCAP_full_release', 'Session' + str(sessions), type, 'wav')

    links = {}
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for dialog in sorted(os.listdir(wav_root)):
        dialog_dir = os.path.join(wav_root, dialog)
        if not os.path.isdir(dialog_dir):
            # A stray file next to the dialog folders cannot be listed.
            continue
        links[dialog] = [
            os.path.join(dialog_dir, sentence)
            for sentence in sorted(os.listdir(dialog_dir))
        ]
    return links

In [64]:

# Convert every .wav file into an MFCC tensor saved as .pt
def read_and_save_mfcc(types = ('dialog','sentences')):
    """Extract MFCC features for sessions 1-5 and save them next to the audio.

    For each session the folder ``Session<N>/<types>/mfcc`` is deleted and
    recreated. Every file that ``sndhdr`` recognises as WAV is then loaded
    with ``torchaudio.load``, passed through ``torchaudio.transforms.MFCC``
    and written to ``mfcc/<dialog>/<file name without extension>.pt``.

    Files that are not WAV are skipped. A file that fails to load, transform
    or save is reported with ``print`` and skipped, so one bad file does not
    abort the whole run.

    Args:
        types: Sub-folder of each session to process, given as a single
            string (the notebook calls this with ``'sentences'``). The tuple
            default only lists the known names. For ``'dialog'`` the link
            reader returns ``None`` and the session is left untouched.
    """
    source = link_to_data()
    # One MFCC transform per sample rate; building it per file is wasted work.
    transforms = {}

    for session in range(1, 6):
        links = read_link_wav(types, session)
        if links is None:
            # Nothing to convert: do not wipe features that already exist.
            print('No audio links for session', session, '- skipped')
            continue

        root = os.path.join(source, 'IEMOCAP_full_release', 'Session' + str(session), types)
        mfcc_root = os.path.join(root, 'mfcc')
        # Start from a clean output folder so stale features never survive.
        if os.path.exists(mfcc_root):
            shutil.rmtree(mfcc_root)
        os.makedirs(mfcc_root)

        for dia, sentences in links.items():
            dia_dir = os.path.join(mfcc_root, dia)
            os.makedirs(dia_dir, exist_ok=True)
            for sen in sentences:
                try:
                    # NOTE(review): sndhdr is deprecated in recent Python
                    # releases - confirm the target interpreter still ships it.
                    header = sndhdr.what(sen)
                    # sndhdr.what returns None for unrecognised files.
                    if header is None or header.filetype != 'wav':
                        continue
                    waveform, sample_rate = torchaudio.load(sen)
                    transform = transforms.get(sample_rate)
                    if transform is None:
                        transform = torchaudio.transforms.MFCC(sample_rate=sample_rate)
                        transforms[sample_rate] = transform
                    mfcc = transform(waveform)
                    stem = os.path.splitext(os.path.basename(sen))[0]
                    torch.save(mfcc, os.path.join(dia_dir, stem + '.pt'))
                except Exception as exc:
                    # Best effort per file, but never silently: report and go on.
                    print('Skipping', sen, '-', exc)

In [79]:
# # extract feature .wav to .pt
# read_and_save_mfcc('sentences')

## 2) Label + remove noise

In [69]:

def get_shape(file):
    """Load the object stored at ``file`` with ``torch.load`` and return its shape as a list."""
    loaded = torch.load(file)
    return [*loaded.shape]
def update_sample(categories,session,words):
    """Append the label row(s) for one evaluation line to ``processed_label_data.csv``.

    ``words`` is one tab-split line. It is only processed when it has exactly
    four fields and the feature file
    ``Session<session>/sentences/mfcc/<dialog>/<name>.pt`` exists, where
    ``<name>`` is ``words[1]`` and ``<dialog>`` is that name without its last
    five characters.

    For each of the groups 'positive', 'neutral' and 'negative' in
    ``categories`` that contains the label ``words[2]``, one row is written:
    name, label, the three one-hot flags, the stored tensor's shape, the
    shape without its first dimension, and the feature path. When the label is
    'xxx' or 'oth' the feature file is deleted from disk instead.
    """
    source = link_to_data()
    if len(words) != 4:
        return

    name, label = words[1], words[2]
    path_file = os.path.join(
        source, 'IEMOCAP_full_release', 'Session' + str(session),
        'sentences', 'mfcc', name[:-5], name + '.pt',
    )
    if not os.path.exists(path_file):
        return

    shape = get_shape(path_file)
    # Group name paired with its (positive, neutral, negative) flags.
    one_hot_by_group = (
        ('positive', (1, 0, 0)),
        ('neutral', (0, 1, 0)),
        ('negative', (0, 0, 1)),
    )
    with open('processed_label_data.csv', mode='a', newline='') as f:
        writer = csv.writer(f)
        for group, flags in one_hot_by_group:
            if label in categories[group]:
                writer.writerow([name, label, *flags, shape, shape[1:], path_file])
        # Unusable labels: drop the feature file so later steps never see it.
        if label in ['xxx', 'oth']:
            os.remove(path_file)
def create_label_file():
    """Rebuild ``processed_label_data.csv`` from the evaluation files of sessions 1-5.

    The CSV is truncated and given a header row. Then every path under
    ``Session<N>/dialog/EmoEvaluation`` that ends in ``txt`` is read line by
    line; each line is split on tabs and handed to ``update_sample`` together
    with the label grouping and the session number.
    """
    source = link_to_data()
    categories = {
        'positive': ['exc', 'hap', 'sur'],
        'neutral': ['neu','fru'],
        'negative': ['fea','ang','sad','dis'],
    }
    header = ['sessionID','labels','positive','neutral','negative','dims','time_shape','path']

    # Opening in 'w' mode discards any rows left from a previous run.
    with open('processed_label_data.csv', mode='w', newline='') as file:
        csv.writer(file).writerow(header)

    for session in range(1, 6):
        pattern = os.path.join(
            source, 'IEMOCAP_full_release', 'Session' + str(session),
            'dialog', 'EmoEvaluation', '*',
        )
        for candidate in glob.glob(pattern):
            if not candidate.endswith('txt'):
                continue
            with open(candidate, 'r') as handle:
                for line in handle:
                    update_sample(categories, session, line.split("\t"))

In [77]:
# create_label_file()

## 3) PREPROCESS DATA

In [72]:
# Check whether every saved feature tensor is mono (first dimension of size 1)
def check_mono():
    """Return True when every tensor under ``sentences/mfcc`` of sessions 1-5 has ``shape[0] == 1``.

    The scan stops and returns False at the first tensor whose first
    dimension differs from 1.
    """
    source = link_to_data()
    for session in range(1, 6):
        root = os.path.join(source, 'IEMOCAP_full_release', 'Session' + str(session), 'sentences', 'mfcc')
        for dialog in os.listdir(root):
            dialog_dir = os.path.join(root, dialog)
            # Lazily load one tensor at a time so the scan can stop early.
            leading_sizes = (
                torch.load(os.path.join(dialog_dir, name)).shape[0]
                for name in os.listdir(dialog_dir)
            )
            if any(size != 1 for size in leading_sizes):
                return False
    return True

In [73]:
def _parse_int_list(cell):
    """Turn a CSV cell such as ``"[1, 40, 900]"`` into a list of ints.

    Brackets, quotes and whitespace around each comma-separated item are
    ignored, and empty items are dropped, so ``"[]"`` gives ``[]`` and a cell
    that already went through a string round trip (``"['40', ' 900']"``) still
    parses to ``[40, 900]``.
    """
    if isinstance(cell, (list, tuple)):
        tokens = [str(item) for item in cell]
    else:
        tokens = str(cell).split(',')
    cleaned = (token.strip(" \t[]()'\"") for token in tokens)
    return [int(token) for token in cleaned if token]


def reform_file_data():
    """Load ``processed_label_data.csv`` and decode its list-valued columns.

    The ``dims`` column is converted from its text form to lists of ints. The
    ``time_shape`` column is converted the same way unless pandas already read
    it as a numeric column, in which case it is left as is. The decoded frame
    is written back to ``processed_label_data.csv`` (without the index) and
    returned.

    Both columns are parsed with the same tolerant parser, so calling this
    function repeatedly on the rewritten file keeps yielding the same values.

    Returns:
        The decoded ``pandas.DataFrame``.
    """
    df = pd.read_csv('processed_label_data.csv')
    df['dims'] = df['dims'].apply(_parse_int_list)
    # Text columns need decoding; a numeric column is already usable.
    if not pd.api.types.is_numeric_dtype(df['time_shape']):
        df['time_shape'] = df['time_shape'].apply(_parse_int_list)
    df.to_csv('processed_label_data.csv', index=False)
    return df

In [74]:
# Crop or pad every saved feature tensor to `time_shape` frames on its last axis
def normalize_shape(time_shape):
    """Force the last axis of every listed feature tensor to ``time_shape``.

    The rows come from ``reform_file_data()``. For each row the tensor at
    ``path`` is loaded and, unless its third axis already has ``time_shape``
    entries:

    * a longer tensor is cropped to the first ``time_shape`` entries;
    * a shorter tensor is padded by repeating itself from the start until it
      reaches ``time_shape`` entries;
    * the result is concatenated with itself along axis 0, saved back to the
      same path, and the row's ``time_shape`` and ``dims`` cells are updated.

    Tensors that already have the requested length are left untouched on disk
    and in the table. A tensor with an empty third axis cannot be padded by
    repetition; it is reported and skipped. The table is finally written to
    ``processed_shape_data.csv``.

    Args:
        time_shape: Target number of entries on the third axis.
    """
    df = reform_file_data()
    for row, path in df['path'].items():
        tmp_sample = torch.load(path)
        frames = tmp_sample.shape[2]
        if frames == time_shape:
            continue
        if frames == 0:
            # Repeating an empty axis never grows it; avoid looping forever.
            print('Skipping', path, '- empty time axis cannot be padded')
            continue

        if frames > time_shape:
            tmp_sample = tmp_sample[:, :, :time_shape]
        else:
            # Tile whole copies (ceil division), then crop the overshoot.
            repeats = -(-time_shape // frames)
            tmp_sample = tmp_sample.repeat(1, 1, repeats)[:, :, :time_shape]

        # NOTE(review): this duplicates the tensor along axis 0, while the
        # table below records [40, time_shape] - confirm which one is intended.
        tmp_sample = torch.cat((tmp_sample, tmp_sample), dim=0)
        torch.save(tmp_sample, path)

        # .at writes into the frame itself; df[col][row] = ... may write to a
        # temporary copy and be lost.
        df.at[row, 'time_shape'] = time_shape
        df.at[row, 'dims'] = [40, time_shape]
    df.to_csv('processed_shape_data.csv')

In [75]:
def check_shape(time_shape):
    """Return True when every listed tensor has exactly the shape ``[40, time_shape]``.

    The rows come from ``reform_file_data()``. The first tensor with a
    different shape has that shape printed and ends the check with False.
    """
    df = reform_file_data()
    expected = [40, time_shape]
    for row in df.index:
        actual = list(torch.load(df.path[row]).shape)
        if actual == expected:
            continue
        print(actual)
        return False
    return True

In [78]:
# print('Mono before normalize: ',check_mono())
# normalize_shape(900)
# print('Mono after normalize: ',check_mono())
# check_shape(900)