In [356]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.mixture import GaussianMixture
import os
from feature_extraction import *

In [87]:
# Directories of the processed audio, one per data split
# (paths are relative to the notebook's working directory).
TRAIN_PATH, TEST_PATH, REF_PATH = (
    "../data/processed/train/",
    "../data/processed/test/",
    "../data/processed/reference/",
)

In [92]:
def setup_df(PATH):
    """Build a per-utterance DataFrame for the audio files found in ``PATH``.

    The file stem (text before the first ``.``) is split on ``-``; the third
    field is read as the emotion code and the last field as the speaker id.

    Parameters
    ----------
    PATH : str
        Directory containing the audio files. A trailing separator is
        optional because paths are joined with ``os.path.join``.

    Returns
    -------
    pandas.DataFrame
        One row per file, sorted by file name, with columns:
        ``file`` (file name), ``speaker`` (int), ``speech_type``
        (0 for neutral, i.e. emotion code 1; 1 for emotional) and
        ``F0_contour`` (whatever ``get_F0_contour`` returns for the file).

    Raises
    ------
    ValueError, IndexError
        If a non-hidden file name does not follow the dash-separated
        numeric naming scheme.
    """
    # Hidden entries (.DS_Store, .ipynb_checkpoints, ...) have an empty stem
    # and would crash the int() parsing below. Sorting makes the row order
    # independent of the file system's listing order.
    all_files = sorted(f for f in os.listdir(PATH) if not f.startswith('.'))
    df = pd.DataFrame(all_files, columns=['file'])

    # Split every stem once and reuse the fields for both derived columns.
    name_fields = df['file'].apply(lambda x: x.split('.')[0].split('-'))
    df['speaker'] = name_fields.apply(lambda parts: int(parts[-1]))
    # 0 for neutral, 1 for emotional
    df['speech_type'] = name_fields.apply(lambda parts: 0 if int(parts[2]) == 1 else 1)
    df['F0_contour'] = df['file'].apply(lambda x: get_F0_contour(os.path.join(PATH, x)))
    return df

In [93]:
def get_audio_features(df):
    """Add the F0-derived feature columns to ``df`` and return it.

    ``df`` must already have an ``F0_contour`` column. The frame is modified
    in place; the returned object is the same frame that was passed in.

    Columns written, in order: ``SQ25``, ``SQ75``, ``F0_median``,
    ``sdmedian``, ``IDR`` (each computed from ``F0_contour``),
    ``voiced_segments`` (from ``F0_contour``), then ``SVMeanRange`` and
    ``SVMaxCurv`` (each computed from ``voiced_segments``).
    """
    # (target column, extractor) pairs, grouped by the column they read from.
    contour_level = (
        ('SQ25', get_SQ25),
        ('SQ75', get_SQ75),
        ('F0_median', get_F0_median),
        ('sdmedian', get_sdmedian),
        ('IDR', get_IDR),
        ('voiced_segments', get_voiced_segments),
    )
    segment_level = (
        ('SVMeanRange', get_voiced_segment_range),
        ('SVMaxCurv', get_max_voiced_curvature),
    )

    for column, extractor in contour_level:
        df[column] = df['F0_contour'].apply(extractor)
    # The segment-level features need 'voiced_segments', created just above.
    for column, extractor in segment_level:
        df[column] = df['voiced_segments'].apply(extractor)

    return df

In [95]:
# Reference split: load the files and extract the feature columns in one go.
ref_df = get_audio_features(setup_df(REF_PATH))

# Train / test splits: only the file table and F0 contours are built here.
train_df, test_df = setup_df(TRAIN_PATH), setup_df(TEST_PATH)

In [287]:
# Add the F0-derived feature columns to the training utterances
# (get_audio_features writes the columns into the frame and returns it).
train_df = get_audio_features(train_df)

## Training the GMMs on the Reference Data

In [316]:
# Fixed seed: GaussianMixture initialisation is random, so without it the
# fitted mixtures (and every score derived from them) change on each run.
GMM_RANDOM_STATE = 0

# One 2-component GMM per feature, fitted on that feature's values in the
# reference data and stored under the feature's name.
trained_GMMs = {}
features = [ 'SQ25', 'SQ75','F0_median', 'sdmedian', 'IDR', 'SVMeanRange','SVMaxCurv']
for feature in features:
    gmm = GaussianMixture(n_components=2, random_state=GMM_RANDOM_STATE)
    # reshape(-1, 1): a single feature becomes an (n_samples, 1) design matrix.
    gmm.fit(ref_df[feature].values.reshape(-1,1))
    trained_GMMs[feature] = gmm

  random_state=random_state).fit(X).labels_
  random_state=random_state).fit(X).labels_
  random_state=random_state).fit(X).labels_


## Iterative Feature Normalization (IFN) Algorithm

In [101]:
# Per-utterance total of the values in the F0 contour
# (numerator of the pooled mean F0 computed below).
ref_df['F0_contour_sum'] = ref_df['F0_contour'].apply(sum)

In [102]:
# Per-utterance number of entries in the F0 contour
# (denominator of the pooled mean F0 computed below).
ref_df['F0_contour_length'] = ref_df['F0_contour'].apply(len)

In [103]:
# Pooled mean F0 of the reference set: sum of all contour values divided by
# the total number of contour entries (not a mean of per-utterance means).
total_F0_ref = ref_df['F0_contour_sum'].sum()
total_len_ref = ref_df['F0_contour_length'].sum()
avg_F0_ref = total_F0_ref / total_len_ref

In [104]:
# Same per-utterance contour total / entry count as for the reference set,
# written into both the training and the test frame.
for split_df in (train_df, test_df):
    split_df['F0_contour_sum'] = split_df['F0_contour'].apply(sum)
    split_df['F0_contour_length'] = split_df['F0_contour'].apply(len)

In [186]:
def get_S_s_F0(F0_ref, df):
    """Compute the per-speaker F0 scaling factor ``S_s_F0``.

    For each speaker, the mean F0 over that speaker's neutral utterances
    (``speech_type == 0``) is the pooled ``F0_contour_sum`` divided by the
    pooled ``F0_contour_length``. The scaling factor is
    ``F0_ref / <speaker neutral mean F0>``.

    Parameters
    ----------
    F0_ref : float
        Mean F0 of the reference data.
    df : pandas.DataFrame
        Needs the columns ``speaker``, ``speech_type``, ``F0_contour_sum``
        and ``F0_contour_length``; other columns are ignored.

    Returns
    -------
    dict
        ``{speaker: S_s_F0}``. Speakers without any neutral utterance are
        absent from the dict.
    """
    stat_columns = ['F0_contour_sum', 'F0_contour_length']
    # Pick the numeric columns BEFORE aggregating: summing the whole frame
    # would also try to add up file names and F0 contour arrays, which is
    # wasteful and fails on ragged arrays / newer pandas versions.
    neutral_stats = df.loc[df['speech_type'] == 0, ['speaker'] + stat_columns]
    per_speaker = neutral_stats.groupby('speaker')[stat_columns].sum()

    F0_s_neu = per_speaker['F0_contour_sum'] / per_speaker['F0_contour_length']
    return (F0_ref / F0_s_neu).to_dict()

In [187]:
def stratified_sample_df(df, random_state=None):
    """Sub-sample ``df`` so emotional utterances do not outnumber neutral ones.

    All neutral rows (``speech_type == 0``) are kept. For every speaker, a
    random subset of that speaker's emotional rows (``speech_type == 1``) is
    drawn. The subset size is the largest per-speaker neutral count in ``df``,
    capped at the number of emotional rows the speaker actually has.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``speaker`` and ``speech_type``.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``DataFrame.sample``. Pass a value for a reproducible
        draw; the default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        Neutral rows first, then the sampled emotional rows speaker by
        speaker, with a fresh ``0..n-1`` index.

    Raises
    ------
    IndexError
        If ``df`` contains no neutral rows.
    """
    neutral_df = df[df['speech_type'] == 0]
    # value_counts() sorts descending, so element 0 is the largest count.
    per_speaker_neutral_number = int(neutral_df['speaker'].value_counts().values[0])

    sampled_df = [neutral_df]
    for speaker in df['speaker'].unique():
        speaker_emotional_df = df[(df['speaker'] == speaker) & (df['speech_type'] == 1)]
        # sample() raises if asked for more rows than exist; take what is there.
        n_draw = min(per_speaker_neutral_number, len(speaker_emotional_df))
        sampled_df.append(speaker_emotional_df.sample(n=n_draw, random_state=random_state))
    return pd.concat(sampled_df).reset_index(drop=True)

In [349]:
def infer_GMM(df):
    """Score one utterance against every per-feature GMM.

    ``df`` is a single row (any mapping from feature name to scalar value,
    e.g. the row handed over by ``DataFrame.apply(..., axis=1)``). For each
    name in the module-level ``features`` list, the value is passed to the
    matching model in ``trained_GMMs`` via ``score_samples``.

    Returns a 1-D float array with one score per feature, in the order of
    ``features``.
    """
    scores = np.zeros(len(features))
    for position, feature_name in enumerate(features):
        # score_samples expects a 2-D (n_samples, n_features) input; wrap the
        # scalar as a 1x1 sample and unpack the single score it returns.
        sample = [[df[feature_name]]]
        scores[position] = trained_GMMs[feature_name].score_samples(sample)[0]
    return scores

In [439]:
def get_changed_labels(neutral_list, emotional_list, row):
    """Return the label for ``row``: 0 if its file is in ``neutral_list``, else 1.

    ``row`` is anything indexable by ``'file'`` (e.g. a DataFrame row).
    ``emotional_list`` is accepted but not consulted: every file that is not
    listed as neutral is labelled 1.
    """
    return 0 if row['file'] in neutral_list else 1

In [357]:
# for every epoch

# for 400 iterations

# subsample the df to represent equal number of emotional and neutral classes
sampled_df = stratified_sample_df(train_df)

# speaker -> S_s_F0 = avg_F0_ref / (mean F0 of that speaker's neutral speech)
sampled_df_norm = get_S_s_F0(avg_F0_ref, sampled_df)

# Normalise each contour by MULTIPLYING with the speaker's factor:
# F0 * (avg_F0_ref / F0_neutral) moves the speaker's neutral mean onto the
# reference mean. Dividing would scale in the opposite direction.
# The column is built element by element so that contours of equal length
# cannot be mistaken for a 2-D block by a row-wise DataFrame.apply.
sampled_df['F0_contour'] = pd.Series(
    [
        np.asarray(contour) * sampled_df_norm[speaker]
        for contour, speaker in zip(sampled_df['F0_contour'], sampled_df['speaker'])
    ],
    index=sampled_df.index,
)

# Recompute the features on the normalised contours, then score every
# utterance against the per-feature reference GMMs.
sampled_df = get_audio_features(sampled_df)
sampled_df['inferred'] = sampled_df.apply(infer_GMM,axis=1)


In [393]:
# Stack the per-utterance score vectors into one 2-D array (one row per
# utterance, one column per feature) and pair it with the 0/1 labels.
lda_inputs = np.array(sampled_df['inferred'].tolist())
lda_targets = sampled_df['speech_type'].values

clf = LinearDiscriminantAnalysis()
clf.fit(lda_inputs, lda_targets)

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
                           solver='svd', store_covariance=False, tol=0.0001)

In [415]:
# Keep column 0 of predict_proba, i.e. the probability of clf.classes_[0].
# NOTE(review): that is label 0 (neutral) as long as both labels were present
# when clf was fitted — confirm.
sampled_df['predicted_likelihood'] = clf.predict_proba(np.array(sampled_df['inferred'].tolist()))[:,0]

In [437]:
# Group the sampled utterances by speaker so each speaker is processed separately.
grouped_sampled_df = sampled_df.groupby('speaker')

In [438]:
# Utterances whose predicted likelihood reaches this value are relabelled neutral.
NEUTRAL_LIKELIHOOD_THRESHOLD = 0.7
# Each speaker gets at least this fraction of their utterances labelled neutral.
MIN_NEUTRAL_FRACTION = 0.2


def _split_speaker_group(group, threshold=NEUTRAL_LIKELIHOOD_THRESHOLD,
                         min_neutral_fraction=MIN_NEUTRAL_FRACTION):
    """Split one speaker's utterances into neutral and emotional file lists.

    Rows with ``predicted_likelihood >= threshold`` are neutral. If they make
    up fewer than ``int(min_neutral_fraction * len(group))`` rows, the
    highest-likelihood remaining rows are moved to neutral until that count
    is reached.

    Returns ``(neutral_files, emotional_files)`` as two lists of file names.
    """
    neu_result = group[group['predicted_likelihood'] >= threshold]
    emo_result = group[group['predicted_likelihood'] < threshold]

    to_add = int(min_neutral_fraction * group.shape[0]) - neu_result.shape[0]
    if to_add > 0:
        promoted = emo_result.sort_values('predicted_likelihood', ascending=False).head(to_add)
        neu_result = pd.concat([neu_result, promoted])
        # Non-inplace drop: emo_result is a filtered slice of `group`.
        emo_result = emo_result.drop(promoted.index)

    return neu_result['file'].tolist(), emo_result['file'].tolist()


neutral = []
emotional = []

for _, group in grouped_sampled_df:
    neu_files, emo_files = _split_speaker_group(group)
    # `neutral` holds the rows above the threshold AND the promoted ones.
    # Previously the former were dropped from both lists, and the loop
    # crashed whenever no promotion was needed.
    neutral.extend(neu_files)
    emotional.extend(emo_files)

In [None]:
sampled_df['changed_speech_type'] = sampled_df.app 