In [274]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report
from sklearn.mixture import GaussianMixture
import os
from feature_extraction import *
import warnings
warnings.filterwarnings('ignore')

In [570]:
TRAIN_PATH = "../data/processed/train2/"
# TEST_PATH = "../data/processed/test/"
REF_PATH = "../data/processed/reference/"

In [571]:
def setup_df(PATH):
    all_files = os.listdir(PATH)
    df = pd.DataFrame(all_files, columns=['file'])
    df['speaker'] = df['file'].apply(lambda x: int(x.split('.')[0].split('-')[-1]))
    # 0 for neutral, 1 for emotional
    df['speech_type'] = df['file'].apply(lambda x: 0 if int(x.split('.')[0].split('-')[2])==1 else 1)
    df['F0_contour'] = df['file'].apply(lambda x: get_F0_contour(PATH+x))
    return df

In [572]:
def get_audio_features(df):
#     df['SQ25'] = df['F0_contour'].apply(get_SQ25)
    df['SQ75'] = df['F0_contour'].apply(get_SQ75)
#     df['F0_median'] = df['F0_contour'].apply(get_F0_median)
#     df['sdmedian'] = df['F0_contour'].apply(get_sdmedian)
    df['IDR'] = df['F0_contour'].apply(get_IDR)
    df['voiced_segments'] = df['F0_contour'].apply(get_voiced_segments)
    df['SVMeanRange'] = df['voiced_segments'].apply(get_voiced_segment_range)
    df['SVMaxCurv'] = df['voiced_segments'].apply(get_max_voiced_curvature)
    
    return df

In [573]:
ref_df = setup_df(REF_PATH)
ref_df = get_audio_features(ref_df)
temp_df = setup_df(TRAIN_PATH)
# test_df = setup_df(TEST_PATH)

## Training the GMMs on the Reference Data

In [574]:
trained_GMMs = {}
# features = [ 'SQ25', 'SQ75','F0_median', 'sdmedian', 'IDR', 'SVMeanRange','SVMaxCurv']
features = [ 'SQ75','IDR', 'SVMeanRange','SVMaxCurv']

for feature in features:
    gmm = GaussianMixture(n_components=2)
    gmm.fit(ref_df[feature].values.reshape(-1,1))
    trained_GMMs[feature] = gmm

## IFN Algo

In [736]:
ref_df['F0_contour_sum'] = ref_df['F0_contour'].apply(sum)

In [737]:
ref_df['F0_contour_length'] = ref_df['F0_contour'].apply(len)

In [738]:
ref_df['F0_contour_mean'] = ref_df['F0_contour_sum']/ref_df['F0_contour_length']

In [739]:
avg_F0_ref = np.sum(ref_df['F0_contour_mean']) / 24

In [740]:
def stratified_sample_df(df):
    per_speaker_neutral_number = df[df['speech_type']==0]['speaker'].value_counts().values[0]
    sampled_df = []
    sampled_df.append(df[df['speech_type']==0])
    speakers = df['speaker'].unique()
    for speaker in speakers:
        speaker_df = df[df['speaker'] == speaker]
        speaker_emotional_df= speaker_df[speaker_df['speech_type']==1]
        sampled_df.append(speaker_emotional_df.sample(n=per_speaker_neutral_number))
    return pd.concat(sampled_df).reset_index(drop=True)

In [741]:
def infer_GMM(df):
    infered = np.zeros(shape=(1,len(features)))
    for i in range(len(features)):
        res = trained_GMMs[features[i]].score_samples([[df[features[i]]]])
        infered[:,i] = res
    return infered[0]

In [742]:
def get_changed_labels(neutral_list, emotional_list, row):
    if row['file'] in neutral_list:
        return 0
    else:
        return 1

In [743]:
def get_S_s_F0(F0_ref, df):
#     grouped_df_neu = df[df['speech_type']==0].groupby(['speaker']).sum().reset_index(level=0)
#     grouped_df_neu = grouped_df_neu[['speaker','F0_contour_sum','F0_contour_length']]
#     grouped_df_neu['F0_s_neu'] = grouped_df_neu['F0_contour_sum'] / grouped_df_neu['F0_contour_length']
#     grouped_df_neu['S_s_F0'] = F0_ref / grouped_df_neu['F0_s_neu']

    df_neu = df[df['speech_type']==0]
    speakers = df['speaker'].unique()
    grouped_df_neu = {}
    for speaker in speakers:
        speaker_df_neu = df_neu[df_neu['speaker']==speaker]
        speaker_mean = speaker_df_neu['F0_contour_sum'].sum()/speaker_df_neu['F0_contour_length'].sum()
        grouped_df_neu[speaker] = F0_ref/speaker_mean
        
        
    
#     return grouped_df_neu[['speaker','S_s_F0']].set_index('speaker').to_dict()['S_s_F0']
    return grouped_df_neu

In [744]:
def get_normalised_df(df, avg_F0_ref=avg_F0_ref, S0_func=get_S_s_F0):
    df_norm = S0_func(avg_F0_ref, df)
    df['F0_contour'] = df.apply(lambda x: x['F0_contour']/df_norm[x['speaker']], axis=1)
    return df_norm, df
        

In [745]:
def get_normalised_df_infer(df, df_norm):
    df['F0_contour'] = df.apply(lambda x: x['F0_contour']/df_norm[x['speaker']], axis=1)
    return df_norm, df

In [746]:
def get_stopping_criteria(df, count):
    grouped_sampled_df = df.groupby('speaker')
    neutral = []
    emotional = []

    for name, group in grouped_sampled_df:
        neu_result = group[group['predicted_likelihood'] >= 0.7]
        emo_result = group[group['predicted_likelihood'] < 0.7]

        total = group.shape[0]
        to_add = int(np.ceil(0.2 * total)) - neu_result.shape[0]
        converted_neu_add = neu_result
        if to_add > 0:
            emo_result_sort = emo_result.sort_values('predicted_likelihood', ascending=False)
            converted_neu_add = emo_result_sort.head(to_add)
            emo_result.drop(converted_neu_add.index, inplace=True)

        neutral.extend(converted_neu_add['file'].tolist())
        emotional.extend(emo_result['file'].tolist()) 

    df['changed_speech_type'] = df.apply(lambda x: \
                                         get_changed_labels(neutral,emotional,x), axis=1) 
    
    if count==0:
        return df, 1000000

    else:
        changed_dict = (df['prev_changed_speech_type'] != df['changed_speech_type']).\
                        value_counts().to_dict()

        if True not in changed_dict.keys():
            changed_dict[True] = 0
            epsilon = changed_dict[True]/changed_dict[False]
        elif False not in changed_dict.keys():
            epsilon = 1000000
        else:
            epsilon = changed_dict[True]/changed_dict[False]
    
    return df, epsilon

In [747]:
# for every epoch
epoch = 0

for iterations in range(1):

    sampled_df = stratified_sample_df(temp_df)
    
    train_df, test_df = train_test_split(sampled_df, test_size = 0.33, stratify = sampled_df[['speaker','speech_type']])
    
    train_df = get_audio_features(train_df)
    

    test_df['F0_contour_sum'] = test_df['F0_contour'].apply(sum)
    test_df['F0_contour_length'] = test_df['F0_contour'].apply(len)
    
    count = 0

    # for 400 iterations
    sampled_df_norm = None
    ldc_clf = None
    epsilon = 1000000
    max_iters = 1000
    
    ldc_clf = LinearDiscriminantAnalysis(solver='lsqr')

    
    while epsilon > 0.05 and count < max_iters:
        
        print('=========================================')   

            
        for stage in ['train', 'test']:
            
            print('----------------------------------------')   
            print('Stage - ', stage)


        
            if stage == 'train':
                
                train_df['F0_contour_sum'] = train_df['F0_contour'].apply(sum)
                train_df['F0_contour_length'] = train_df['F0_contour'].apply(len)


                if count != 0:
                    train_df['prev_changed_speech_type'] = train_df['changed_speech_type']
                    sampled_df_norm, train_df = get_normalised_df(train_df, avg_F0_ref=avg_F0_ref, S0_func=get_S_s_F0)

                print(sampled_df_norm)

                train_df = get_audio_features(train_df)
                train_df['inferred'] = train_df.apply(infer_GMM,axis=1)

                print(train_df['speech_type'].value_counts())
                ldc_clf.fit(np.array(train_df['inferred'].tolist()),train_df['speech_type'].values)

                train_df['predicted_likelihood'] = ldc_clf.predict_proba(np.array(train_df['inferred'].tolist()))[:,0]

                train_df, epsilon = get_stopping_criteria(train_df, count)

                print(count)
                print(epsilon)

                print(epsilon > 0.05)

            else:
                sampled_df_test = test_df
                if count!=0:
                    _, sampled_df_test = get_normalised_df_infer(test_df, sampled_df_norm)
                sampled_df_test = get_audio_features(sampled_df_test)
                sampled_df_test['inferred'] = sampled_df_test.apply(infer_GMM,axis=1)

                sampled_df_test['predicted_likelihood'] = ldc_clf.predict_proba(np.array(sampled_df_test['inferred'].tolist()))[:,0]
                sampled_df_test['predicted_speech_type'] = sampled_df_test['predicted_likelihood'].\
                                                            apply(lambda x: 0 if x >=0.7 else 1)

                print(classification_report(sampled_df_test['speech_type'],sampled_df_test['predicted_speech_type']))
                
        count+=1



----------------------------------------
Stage -  train
None
1    48
0    48
Name: speech_type, dtype: int64
0
1000000
True
----------------------------------------
Stage -  test
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        24
           1       0.50      1.00      0.67        24

    accuracy                           0.50        48
   macro avg       0.25      0.50      0.33        48
weighted avg       0.25      0.50      0.33        48

----------------------------------------
Stage -  train
{15: 1.4674614092583878, 6: 0.6741689339891211, 8: 0.7943161392559523, 12: 0.7351361697381131, 2: 0.7674308481235315, 16: 0.8852915540059546, 3: 1.1081498631085704, 17: 1.4870837839925748, 1: 1.5720901716609383, 11: 1.432112160067652, 23: 1.0805133334181214, 5: 1.7364034626865223, 7: 1.2314673327525052, 21: 1.9243547640720389, 10: 0.7143033663787923, 19: 1.287622575542617, 14: 0.7202831186941038, 4: 0.7676826469333605, 22: 0.792103561

1    48
0    48
Name: speech_type, dtype: int64
8
0.06666666666666667
True
----------------------------------------
Stage -  test
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        24
           1       0.50      1.00      0.67        24

    accuracy                           0.50        48
   macro avg       0.25      0.50      0.33        48
weighted avg       0.25      0.50      0.33        48

----------------------------------------
Stage -  train
{15: 4.3759507415453906e+42, 6: 1.4614920479577336e-44, 8: 2.5021136149135432e-26, 12: 6.168144529822405e-35, 2: 3.715768166711982e-30, 16: 2.845104055257255e-14, 3: 261345645698.8088, 17: 1.3118134858898616e+44, 1: 1.9871372565538745e+50, 11: 8.513738013690656e+39, 23: 406736720.16945076, 5: 2.2416337961716706e+61, 7: 1.4068937945772576e+23, 21: 5.984056383321039e+72, 10: 3.9261817578908674e-38, 19: 1.276077664167421e+28, 14: 3.3176341850330395e-37, 4: 4.041301681688493e-30, 22: 1.

ValueError: array must not contain infs or NaNs

In [722]:
ref_df['F0_contour_length'].sum()/24

348.5416666666667