In [15]:
import os
import time
import tqdm
from tqdm import tqdm
import scipy 
import numpy as np
import pandas as pd
import audb
import audiofile
from scipy.io.wavfile import read as read_wav
from matplotlib import pyplot as plt
import opensmile

import sklearn 
from sklearn.model_selection import train_test_split #train/test split 
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC #Model
from sklearn.metrics import confusion_matrix #Metrics 
from sklearn.metrics import f1_score 
from sklearn.model_selection import cross_val_score #Cross-validation
from sklearn.model_selection import KFold


import random #for shuffling values 
from scipy import stats

#List of prosodic features we will observe
features_prosody= ['F0semitoneFrom27.5Hz_sma3nz_amean','loudness_sma3_amean',
'mfcc1_sma3_amean',
'jitterLocal_sma3nz_amean',
'shimmerLocaldB_sma3nz_amean',
'hammarbergIndexV_sma3nz_amean']

# Function to perform one-hot encoding
def encode(label):
    if 'non-hate' in label:
        return 0
    elif 'female' in label:
        return 0
    elif 'parody' in label:
        return 0
    elif 'hate' in label:
        return 1
    elif 'male' in label:
        return 1
    elif 'satiric-informative' in label:
        return 1
    elif 'both' in label:
        return 2
    elif 'speech' in label:
        return 2
    elif 'response' in label:
        return 3
    elif 'debate'in label:
        return 4
    elif 'interview'in label:
        return 5
    elif 'informative'in label:
        return 6
    elif 'report'in label:
        return 7
    else:
        raise Exception('Error')

#Gets functional features of all audios 
def functional_features(list_with_files):
    path= "data/clean_audio1/"
    
    #  Define feature extractor
    smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,)

    df_data= []

    for i in tqdm((list_with_files)):
        f= path + i
        sampling_rate, data = read_wav(f) #gets sampling rate and dimensions of data
        #print(f"Sampling rate of {f}:{sampling_rate}")
        
        db = audb.load('emodb',
        version='1.1.1',
        format='wav',
        mixdown=True,
        sampling_rate=sampling_rate,
        media='wav/03a01.*',  # load subset
        full_path=False,
        verbose=False,)

        #extract features
        signal, sampling_rate = audiofile.read(f, duration=120, always_2d=True)
        data= smile.process_signal(signal, sampling_rate)
        df_data.append(data.loc[:,features_prosody])

    df= pd.concat(df_data, ignore_index=True)
    return df
        
def t_test(dataframe):
    for i in features_prosody:
        class_0= dataframe[i][dataframe['label']==0].values
        class_1= dataframe[i][dataframe['label']== 1].values 
        t_statistic, p_value = stats.ttest_ind(class_0, class_1)
        print(f"T-statistic {i}:", t_statistic)
        print(f"P-value {i}:", p_value)
    

In [2]:
df=pd.read_csv("D:/CCiL/TFM/model/data/data_spanish_dataset.csv")

# Encoding categorical variables. 
df['label'] = df['label'].apply(lambda x: encode(x))
df['gender']= df['gender'].apply(lambda x: encode(x))
df['type of discouse']=df['type of discouse'].apply(lambda x: encode(x))

#Storing labels in array 
labels= df['label'].values
enc_gender= df['gender'].values
enc_discourse= df['type of discouse'].values


In [7]:
#process wav files 
path='data/clean_audio1'
dir_list = os.listdir(path)
feature_data= functional_features(dir_list)


100%|██████████| 100/100 [05:35<00:00,  3.36s/it]


In [8]:
#Adds encoded data 
feature_data['gender']=enc_gender
feature_data['type of discourse']=enc_discourse
feature_data['label']=labels

df= feature_data


In [9]:
#Performs t-test 
t_test(df)

T-statistic F0semitoneFrom27.5Hz_sma3nz_amean: 2.2487934773623564
P-value F0semitoneFrom27.5Hz_sma3nz_amean: 0.026764685823525612
T-statistic loudness_sma3_amean: -2.9324582955450342
P-value loudness_sma3_amean: 0.004186531896733063
T-statistic mfcc1_sma3_amean: 2.0907844943307077
P-value mfcc1_sma3_amean: 0.039134807280088044
T-statistic jitterLocal_sma3nz_amean: -1.1565093128645185
P-value jitterLocal_sma3nz_amean: 0.2502843380505851
T-statistic shimmerLocaldB_sma3nz_amean: -3.0798712117624505
P-value shimmerLocaldB_sma3nz_amean: 0.002687346355337155
T-statistic hammarbergIndexV_sma3nz_amean: 4.132627428485812
P-value hammarbergIndexV_sma3nz_amean: 7.563214277725204e-05


In [18]:
# Select the column you want to normalize
column_to_normalize = 'F0semitoneFrom27.5Hz_sma3nz_amean'

# Extract the column as a numpy array
column_data = df[column_to_normalize].values.reshape(-1, 1)

# Normalize the column
scaler = StandardScaler()
normalized_column = scaler.fit_transform(column_data)

# Replace the original column with the normalized values
df[column_to_normalize] = normalized_column
df


Unnamed: 0,F0semitoneFrom27.5Hz_sma3nz_amean,loudness_sma3_amean,mfcc1_sma3_amean,jitterLocal_sma3nz_amean,shimmerLocaldB_sma3nz_amean,hammarbergIndexV_sma3nz_amean,gender,type of discourse,label
0,1.458993,0.831961,13.389601,0.041023,1.282228,18.936230,0,1,0
1,1.419710,1.764720,6.720259,0.084745,2.403434,8.263502,1,0,1
2,0.500541,1.251257,19.586304,0.029870,1.202914,19.541899,0,1,0
3,0.481301,1.528133,22.042315,0.063728,1.433772,20.837244,0,2,1
4,-0.277204,1.173489,28.461868,0.033654,1.195717,19.314997,2,5,1
...,...,...,...,...,...,...,...,...,...
95,-0.422237,1.018759,29.975269,0.032481,1.155741,26.601654,0,7,1
96,-1.686526,1.243195,20.095507,0.046844,1.489741,19.988497,2,5,1
97,0.808590,2.032593,15.154454,0.037003,1.185197,15.772832,0,2,0
98,-0.045360,0.664796,30.143198,0.039862,1.347563,27.701916,0,7,0


In [19]:
#Train/Test split 
y_axis= df.iloc[:,[-1]]
y= y_axis.to_numpy().flatten() #Labels (class 0 and 1)
X_axis= df.drop(y_axis.columns,axis = 1)
X= X_axis.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [20]:
svc= SVC(kernel='rbf', class_weight='balanced', C=1, gamma=1e-3)
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
confusion_matrix(y_test, y_pred)


array([[ 5,  2],
       [ 1, 12]], dtype=int64)

In [21]:
#Cross-validation recall

C_list= np.linspace(0.1, 10, 20)
gamma_list= np.logspace(-4, -2, 20)
precision = np.zeros((len(C_list), len(gamma_list)))


for i in range(len(C_list)):
     for j in range(len(gamma_list)):
        svc= SVC(C=C_list[i], gamma=gamma_list[j], kernel='rbf')
        svc.fit(X_train, y_train)
        y_pred= svc.predict(X_val)
        tn, fp, fn, tp = confusion_matrix(y_val, y_pred).ravel()
        precision[i, j] = float(tp+tn) / (tp + fn + fp + tn)
               

index_max = np.unravel_index(np.argmax(recall), recall.shape)

print('Max. precision:{}'.format(precision[index_max]))
C_opt = C_list[index_max[0]]
gamma_opt = gamma_list[index_max[1]]
print('Optimal C: {}'.format(C_opt))
print('Optimal gamma:{}'.format(gamma_opt))

Max. recall:0.75
Optimal C: 0.6210526315789474
Optimal gamma:0.0011288378916846883


In [22]:
svc= SVC(kernel='rbf', class_weight='balanced', C=C_opt, gamma=gamma_opt)
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[ 5,  2],
       [ 2, 11]], dtype=int64)

In [28]:
#Validation of model (Precision)

kf = KFold(n_splits=10, shuffle=True)
scores = [] 
for train_idx, val_idx in kf.split(X):
    X_train_k, y_train_k = X[train_idx], y[train_idx]
    X_val_k, y_val_k = X[val_idx], y[val_idx]
    svc= SVC(C=C_opt, gamma=gamma_opt, kernel='rbf', class_weight='balanced')
    svc.fit(X_train_k, y_train_k)
    y_pred_k= svc.predict(X_val_k)
    cm= confusion_matrix(y_val_k, y_pred_k)
    tn, fp, fn, tp = confusion_matrix(y_val_k, y_pred_k).ravel()
    precision_v = float(tp+tn) / (tp + fn + fp + tn)
    scores.append(precision_v)

print('Precision mean: {}'.format(np.mean(scores))) 
print('Precision standard deviation: {}'.format(np.std(scores)))

Precision mean: 0.6599999999999999
Precision standard deviation: 0.17435595774162696


In [31]:
#Did we achieve overfitting??? Let's see 

svc= SVC(kernel='rbf', class_weight='balanced', C=C_opt, gamma=gamma_opt)
svc.fit(X_train, y_train)
y_pred = svc.predict(X_train)
cm= confusion_matrix(y_train, y_pred)
tn, fp, fn, tp = confusion_matrix(y_train, y_pred).ravel()
precision = float(tp+tn) / (tp + fn + fp + tn)
print(precision)

0.59375
