# Classification 

In [1]:
import pandas as pd

In [2]:
# Load the video feature dataframe from the pickled file.
# NOTE: unpickling can execute arbitrary code - only load pickle files
# that come from a trusted source.
df = pd.read_pickle("../data/Video2Features.pkl")

In [7]:
# Preview the first rows to check the column layout: FILE (speaker),
# SEG, CLASS_1 (label), followed by the numeric feature columns.
df.head()

Unnamed: 0,FILE,SEG,CLASS_1,0,1,2,3,4,5,6,...,25078,25079,25080,25081,25082,25083,25084,25085,25086,25087
0,speaker1,0,boring,0.0,3.450885,1.179175,0.0,0.0,0.0,2.813062,...,20.059443,0.0,1.031235,0.0,0.0,0.0,1.870711,6.2898,19.62705,0.403226
1,speaker1,1,neutral,0.0,15.276594,1.038574,0.0,0.0,0.0,2.16167,...,26.400854,0.0,0.956505,0.0,4.346704,0.0,0.454285,0.0,28.739689,0.0
2,speaker2,0,neutral,0.0,3.450885,1.179175,0.0,0.0,0.0,2.813062,...,20.059443,0.0,1.031235,0.0,0.0,0.0,1.870711,6.2898,19.62705,0.403226
3,speaker2,1,exciting,0.0,15.276594,1.038574,0.0,0.0,0.0,2.16167,...,26.400854,0.0,0.956505,0.0,4.346704,0.0,0.454285,0.0,28.739689,0.0


In [39]:
def split_train_test(df, speaker):
    '''
    Splits the provided dataframe (audio & video features with
    speakers and labels) into train and test sets based on speaker
    (one vs all): the rows of `speaker` form the test set and the
    rows of every other speaker form the train set.

    :param df:          dataframe with audio & video data; must contain
                        the 'FILE', 'SEG' and 'CLASS_1' columns
    :param speaker:     speaker (value of the 'FILE' column) that is
                        held out as the test set
    :return:            tuple (train_X, train_Y, test_X, test_Y); the X
                        parts contain only the feature columns, the Y
                        parts contain the 'CLASS_1' labels
    :raises KeyError:   if one of the required columns is missing

    '''

    # Dataframe Columns
    c_file = 'FILE'
    c_meta = ['FILE', 'SEG']
    c_label = 'CLASS_1'

    # Rows that belong to the held-out speaker
    is_test = df[c_file] == speaker

    # Features are selected by name (everything except metadata and the
    # label) instead of by position, so the label can never leak into X
    # and no feature is lost when the column order differs.
    features = df.drop(columns=c_meta + [c_label])
    labels = df[c_label]

    # Train
    train_X = features[~is_test]
    train_Y = labels[~is_test]

    # Test
    test_X = features[is_test]
    test_Y = labels[is_test]

    return train_X, train_Y, test_X, test_Y
    

In [45]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import statistics as st

def evaluate_training(df):
    '''
    Measures how well an RBF-kernel SVC learns the provided dataset
    using one round per speaker: in each round one speaker is held
    out for testing and the model is fitted on all other speakers.

    :param df:          dataframe with audio & video data
    :return:            mean of the per-round accuracy scores

    '''
    # Accuracy obtained in each round
    fold_scores = []

    # One round per distinct speaker found in the 'FILE' column
    for held_out in df['FILE'].unique():

        # Held-out speaker becomes the test set, the rest is train
        x_fit, y_fit, x_eval, y_eval = split_train_test(df, held_out)

        # Build and fit a fresh classifier for this round
        classifier = SVC(kernel='rbf').fit(x_fit, y_fit)

        # Score the predictions for the held-out speaker
        fold_scores.append(accuracy_score(y_eval, classifier.predict(x_eval)))

    return st.mean(fold_scores)

In [46]:
# Mean accuracy over all one-speaker-vs-rest rounds.
# NOTE(review): the recorded output of 0.0 means no held-out segment was
# classified correctly in any round - investigate the data/labels before
# relying on this result.
evaluate_training(df)



0.0