## Affective Content Prediction
### LIRIS-ACCEDE dataset: original dataset composed of 9800 video clips extracted from 160 movies

#### Task 1: Predict the valence and arousal classes of movie clips  
valence (negative-neutral-positive)  
arousal (calm-neutral-excited)

In [1]:
import os
import numpy as np
import pandas as pd

#### In the first part, we use all the features provided

In [2]:
# read in the features of arousal
with open('features/ACCEDEfeaturesArousal_TAC2015.txt') as file:
    lines = file.readlines()
    data = [line.replace('\n','').split('\t') for line in lines]
    
# make a dataframe from it
a = np.asarray(data)
index = a[1:,1]
col = a[0,:]
a = a[1:,:]
df_arousal = pd.DataFrame(data=a, index=index, columns=col).drop(['id','name'],axis=1)

# read in the features of arousal
with open('features/ACCEDEfeaturesValence_TAC2015.txt') as file:
    lines = file.readlines()
    data = [line.replace('\n','').split('\t') for line in lines]
    
# make a dataframe from it
a = np.asarray(data)
index = a[1:,1]
col = a[0,:]
a = a[1:,:]
df_valence = pd.DataFrame(data=a, index=index, columns=col).drop(['id','name'],axis=1)
df_valence.head(2)

Unnamed: 0,colorfulness,alpha,hueCount,maxSaliencyCount,compositionalBalance,depthOfField,saliencyDisparity,spatialEdgeDistributionArea,entropyComplexity,nbWhiteFrames,nbFades,nbSceneCuts,asymmetry_env,flatness,zcr,colorStrength,colorRawEnergy
ACCEDE00000,47.939117,5,100,0.002094,4.59312,110.988586,0.015579,0.842532,12.011075,0,0,1,19.727504,0.000287,0.319205,0.113005,162.118626
ACCEDE00001,52.748894,41,45,0.004708,1.085655,125.017303,0.022905,0.860802,12.795528,0,0,1,57.945141,0.0,0.194707,0.072096,11.467636


In [3]:
# read the annotation
with open('annotations/ACCEDEaffect.txt') as file:
    lines = file.readlines()
    annotate = [line.replace('\n','').split('\t') for line in lines]
b = np.asarray(annotate)
index = b[1:,1]
col = b[0,:]
b = b[1:,:]
df_annotate = pd.DataFrame(data=b, index=index, columns=col).drop(['id','name'],axis=1)

## Examine the class distribution
print (df_annotate['valenceClass'].value_counts())
print (df_annotate['arousalClass'].value_counts())

0     3552
-1    3411
1     2837
Name: valenceClass, dtype: int64
-1    6206
1     2247
0     1347
Name: arousalClass, dtype: int64


In [4]:
# read the sets (train:1, test:0, validation:2)
with open('annotations/ACCEDEsets.txt') as file:
    lines = file.readlines()
    annotate = [line.replace('\n','').split('\t') for line in lines]
b = np.asarray(annotate)
index = b[1:,1]
col = b[0,:]
b = b[1:,:]
df_set = pd.DataFrame(data=b, index=index, columns=col).drop(['id','name'],axis=1)
df_set['set'] = df_set['set'].astype(int)

valence = pd.concat([df_valence, df_annotate['valenceClass'].astype(int), df_set['set']], axis=1)
arousal = pd.concat([df_arousal, df_annotate['arousalClass'].astype(int), df_set['set']], axis=1)

In [5]:
valence.dtypes

colorfulness                   object
alpha                          object
hueCount                       object
maxSaliencyCount               object
compositionalBalance           object
depthOfField                   object
saliencyDisparity              object
spatialEdgeDistributionArea    object
entropyComplexity              object
nbWhiteFrames                  object
nbFades                        object
nbSceneCuts                    object
asymmetry_env                  object
flatness                       object
zcr                            object
colorStrength                  object
colorRawEnergy                 object
valenceClass                    int32
set                             int32
dtype: object

In [6]:
#split training and testing set
trainV = valence.loc[valence['set']==1] 
trainA = arousal.loc[arousal['set']==1] 
testV = valence.loc[valence['set']==0] 
testA = arousal.loc[arousal['set']==0] 
valV = valence.loc[valence['set']==2] 
valA = arousal.loc[arousal['set']==2] 
print('Train Set')
print('Valence:', trainV.shape, '  Arousal:',trainA.shape)

Train Set
Valence: (2450, 19)   Arousal: (2450, 25)


In [7]:
arousal_feature = ['colorfulness','minEnergy','alpha','lightning','globalActivity'
                            ,'nbWhiteFrames','nbSceneCuts','cutLength','flatness_env'
                            ,'wtf_max2stdratio_1','wtf_max2stdratio_2','wtf_max2stdratio_3'
                            ,'wtf_max2stdratio_4','wtf_max2stdratio_5','wtf_max2stdratio_6'
                            ,'wtf_max2stdratio_7','wtf_max2stdratio_8','wtf_max2stdratio_9'
                            ,'wtf_max2stdratio_10','wtf_max2stdratio_11','wtf_max2stdratio_12'
                            ,'medianLightness','slope']
valence_feature = ['colorfulness','alpha','hueCount','maxSaliencyCount','compositionalBalance'
                                       ,'depthOfField','saliencyDisparity','spatialEdgeDistributionArea'
                                       ,'entropyComplexity','nbWhiteFrames','nbFades','nbSceneCuts','asymmetry_env'
                                       ,'flatness','zcr','colorStrength','colorRawEnergy']

train_arousal = np.asarray(trainA[arousal_feature]).astype(float)
train_valence = np.asarray(trainV[valence_feature]).astype(float)
test_arousal = np.asarray(testA[arousal_feature]).astype(float)
test_valence = np.asarray(testV[valence_feature]).astype(float)
val_arousal = np.asarray(valA[arousal_feature]).astype(float)
val_valence = np.asarray(valV[valence_feature]).astype(float)

label_trn = np.vstack( (trainV['valenceClass'].values,trainA['arousalClass'].values))
label_tst = np.vstack( (testV['valenceClass'].values, testA['arousalClass'].values))
label_val = np.vstack( (valV['valenceClass'].values, valA['arousalClass'].values))

In [8]:
# normalize the data

from sklearn.preprocessing import StandardScaler

scalerA = StandardScaler().fit(train_arousal)
X_train_a = scalerA.transform(train_arousal)
X_test_a = scalerA.transform(test_arousal)
X_val_a = scalerA.transform(val_arousal)

scalerV = StandardScaler().fit(train_valence)
X_train_v = scalerV.transform(train_valence)
X_test_v = scalerV.transform(test_valence)
X_val_v = scalerV.transform(val_valence)

In [9]:
# Concatenate to treat training and validation set as a total training set when testing
X_total_v = np.vstack((X_train_v, X_val_v))
X_total_a = np.vstack((X_train_a, X_val_a))
label_total= np.hstack((label_trn, label_val ))

In [11]:
from sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

def findAcc(p,y):
    return len(np.where((p - y) == 0)[0]) / float(len(p))

lr = linear_model.LogisticRegression()
for i in range(2):
    if i == 0:
        print('############## Valence Prediction ####################')
        trnX = X_train_v
        valX = X_val_v
        tstX = X_test_v
        totalX = X_total_v
    else:
        print('############## Arousal Prediction ####################')
        trnX = X_train_a
        valX = X_val_a
        tstX = X_test_a
        totalX = X_total_a
        
    trnY = label_trn[i]
    tstY = label_tst[i]
    valY = label_val[i]
    totalY = label_total[i]
    
    ############## Random Forest Classifier ###########
    # choose param
    bestNum = 0
    bestAcc = 0
    for tree in range(10,100,10):
        rf = RandomForestClassifier(n_estimators = tree)
        rf.fit(trnX, trnY)
        pred = rf.predict(valX)
        acc = findAcc(pred, valY)
        print('RF(%i) accuracy: %f' %(tree,acc))
        if acc>bestAcc:
            bestAcc = acc
            bestNum = tree
    
    # test
    final_rf = RandomForestClassifier(n_estimators = bestNum)
    final_rf.fit(totalX, totalY)
    pred = final_rf.predict(tstX)
    acc = findAcc(pred, tstY)
    con_rf = confusion_matrix(tstY, pred)
    print('[TESTING] RF (%i) accuracy: %f' %(bestNum, acc))
        
    ############## KNN Classifier ###########
    # choose param
    bestNum = 0
    bestAcc = 0
    for neighbor in range(3,9,2):
        knn = KNeighborsClassifier(n_neighbors=neighbor)
        knn.fit(trnX, trnY)
        pred = knn.predict(valX)
        acc = findAcc(pred, valY)
        print('KNN(%i) accuracy: %f' %(neighbor,acc))
        if acc>bestAcc:
            bestAcc = acc
            bestNum = neighbor
    
    # test
    final_knn = KNeighborsClassifier(n_neighbors=bestNum)
    final_knn.fit(totalX, totalY)
    pred = final_knn.predict(tstX)
    acc = findAcc(pred, tstY)
    print('[TESTING] KNN (%i) accuracy: %f' %(bestNum, acc))
    con_knn = confusion_matrix(tstY, pred)
        
    ############# Logistic Regression #############
    lr.fit(totalX, totalY)
    pred = lr.predict(tstX)
    acc = findAcc(pred, tstY)
    print('[TESTING] LR accuracy: %f' %(acc))
    con_lr = confusion_matrix(tstY, pred)


############## Valence Prediction ####################
RF(10) accuracy: 0.352245
RF(20) accuracy: 0.375102
RF(30) accuracy: 0.366122
RF(40) accuracy: 0.373061
RF(50) accuracy: 0.355918
RF(60) accuracy: 0.363265
RF(70) accuracy: 0.366939
RF(80) accuracy: 0.366939
RF(90) accuracy: 0.365306
[TESTING] RF (20) accuracy: 0.403469
KNN(3) accuracy: 0.337551
KNN(5) accuracy: 0.354286
KNN(7) accuracy: 0.362041
[TESTING] KNN (7) accuracy: 0.380000
[TESTING] LR accuracy: 0.429184
############## Arousal Prediction ####################
RF(10) accuracy: 0.597959
RF(20) accuracy: 0.621633
RF(30) accuracy: 0.618367
RF(40) accuracy: 0.631020
RF(50) accuracy: 0.629796
RF(60) accuracy: 0.630612
RF(70) accuracy: 0.631429
RF(80) accuracy: 0.635102
RF(90) accuracy: 0.632245
[TESTING] RF (80) accuracy: 0.645102
KNN(3) accuracy: 0.580408
KNN(5) accuracy: 0.611429
KNN(7) accuracy: 0.611837
[TESTING] KNN (7) accuracy: 0.609592
[TESTING] LR accuracy: 0.636122


### We now add audio features that we extracted by ourselves

In [12]:
# read file and parse
with open('features/Output.txt') as file:
    lines = file.readlines()
    f = [line.replace(' ','').split(',') for line in lines]

col = f[0]
f_array = np.asarray(f)
# delete some weird rows
ff = np.delete(f_array, 1474, 0)
ff = np.delete(ff,5515,0)
index = ff[1:,0]
audio = pd.DataFrame(data=ff[1:,0:7], index = index, columns = col[0:7]).drop('audioID',axis=1)

In [13]:
# merge the dataframes
df_valence_audio = pd.merge(df_valence, audio, left_index=True, right_index=True)
df_arousal_audio = pd.merge(df_arousal, audio, left_index=True, right_index=True)
valence_audio = pd.concat([df_valence_audio, df_annotate['valenceClass'].astype(int), df_set['set']], axis=1)
arousal_audio = pd.concat([df_arousal_audio, df_annotate['arousalClass'].astype(int), df_set['set']], axis=1)

In [14]:
### Use the same ML flow as above
#split training and testing set
trainV = valence_audio.loc[valence_audio['set']==1] 
trainA = arousal_audio.loc[arousal_audio['set']==1] 
testV = valence_audio.loc[valence_audio['set']==0] 
testA = arousal_audio.loc[arousal_audio['set']==0] 
valV = valence_audio.loc[valence_audio['set']==2] 
valA = arousal_audio.loc[arousal_audio['set']==2] 

print('Train Set')
print('Valence:', trainV.shape, '  Arousal:',trainA.shape)

train_arousal = np.asarray(trainA).astype(float)
train_valence = np.asarray(trainV).astype(float)
test_arousal = np.asarray(testA).astype(float)
test_valence = np.asarray(testV).astype(float)
val_arousal = np.asarray(valA).astype(float)
val_valence = np.asarray(valV).astype(float)

label_trn = np.vstack( (trainV['valenceClass'].values, trainA['arousalClass'].values))
label_tst = np.vstack( (testV['valenceClass'].values, testA['arousalClass'].values))
label_val = np.vstack( (valV['valenceClass'].values, valA['arousalClass'].values))

# normalize the data
scalerA = StandardScaler().fit(train_arousal)
X_train_a = scalerA.transform(train_arousal)
X_test_a = scalerA.transform(test_arousal)
X_val_a = scalerA.transform(val_arousal)

scalerV = StandardScaler().fit(train_valence)
X_train_v = scalerV.transform(train_valence)
X_test_v = scalerV.transform(test_valence)
X_val_v = scalerV.transform(val_valence)

X_total_v = np.vstack((X_train_v, X_val_v))
X_total_a = np.vstack((X_train_a, X_val_a))
label_total= np.hstack((label_trn, label_val ))

Train Set
Valence: (2450, 25)   Arousal: (2450, 31)


In [15]:
lr = linear_model.LogisticRegression()
for i in range(2):
    if i == 0:
        print('############## Valence Prediction ####################')
        trnX = X_train_v
        valX = X_val_v
        tstX = X_test_v
        totalX = X_total_v
    else:
        print('############## Arousal Prediction ####################')
        trnX = X_train_a
        valX = X_val_a
        tstX = X_test_a
        totalX = X_total_a
        
    trnY = label_trn[i]
    tstY = label_tst[i]
    valY = label_val[i]
    totalY = label_total[i]
    
    ############## Random Forest Classifier ###########
    # choose param
    bestNum = 0
    bestAcc = 0
    for tree in range(10,100,10):
        rf = RandomForestClassifier(n_estimators = tree)
        rf.fit(trnX, trnY)
        pred = rf.predict(valX)
        acc = findAcc(pred, valY)
        print('RF(%i) accuracy: %f' %(tree,acc))
        if acc>bestAcc:
            bestAcc = acc
            bestNum = tree
    
    # test
    final_rf = RandomForestClassifier(n_estimators = bestNum)
    final_rf.fit(totalX, totalY)
    pred = final_rf.predict(tstX)
    acc = findAcc(pred, tstY)
    con_rf = confusion_matrix(tstY, pred)
    print('[TESTING] RF (%i) accuracy: %f' %(bestNum, acc))
        
    ############## KNN Classifier ###########
    # choose param
    bestNum = 0
    bestAcc = 0
    for neighbor in range(3,9,2):
        knn = KNeighborsClassifier(n_neighbors=neighbor)
        knn.fit(trnX, trnY)
        pred = knn.predict(valX)
        acc = findAcc(pred, valY)
        print('KNN(%i) accuracy: %f' %(neighbor,acc))
        if acc>bestAcc:
            bestAcc = acc
            bestNum = neighbor
    
    # test
    final_knn = KNeighborsClassifier(n_neighbors=bestNum)
    final_knn.fit(totalX, totalY)
    pred = final_knn.predict(tstX)
    acc = findAcc(pred, tstY)
    print('[TESTING] KNN (%i) accuracy: %f' %(bestNum, acc))
    con_knn = confusion_matrix(tstY, pred)
        
    ############# Logistic Regression #############
    lr.fit(totalX, totalY)
    pred = lr.predict(tstX)
    acc = findAcc(pred, tstY)
    print('[TESTING] LR accuracy: %f' %(acc))
    con_lr = confusion_matrix(tstY, pred)

############## Valence Prediction ####################
RF(10) accuracy: 0.998776
RF(20) accuracy: 1.000000
RF(30) accuracy: 1.000000
RF(40) accuracy: 1.000000
RF(50) accuracy: 1.000000
RF(60) accuracy: 1.000000
RF(70) accuracy: 1.000000
RF(80) accuracy: 1.000000
RF(90) accuracy: 1.000000
[TESTING] RF (20) accuracy: 1.000000
KNN(3) accuracy: 0.757959
KNN(5) accuracy: 0.790204
KNN(7) accuracy: 0.812653
[TESTING] KNN (7) accuracy: 0.850000
[TESTING] LR accuracy: 1.000000
############## Arousal Prediction ####################
RF(10) accuracy: 0.994694
RF(20) accuracy: 0.998367
RF(30) accuracy: 1.000000
RF(40) accuracy: 0.999592
RF(50) accuracy: 1.000000
RF(60) accuracy: 1.000000
RF(70) accuracy: 1.000000
RF(80) accuracy: 1.000000
RF(90) accuracy: 1.000000
[TESTING] RF (30) accuracy: 0.999796
KNN(3) accuracy: 0.808571
KNN(5) accuracy: 0.815102
KNN(7) accuracy: 0.819592
[TESTING] KNN (7) accuracy: 0.858367
[TESTING] LR accuracy: 0.636122
