In [None]:
# Load the FER data, rescale it by 255, then shuffle the rows.

import numpy as np
import pandas as pd

fer = pd.read_csv('fer2018_data.csv')

# Dividing the whole frame by 255 rescales every column, 'emotion' included,
# so the untouched label column is written back over the scaled one.
normalized_fer = fer.div(255)
normalized_fer['emotion'] = fer['emotion']

# Preview: first five rows after normalization.
print(normalized_fer.head())

# Shuffle the rows; the fixed seed keeps the permutation repeatable.
# Note that normalized_fer is rebound here, so this step is not idempotent.
np.random.seed(1)
shuffled_index = np.random.permutation(normalized_fer.index)
normalized_fer = normalized_fer.loc[shuffled_index]

# Preview: first five rows after shuffling.
print(normalized_fer.head())

# Hold-out evaluation of KNN regression on all pixel columns (p1 .. p2304).
# NOTE(review): 'emotion' looks like a class code but is fit as a continuous
# regression target here - confirm this is intended.

from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

# Training split: the first 25120 rows of the shuffled frame.
train_data = normalized_fer.iloc[:25120]
train_features = train_data.loc[:, 'p1':'p2304']
train_target = train_data['emotion']

# 3-nearest-neighbour regressor with brute-force neighbour search.
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute').fit(train_features, train_target)

# Predicting the whole held-out set in one call was not feasible (per the
# original note), so it is scored in two chunks: rows 25120-29999 first ...
test_data1 = normalized_fer.iloc[25120:30000]
test_features = test_data1.loc[:, 'p1':'p2304']
test_target = test_data1['emotion']
predictions = knn_model.predict(test_features)

# ... then everything from row 30000 onward.
test_data = normalized_fer.iloc[30000:]
test_features = test_data.loc[:, 'p1':'p2304']
test_target = test_data['emotion']
predictions_new = knn_model.predict(test_features)

# Stitch the two prediction arrays back together in row order.
y_predict = np.concatenate([predictions, predictions_new])

# Ground truth for the complete held-out range, matching y_predict row for row.
test_data = normalized_fer.iloc[25120:]
test_target = test_data['emotion']

# Report MSE first, then RMSE.
mse = mean_squared_error(test_target, y_predict)
rmse = mse ** 0.5
print(mse)
print(rmse)

#Finding the correlation coefficients

def read_file(filename):
    """Read a CSV and return it divided by 255, with 'emotion' left unscaled.

    Parameters
    ----------
    filename : str
        Path of the CSV file; it must contain an 'emotion' column.

    Returns
    -------
    pandas.DataFrame
        Same columns as the file, every column divided by 255 except
        'emotion', which carries the values read from the file.
    """
    raw = pd.read_csv(filename)
    scaled = raw.div(255)
    # The division also rescaled the label column; restore the raw labels.
    scaled['emotion'] = raw['emotion']
    return scaled

def correlation_coeff(file_list=None):
    """Collect the columns most positively correlated with 'emotion' per file.

    Each CSV in ``file_list`` is loaded with ``read_file``. The Pearson
    correlation of every column with the 'emotion' column is computed and the
    columns are ranked from highest to lowest (signed) correlation. The names
    of the top two, top five and top ten columns of each file are appended to
    three running lists. Progress messages are printed for every file and the
    three final lists are printed before returning.

    Parameters
    ----------
    file_list : list of str, optional
        CSV paths to process. When omitted, the seven per-emotion FER files
        are used.

    Returns
    -------
    tuple of (list, list, list)
        ``(all_two, all_five, all_ten)``. Results are concatenated file by
        file, so a column that ranks highly in several files appears once per
        file (the lists may contain duplicates).
    """
    if file_list is None:
        file_list = ['fer2018_angry.csv', 'fer2018_disgust.csv', 'fer2018_fear.csv',
                     'fer2018_happy.csv', 'fer2018_sad.csv', 'fer2018_surprise.csv',
                     'fer2018_neutral.csv']
    all_two = []
    all_five = []
    all_ten = []

    for file_name in file_list:
        print('started')
        data = read_file(file_name)
        print("read {0}".format(file_name))
        # Only the correlations against 'emotion' are needed, so correlate each
        # column with that one Series instead of building the full
        # column-by-column matrix.
        corr_coeff = data.corrwith(data['emotion'])
        # Remove the label by name rather than assuming it sorts to position 0;
        # on a tie or a NaN self-correlation a positional slice could keep
        # 'emotion' in the feature list.
        ranked = corr_coeff.drop('emotion').sort_values(ascending=False).head(10).index.tolist()
        print("found corr of {0}".format(file_name))
        all_two += ranked[:2]
        all_five += ranked[:5]
        all_ten += ranked
        print("finished {0}".format(file_name))
    print(all_two, all_five, all_ten)
    return all_two, all_five, all_ten

import pandas as pd
# Run the per-emotion correlation ranking. The three names receive the pooled
# top-2, top-5 and top-10 column-name lists returned by correlation_coeff().
two_corr,five_corr,ten_corr=correlation_coeff()


   emotion        p1        p2        p3        p4        p5        p6  \
0        0  0.274510  0.313725  0.321569  0.282353  0.227451  0.227451   
1        0  0.592157  0.588235  0.576471  0.607843  0.580392  0.521569   
2        2  0.905882  0.831373  0.611765  0.643137  0.682353  0.541176   
3        4  0.094118  0.125490  0.141176  0.117647  0.125490  0.090196   
4        6  0.015686  0.000000  0.000000  0.000000  0.000000  0.000000   

         p7        p8        p9  ...     p2295     p2296     p2297     p2298  \
0  0.235294  0.247059  0.211765  ...  0.623529  0.713725  0.717647  0.533333   
1  0.435294  0.549020  0.666667  ...  0.411765  0.423529  0.372549  0.423529   
2  0.631373  0.678431  0.713725  ...  0.407843  0.541176  0.596078  0.478431   
3  0.074510  0.078431  0.117647  ...  0.682353  0.494118  0.517647  0.517647   
4  0.000000  0.000000  0.000000  ...  0.047059  0.133333  0.121569  0.121569   

      p2299     p2300     p2301     p2302     p2303     p2304  
0  0.41568

In [None]:
# Hold-out KNN evaluation restricted to the two_corr columns.
from sklearn.neighbors import KNeighborsRegressor

# First 25120 rows of the shuffled frame train the model; the remainder tests it.
train_data = normalized_fer.iloc[:25120]
test_data = normalized_fer.iloc[25120:]

train_features, train_target = train_data.loc[:, two_corr], train_data['emotion']
test_features, test_target = test_data.loc[:, two_corr], test_data['emotion']

# 3-nearest-neighbour regressor with brute-force neighbour search.
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute').fit(train_features, train_target)
predictions = knn_model.predict(test_features)

# Report MSE first, then RMSE, on the held-out rows.
mse = mean_squared_error(test_target, predictions)
rmse = mse ** 0.5
print(mse)
print(rmse)

# Hold-out KNN evaluation restricted to the five_corr columns.
# Reuses the train_data / test_data split that is already in scope.
from sklearn.neighbors import KNeighborsRegressor

train_features, train_target = train_data.loc[:, five_corr], train_data['emotion']
test_features, test_target = test_data.loc[:, five_corr], test_data['emotion']

# 3-nearest-neighbour regressor with brute-force neighbour search.
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute').fit(train_features, train_target)
predictions = knn_model.predict(test_features)

# Report MSE first, then RMSE, on the held-out rows.
mse = mean_squared_error(test_target, predictions)
rmse = mse ** 0.5
print(mse)
print(rmse)

# Hold-out KNN evaluation restricted to the ten_corr columns.
# Reuses the train_data / test_data split that is already in scope.
from sklearn.neighbors import KNeighborsRegressor

train_features, train_target = train_data.loc[:, ten_corr], train_data['emotion']
test_features, test_target = test_data.loc[:, ten_corr], test_data['emotion']

# NOTE(review): this run uses n_neighbors=5 while the two_corr and five_corr
# runs use 3 - confirm the difference is intended.
knn_model = KNeighborsRegressor(n_neighbors=5, algorithm='brute').fit(train_features, train_target)
predictions = knn_model.predict(test_features)

# Report MSE first, then RMSE, on the held-out rows.
mse = mean_squared_error(test_target, predictions)
rmse = mse ** 0.5
print(mse)
print(rmse)

# 10-fold cross-validated KNN regression on all pixel columns (p1 .. p2304).
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, KFold

features = normalized_fer.loc[:, 'p1':'p2304']
target = normalized_fer['emotion']

kf = KFold(n_splits=10, shuffle=True, random_state=1)
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute')

# Pass `features` (every row) so X and y have the same number of samples;
# `train_features` from the hold-out runs only covers the training rows.
mses = cross_val_score(knn_model, features, target, scoring='neg_mean_squared_error', cv=kf)

# The scorer returns negated MSEs, so take the magnitude before the root.
rmses = np.sqrt(np.absolute(mses))
avg_rmse = np.mean(rmses)
print(rmses)
print(avg_rmse)

# 10-fold cross-validated KNN regression on the two_corr columns.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, KFold

features = normalized_fer.loc[:, two_corr]
target = normalized_fer['emotion']

kf = KFold(n_splits=10, shuffle=True, random_state=1)
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute')

# Pass `features` (every row, two_corr columns) so X matches `target` in
# length and actually reflects the column subset under test.
mses = cross_val_score(knn_model, features, target, scoring='neg_mean_squared_error', cv=kf)

# The scorer returns negated MSEs, so take the magnitude before the root.
rmses = np.sqrt(np.absolute(mses))
avg_rmse = np.mean(rmses)
print(rmses)
print(avg_rmse)

# 10-fold cross-validated KNN regression on the five_corr columns.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, KFold

features = normalized_fer.loc[:, five_corr]
target = normalized_fer['emotion']

kf = KFold(n_splits=10, shuffle=True, random_state=1)
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute')

# Pass `features` (every row, five_corr columns) so X matches `target` in
# length and actually reflects the column subset under test.
mses = cross_val_score(knn_model, features, target, scoring='neg_mean_squared_error', cv=kf)

# The scorer returns negated MSEs, so take the magnitude before the root.
rmses = np.sqrt(np.absolute(mses))
avg_rmse = np.mean(rmses)
print(rmses)
print(avg_rmse)

# 10-fold cross-validated KNN regression on the ten_corr columns.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score, KFold

# Select the ten_corr columns for this run.
features = normalized_fer.loc[:, ten_corr]
target = normalized_fer['emotion']

kf = KFold(n_splits=10, shuffle=True, random_state=1)
knn_model = KNeighborsRegressor(n_neighbors=3, algorithm='brute')

# Pass `features` (every row, ten_corr columns) so X matches `target` in
# length and actually reflects the column subset under test.
mses = cross_val_score(knn_model, features, target, scoring='neg_mean_squared_error', cv=kf)

# The scorer returns negated MSEs, so take the magnitude before the root.
rmses = np.sqrt(np.absolute(mses))
avg_rmse = np.mean(rmses)
print(rmses)
print(avg_rmse)
