In [1]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px

## Exploring clustering algorithm & its labels on train+test

In [2]:
df_train = pd.read_csv('portrait_emotions_labels.csv')
df_train

Unnamed: 0,filename,emotion,label
0,image_06637.jpg,angry,1
1,image_06638.jpg,angry,1
2,image_06639.jpg,disgust,6
3,image_06641.jpg,neutral,7
4,image_06642.jpg,sad,2
...,...,...,...
5831,image_08036.jpg,angry,1
5832,image_08035.jpg,neutral,7
5833,image_08038.jpg,sad,2
5834,image_08028.jpg,fear,4


In [3]:
df_test = pd.read_csv('rijks_emotion_cluster1.csv')
df_test

Unnamed: 0,filename,emotion
0,img_1.jpg,surprised
1,img_10.jpg,disgust
2,img_100.jpg,angry
3,img_101.jpg,fear
4,img_102.jpg,angry
...,...,...
761,img_95.jpg,surprised
762,img_96.jpg,surprised
763,img_97.jpg,happy
764,img_98.jpg,happy


In [4]:
def categorize(row):
    if row['emotion'] == 'angry':
        return 1
    elif row['emotion'] == 'sad':
        return 2
    elif row['emotion'] == 'happy':
        return 3
    elif row['emotion'] == 'fear':
        return 4
    elif row['emotion'] == 'surprised':
        return 5
    elif row['emotion'] == 'disgust':
        return 6
    else:
        return 7

In [5]:
df_test['label'] = df_test.apply(lambda x: categorize(x), axis=1)
df_test

Unnamed: 0,filename,emotion,label
0,img_1.jpg,surprised,5
1,img_10.jpg,disgust,6
2,img_100.jpg,angry,1
3,img_101.jpg,fear,4
4,img_102.jpg,angry,1
...,...,...,...
761,img_95.jpg,surprised,5
762,img_96.jpg,surprised,5
763,img_97.jpg,happy,3
764,img_98.jpg,happy,3


In [7]:
fig = px.bar(
    x = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprised'],
    y = [list(df_train['emotion'].values).count(i) for i in np.unique(df_train['emotion'].values)], 
    color = np.unique(df_train['emotion'].values),
    color_continuous_scale="Emrld"
)

fig.update_xaxes(title="Emotions")
fig.update_yaxes(title = "No. of Images")
fig.update_layout(
    showlegend = True,
    title = {
        'text': 'Clustered Train Data ',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)

fig.show()

In [8]:
fig = px.bar(
    x = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprised'],
    y = [list(df_test['emotion'].values).count(i) for i in np.unique(df_test['emotion'].values)], 
    color = np.unique(df_test['emotion'].values),
    color_continuous_scale="Emrld"
)

fig.update_xaxes(title="Emotions")
fig.update_yaxes(title = "No. of Images")
fig.update_layout(
    showlegend = True,
    title = {
        'text': 'Clustered Rijks Data ',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)

fig.show()

## Comparing model and human labels for Rijksmuseum

In [9]:
df_annotated = pd.read_csv('cropped_images_labelled.csv')
df_annotated

Unnamed: 0,filename,emotion
0,cropped_images/img_1.jpg,Neutral
1,cropped_images/img_2.jpg,Neutral
2,cropped_images/img_3.jpg,Neutral
3,cropped_images/img_4.jpg,Neutral
4,cropped_images/img_5.jpg,Angry
...,...,...
761,cropped_images/img_762.jpg,Sad
762,cropped_images/img_763.jpg,Neutral
763,cropped_images/img_764.jpg,Neutral
764,cropped_images/img_765.jpg,Happy


In [10]:
def categorize(row):
    if row['emotion'] == 'Angry':
        return 1
    elif row['emotion'] == 'Sad':
        return 2
    elif row['emotion'] == 'Happy':
        return 3
    elif row['emotion'] == 'Fear':
        return 4
    elif row['emotion'] == 'Surprise':
        return 5
    elif row['emotion'] == 'Disgust':
        return 6
    else:
        return 7

In [11]:
df_annotated['label'] = df_annotated.apply(lambda x: categorize(x), axis=1)
df_annotated

Unnamed: 0,filename,emotion,label
0,cropped_images/img_1.jpg,Neutral,7
1,cropped_images/img_2.jpg,Neutral,7
2,cropped_images/img_3.jpg,Neutral,7
3,cropped_images/img_4.jpg,Neutral,7
4,cropped_images/img_5.jpg,Angry,1
...,...,...,...
761,cropped_images/img_762.jpg,Sad,7
762,cropped_images/img_763.jpg,Neutral,7
763,cropped_images/img_764.jpg,Neutral,7
764,cropped_images/img_765.jpg,Happy,3


In [12]:
df_vgg = pd.read_csv('predictions/vgg19_predictions_15epochs')
df_vgg = df_vgg[['filename', 'predicted_label']]

In [13]:
df_vgg

Unnamed: 0,filename,predicted_label
0,img_1.jpg,3
1,img_10.jpg,4
2,img_100.jpg,2
3,img_101.jpg,3
4,img_102.jpg,6
...,...,...
761,img_95.jpg,4
762,img_96.jpg,3
763,img_97.jpg,3
764,img_98.jpg,3


In [14]:
correct = (df_annotated['label'] == df_vgg['predicted_label'])
accuracy = (correct.sum() / correct.size) * 100
accuracy

13.577023498694519

In [15]:
fig = px.bar(
    x = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise'],
    y = [list(df_annotated['emotion'].values).count(i) for i in np.unique(df_annotated['emotion'].values)], 
    color = np.unique(df_annotated['emotion'].values),
    color_continuous_scale="Emrld"
)

fig.update_xaxes(title="Emotions")
fig.update_yaxes(title = "No. of Images")
fig.update_layout(
    showlegend = True,
    title = {
        'text': 'Annotated Rijksmuseum Data ',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)

fig.show()

In [29]:
np.unique(df_vgg['predicted_label'].values)

array([1, 2, 3, 4, 6, 7], dtype=int64)

In [28]:
fig = px.bar(
    x = ['Angry', 'Sad', 'Happy', 'Fear', 'Disgust', 'Neutral'],
    y = [list(df_vgg['predicted_label'].values).count(i) for i in np.unique(df_vgg['predicted_label'].values)], 
    color = np.unique(df_vgg['predicted_label'].values),
    color_continuous_scale="Emrld"
)

fig.update_xaxes(title="Emotions")
fig.update_yaxes(title = "No. of Images")
fig.update_layout(
    showlegend = True,
    title = {
        'text': 'VGG-labelled Rijksmuseum Data ',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    }
)

fig.show()