In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pylab as plb
import seaborn as sns

## Load Data

In [None]:
# Raw ratings table; the cells below filter and aggregate this frame.
ratings_csv = 'ratings.csv'
df_data = pd.read_csv(ratings_csv)
df_data.head()

**Split data by approach**

In [None]:
# One configuration per experiment: a display name, the value matched against
# the 'experimentType' column, and the folder holding that experiment's data.
encode_config = dict(
    name='Autoencoder algorithm',
    type='encode',
    base_path='./encode_data',
)

social_graph_config = dict(
    name='Autoencoder + Social graph data',
    type='social_graph',
    base_path='./social_graph_data',
)

content_based_config = dict(
    name='Content based data',
    type='contentBased',
    base_path='./content_based_data',
)

# Experiment analysed by the rest of the notebook; point this at another
# config above to switch.
experiment_config = encode_config

In [None]:
# Keep only the ratings whose experiment type matches the selected config.
is_selected_experiment = df_data['experimentType'] == experiment_config['type']
df_data_experiment = df_data[is_selected_experiment]
print(f"{experiment_config['name']} shape: {df_data_experiment.shape}")

## Plot Agree distribution

In [None]:
def group_by_artwork(df_data):
    """Count ratings per (source artwork, rated artwork, rating) combination.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Must contain the columns 'sourceArtworkId', 'ratedArtworkId',
        'rating' and 'id'.

    Returns
    -------
    pandas.DataFrame
        One row per combination of the three grouping columns, with the
        number of non-null 'id' values stored in a 'count' column.
    """
    group_keys = ['sourceArtworkId', 'ratedArtworkId', 'rating']
    return (
        df_data.groupby(group_keys)
        .agg({'id': 'count'})
        .rename(columns={'id': 'count'})
        .reset_index()
    )

In [None]:
# Vote counts per (source, rated, rating) for the selected experiment.
df_data_to_plot = df_data_experiment.pipe(group_by_artwork)
df_data_to_plot.head()

**Get reference artworks id**

In [None]:
# Distinct source artwork ids present in the aggregated ratings.
reference_artworks = pd.unique(df_data_to_plot['sourceArtworkId'])
reference_artworks

In [None]:
# Apply the seaborn style before the axes are created: the style is stored in
# matplotlib's rcParams, which are read when an Axes is built, so setting it
# afterwards only affects figures drawn on a later run.
sns.set(style="whitegrid")

# Size the grid from the number of reference artworks instead of assuming a
# fixed 3x2 layout, which overflowed (IndexError) with more than six artworks.
ncols = 2
nrows = max(1, -(-len(reference_artworks) // ncols))  # ceiling division
fig, axs = plt.subplots(
    nrows=nrows, ncols=ncols, figsize=(15, 5 * nrows), squeeze=False
)
axes = axs.ravel()  # row-major order: left to right, then top to bottom

# One panel per reference artwork: vote counts per rated artwork, split by rating.
for artwork_id, ax in zip(reference_artworks, axes):
    df = df_data_to_plot[df_data_to_plot['sourceArtworkId'] == artwork_id]
    sns.barplot(x='ratedArtworkId', y='count', hue='rating', data=df, ax=ax)

    ax.set_title('Reference artwork id: ' + str(artwork_id), fontdict={'fontsize': 14, 'fontweight': 'medium'})
    ax.tick_params(axis='x', labelrotation=90)

# Hide the trailing panel(s) left empty when the artworks do not fill the grid.
for unused_ax in axes[len(reference_artworks):]:
    unused_ax.set_visible(False)

fig.tight_layout()
plt.show()

**Save plot**

In [None]:
# Written to the working directory, named after the experiment type.
fig.savefig(f"{experiment_config['type']}_agree_dist.png", dpi=100)

**Plot individual artwork**

In [None]:
# Same bar chart as the grid above, for the single reference artwork 7066.
fig, ax = plt.subplots(figsize=(12, 10))

df = df_data_to_plot[df_data_to_plot['sourceArtworkId'] == 7066]
sns.barplot(x='ratedArtworkId', y='count', hue='rating', data=df, ax=ax)

plot_title = experiment_config['name'] + ': Agree/Disagree distribution'
ax.set_title(plot_title, fontdict={'fontsize': 14, 'fontweight': 'medium'})
# Rotate the rated-artwork ids so they do not overlap.
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

plt.show()

In [None]:
# Written to the working directory, named after the experiment type.
fig.savefig(f"{experiment_config['type']}_agree_dist_7066.png", dpi=100)

## Precision recall metrics

In [None]:
from sklearn.metrics import precision_recall_fscore_support, average_precision_score, precision_recall_curve

In [None]:
# One row per (source, rated) artwork pair, regardless of how it was rated.
pair_columns = ['sourceArtworkId', 'ratedArtworkId']
df_precision = df_data_to_plot.loc[:, pair_columns].drop_duplicates()
df_precision.shape

In [None]:
# Preview the distinct (source, rated) pairs.
df_precision.head()

In [None]:
def define_hit(row, df):
    """Label a (source, rated) artwork pair as a hit (1) or a miss (0).

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'sourceArtworkId' and 'ratedArtworkId'.
    df : pandas.DataFrame
        Vote counts with the columns 'sourceArtworkId', 'ratedArtworkId',
        'rating' and 'count'.

    Returns
    -------
    int
        0 when the pair has strictly more 'Disagree' than 'Agree' votes,
        otherwise 1. A rating with no row for the pair counts as zero votes,
        so ties and pairs without any votes are labelled 1.
    """
    # Filter the rows for this pair once and reuse them for both ratings.
    pair_mask = (
        (df['sourceArtworkId'] == row['sourceArtworkId'])
        & (df['ratedArtworkId'] == row['ratedArtworkId'])
    )
    pair_votes = df[pair_mask]

    def _votes_for(rating):
        # First matching count, or 0 when the pair has no row for this rating.
        counts = pair_votes.loc[pair_votes['rating'] == rating, 'count']
        return counts.values[0] if counts.shape[0] != 0 else 0

    agree_rate = _votes_for('Agree')
    disagree_rate = _votes_for('Disagree')

    # Only a strict majority of 'Disagree' votes is a miss; ties are hits.
    return 0 if agree_rate < disagree_rate else 1

In [None]:
# y_true: hit/miss label derived from the votes for each pair.
# y_pred: constant 1 for every listed pair.
df_precision = df_precision.assign(
    y_true=df_precision.apply(define_hit, axis=1, df=df_data_to_plot),
    y_pred=1,
)
df_precision.head()

**Select artwork**

In [None]:
import os
import json

# experimentData.json lives in the selected experiment's data folder.
experiment_data_path = os.path.join(experiment_config['base_path'], 'experimentData.json')
with open(experiment_data_path) as json_file:
    data_dict = json.load(json_file)

sim_artworks = data_dict['sim_artworks']

# Ids of the similar artworks, in the order they are stored in the file.
artworks_id = [s_artwork['id'] for s_artwork in sim_artworks]

artworks_id[:5]

In [None]:
# Pairs whose reference artwork is 7066, keyed by the rated artwork id.
is_artwork_7066 = df_precision['sourceArtworkId'] == 7066
df_precision_7066 = df_precision.loc[is_artwork_7066].set_index('ratedArtworkId')
df_precision_7066.head()

In [None]:
# Reorder the rows to follow the artwork ids read from experimentData.json.
# NOTE(review): ids in artworks_id that have no row for artwork 7066 become
# all-NaN rows here — confirm the metric cells below can handle them.
df_precision_7066 = df_precision_7066.reindex(index=artworks_id)
df_precision_7066.head()

In [None]:
#Precision Recall curve
# Precision-recall curve for reference artwork 7066
y_true_7066 = df_precision_7066['y_true']
y_pred_7066 = df_precision_7066['y_pred']
precision_recall_curve(y_true_7066, y_pred_7066)

In [None]:
# Cut-offs: each metric is computed on the first k rows of df_precision_7066.
k_index = [5, 10, 15, 20, 25]

prec_recall_metrics = []
average_precision_metric = []
for k in k_index:
    # Slice once per cut-off and reuse the arrays for both metrics.
    y_true_at_k = df_precision_7066['y_true'][:k].values
    y_pred_at_k = df_precision_7066['y_pred'][:k].values

    # Precision, recall, F-score and support
    prec_recall = precision_recall_fscore_support(y_true_at_k, y_pred_at_k, average='binary')
    prec_recall_metrics.append(prec_recall)

    # Average precision
    avr_prec = average_precision_score(y_true_at_k, y_pred_at_k)
    average_precision_metric.append(avr_prec)

In [None]:
# One row per cut-off k; columns follow the order of the tuples returned by
# precision_recall_fscore_support, plus the average precision.
metric_columns = ['precision', 'recall', 'f_score', 'support']
df_metrics = pd.DataFrame(prec_recall_metrics, index=k_index, columns=metric_columns)
df_metrics = df_metrics.assign(avg_precision=average_precision_metric)

df_metrics

In [None]:
# Keep the index (the cut-off k) as the first column of the CSV.
df_metrics.to_csv(f"{experiment_config['type']}_metrics.csv", index=True)

**Precision and Recall for each artwork**

In [None]:
# Precision / recall / F-score / support over all rated pairs of each
# reference artwork (no cut-off).
prec_recall_metrics = []
for artwork_id in reference_artworks:
    is_current_artwork = df_precision['sourceArtworkId'] == artwork_id
    df = df_precision.loc[is_current_artwork]
    prec_recall = precision_recall_fscore_support(df['y_true'], df['y_pred'], average='binary')
    prec_recall_metrics += [prec_recall]

In [None]:
# One row per reference artwork.
metric_columns = ['precision', 'recall', 'f_score', 'support']
pd.DataFrame(prec_recall_metrics, index=reference_artworks, columns=metric_columns)