In [1]:
import numpy as np
import pandas as pd

## Load data

In [None]:
# Load the raw ratings export; the relative path means 'ratings.csv' must sit in the working directory.
df_data = pd.read_csv('ratings.csv')
# Preview the first rows to sanity-check the load.
df_data.head()

**Split data by approach**

In [None]:
# Each experiment is described by a display name ('name'), an experiment
# identifier ('type') and the folder that holds its data ('base_path').
encode_config = dict(
    name='Autoencoder algorithm',
    type='encode',
    base_path='./encode_data',
)

social_graph_config = dict(
    name='Autoencoder + Social graph data',
    type='social_graph',
    base_path='./social_graph_data',
)

# Experiment analysed by the cells below; point this at `encode_config` to analyse the other one.
experiment_config = social_graph_config

In [None]:
# Keep only the ratings whose experiment type matches the selected configuration.
df_data_experiment = df_data.loc[df_data['experimentType'] == experiment_config['type']]
print(f"{experiment_config['name']} shape: {df_data_experiment.shape}")

**Get pair (source artwork, rated artwork)**

In [None]:
import os
import json

In [None]:
def get_pairs(data_dict):
    """Pair the source artwork id with the id of every similar artwork.

    Args:
        data_dict: Mapping with a 'source_artwork' entry exposing an 'id', and
            a 'sim_artworks' iterable whose items each expose an 'id'.

    Returns:
        list[tuple]: One (source id, similar artwork id) tuple per item of
        'sim_artworks', in the same order.
    """
    source_id = data_dict['source_artwork']['id']
    return [(source_id, similar['id']) for similar in data_dict['sim_artworks']]

In [None]:
def get_subjects(base_path):
    """Load every file of a folder and collect its artwork pairs.

    Files are visited in sorted name order so that the position of each group
    in the result is reproducible (`os.listdir` returns entries in arbitrary
    order). Entries that are not regular files, such as sub-directories, are
    skipped instead of being opened.

    Args:
        base_path: Folder whose files are JSON documents accepted by
            `get_pairs`.

    Returns:
        list[list[tuple]]: One list of pairs per file, as produced by
        `get_pairs`, ordered by file name.
    """
    artworks_pair = []

    for file_name in sorted(os.listdir(base_path)):
        file_path = os.path.join(base_path, file_name)
        if not os.path.isfile(file_path):
            # Opening a directory would raise; only regular files hold data.
            continue
        with open(file_path) as json_file:
            data_dict = json.load(json_file)
        artworks_pair.append(get_pairs(data_dict))

    return artworks_pair

In [None]:
import os
import json

# One list of (source artwork, rated artwork) pairs per file found in the experiment folder.
artworks_pair = get_subjects(experiment_config['base_path'])
# Number of pairs in the second group (index 1).
# NOTE(review): raises IndexError when the folder yields fewer than two groups.
len(artworks_pair[1])

## Define data as (subject, rater) format

**Get users**

In [None]:
def get_users(df):
    """Return the distinct user ids present in the ratings.

    Args:
        df: DataFrame with a 'userId' column.

    Returns:
        The array produced by `Series.unique()` on that column.
    """
    return df['userId'].unique()

In [None]:
# Distinct raters of the selected experiment.
usersId = get_users(df_data_experiment)
print(f'Number of users: {len(usersId)}')

**Define table**

In [None]:
def get_rating(df, userId, p):
    """Encode the rating a user gave to one (source, rated) artwork pair.

    Args:
        df: Ratings with 'userId', 'sourceArtworkId', 'ratedArtworkId' and
            'rating' columns.
        userId: Id of the rater to look up.
        p: Pair whose first item is the source artwork id and whose second
            item is the rated artwork id.

    Returns:
        int: 0 when the user has no rating for the pair, 1 when the first
        matching rating is 'Agree', 2 for any other rating value.
    """
    same_user = df['userId'] == userId
    same_source = df['sourceArtworkId'] == p[0]
    same_rated = df['ratedArtworkId'] == p[1]
    matches = df.loc[same_user & same_source & same_rated, 'rating'].values

    if matches.shape[0] == 0:
        return 0
    return 1 if matches[0] == 'Agree' else 2
    

In [None]:
def create_user_items_table(df, usersId, artworks_pair):
    """Build the user x item rating table for one group of artwork pairs.

    The ratings are collected in a NumPy array and wrapped in a DataFrame
    once. Writing cell by cell through chained indexing
    (`df_table.loc[userId][p] = r`) assigns into an intermediate object:
    pandas does not guarantee that such a write reaches the table, and with
    Copy-on-Write enabled it never does, leaving every cell at 0.

    Args:
        df: Ratings passed through to `get_rating`.
        usersId: Sequence of user ids; becomes the index of the table.
        artworks_pair: List of (source id, rated id) tuples; becomes the
            columns of the table.

    Returns:
        pandas.DataFrame: Float table with one row per user and one column
        per pair, holding the codes returned by `get_rating`.
    """
    ratings = np.zeros((len(usersId), len(artworks_pair)))

    for row, userId in enumerate(usersId):
        for col, p in enumerate(artworks_pair):
            ratings[row, col] = get_rating(df, userId, p)

    return pd.DataFrame(data=ratings, index=usersId, columns=artworks_pair)

### Drop items with few ratings

**Get mode**

In [None]:
def fill_mode(row, mode):
    """Replace the 0 entries of a Series with the value stored for it in `mode`.

    Args:
        row: Series to fix; its `name` is the key looked up in `mode`.
        mode: Mapping/Series giving the replacement value for each name.

    Returns:
        pandas.Series: Copy of `row` where every element equal to 0 is
        replaced by `mode[row.name]`; other elements are unchanged.
    """
    def _replace_missing(value):
        # The lookup happens only for elements that are actually 0.
        return mode[row.name] if value == 0 else value

    return row.apply(_replace_missing)

In [None]:
def filter_by_mode(df):
    """Drop the columns whose mode is 0 and fill the 0 cells of the others.

    Args:
        df: Table whose columns are examined one by one.

    Returns:
        pandas.DataFrame: The columns of `df` whose first mode (first row of
        `DataFrame.mode()`) is not 0, with each 0 cell replaced by that
        column's mode through `fill_mode`.
    """
    column_modes = df.mode().iloc[0].rename("mode")
    rated_modes = column_modes[column_modes != 0]
    kept_columns = df[list(rated_modes.index)]
    return kept_columns.apply(fill_mode, mode=rated_modes)

In [None]:
# One filtered user x item table per group of artwork pairs.
data_fleiss_kappa = []

# NOTE: despite its name, `pair` is a whole list of (source, rated) pairs —
# one list per file read by get_subjects.
for pair in artworks_pair:
    # Full rating table for the group (0 = the user did not rate the pair).
    df_user_items = create_user_items_table(df_data_experiment, usersId, pair)
    # Keep the items whose mode is a real rating and fill their missing cells.
    df_analyze = filter_by_mode(df_user_items)
    data_fleiss_kappa.append(df_analyze)


**Shape of each DataFrame**

In [None]:
# Print (rows, columns) for every table; a column count of 0 means every item of that group was dropped.
for df in data_fleiss_kappa:
    print(df.shape)

**Check unique raters**

In [None]:
# Collect the index labels (user ids) of every table that still has columns.
raters = []

for df in data_fleiss_kappa:
    if df.shape[1] != 0 :
        raters.extend(list(df.index))

# Shape of the de-duplicated list, i.e. the number of distinct raters.
pd.Series(raters).unique().shape

## Krippendorff alpha

In [None]:
import krippendorff

# Krippendorff's alpha for each non-empty table, in the order of data_fleiss_kappa.
alphas = []

for df in data_fleiss_kappa:
    if df.shape[1] != 0 :
        # The raw matrix is passed with users as rows and items as columns.
        # NOTE(review): no level_of_measurement is given, so the library default applies —
        # confirm it is appropriate for categorical Agree/Disagree codes.
        alpha = krippendorff.alpha(df.values)
        alphas.append(alpha)
alphas

## Fleiss Kappa 

In [None]:
from statsmodels.stats.inter_rater import fleiss_kappa, aggregate_raters

In [None]:
# Fleiss' kappa for each non-empty table, in the order of data_fleiss_kappa.
fleiss_kappa_coefficient = []

for df in data_fleiss_kappa:
    if df.shape[1] != 0 :
        # Tables are users x items; transpose so that each row is an item and each column a rater.
        v = aggregate_raters(df.transpose())
        # Only the first element of the aggregate_raters result is passed on to fleiss_kappa.
        coefficient = fleiss_kappa(v[0])
        fleiss_kappa_coefficient.append(coefficient)

fleiss_kappa_coefficient

**Global Fleiss Kappa coefficient**

In [None]:
# Start from the first table and append the columns of every other non-empty table.
# NOTE(review): the first table is used even if it has no columns, and an empty
# data_fleiss_kappa raises IndexError here.
df_global = data_fleiss_kappa[0]

for df in data_fleiss_kappa[1:]:
    if df.shape[1] != 0 :
        # join() aligns on the index, which holds the user ids in every table.
        df_global = df_global.join(df)

df_global.shape

In [None]:
# Fleiss' kappa over all retained items at once (items as rows, raters as columns after the transpose).
v = aggregate_raters(df_global.transpose())
global_coefficient = fleiss_kappa(v[0])
global_coefficient

## Summarize data

In [None]:
# Source artwork id of each non-empty table, in the same order as the coefficient lists.
source_artworks_id = []

for df in data_fleiss_kappa:
    if df.shape[1] != 0 :
        # Column labels are (source id, rated id) pairs; take the source id of the first column.
        source_artworks_id.append(list(df.columns)[0][0])

source_artworks_id

In [None]:
# The three lists were built with the same "non-empty table" filter, so they line up row by row.
data_summarize = {
    'artwork_id' : source_artworks_id,
    'fleiss_kappa' : fleiss_kappa_coefficient,
    'krippendorff_alpha' : alphas
}

df_summarize = pd.DataFrame(data_summarize)
df_summarize

**Save summary**

In [None]:
# Write the summary next to the notebook as '<experiment type>_summarize.csv', without the index column.
df_summarize.to_csv(experiment_config['type'] + '_summarize.csv', index=False)

In [None]:
# Drop the rows with a missing coefficient before plotting.
# NOTE: this rebinds df_summarize, so the CSV written above still contains the dropped rows.
df_summarize = df_summarize.dropna()
df_summarize

### Plot Fleiss kappa coefficient

In [None]:
import matplotlib.pyplot as plt
import matplotlib.pylab as plb
import seaborn as sns

In [None]:
# Create the figure and use the experiment name as its overall title.
fig = plt.figure(figsize=(10,8))
fig.suptitle(experiment_config['name'], fontsize=30)

# One bar per source artwork; no `ax` is passed, so seaborn draws on the current axes.
sns.set(style="whitegrid")
ax = sns.barplot(x='artwork_id', y='fleiss_kappa', data=df_summarize)
# NOTE(review): with the y-axis fixed to [0, 1], negative coefficients are not visible.
ax.set(ylim=(0, 1))
ax.set_title('Fleiss coefficient interpretation', fontdict={'fontsize': 14, 'fontweight': 'medium'})
ax.set_ylabel('Fleiss coefficient')

# Bounds of the highlighted band, labelled 'moderate agreement' below.
min_threshold = 0.4
max_threshold = 0.6

# Dashed lines at both bounds of the band.
plt.axhline(y=min_threshold,linewidth=1, linestyle='--', color='k', alpha=0.6)
plt.axhline(y=max_threshold,linewidth=1, linestyle='--', color='k', alpha=0.6)

# Shaded band between the two bounds, with its label.
plt.axhspan(min_threshold, max_threshold, facecolor='#334f8d', alpha=0.2)
ax.text(x=-0.45, y=0.5, s='moderate agreement', alpha=0.7, color='#334f8d')

# Dashed line at the global coefficient, labelled just above the line.
plt.axhline(y=global_coefficient,linewidth=2, linestyle='--', color='xkcd:crimson', alpha=0.7)
ax.text(x=0.005, y=global_coefficient+0.005, s='global fleiss coefficient', alpha=1,
        color='xkcd:crimson', fontsize=14)

# Interpretation scale shown in a text box.

textstr = '\n'.join((
    r'Poor agreement: $< 0$',
    r'Slight agreement: $0.01 – 0.20$',
    r'Fair agreement: $0.21 – 0.40$', 
    r'Moderate agreement: $0.41 – 0.60$',
    r'Substantial agreement: $0.61 – 0.80$',
    r'Almost perfect agreement: $0.81 – 1.0$'))

# Box style, given as matplotlib patch properties.
props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)

# Place the box in axes coordinates: its top-left corner at x=0.5, y=0.95.
ax.text(0.5, 0.95, textstr, transform=ax.transAxes, fontsize=12,
        verticalalignment='top', bbox=props)

**Save plot**

In [None]:
# Save the figure that owns `ax` as '<experiment type>_fleiss.png' in the working directory.
ax.figure.savefig(experiment_config['type'] +"_fleiss.png")

### Plot Agree/Disagree counts per artwork id

In [None]:
def get_ratings_count(df):
    """Count how often each value occurs in every column of a table.

    Args:
        df: Table whose column labels are indexable; the second item of each
            label is used to name the resulting counts.

    Returns:
        list[pandas.Series]: For each column, in column order, its
        `value_counts()` renamed to the second item of the column label.
    """
    return [
        df[column].value_counts().rename(column[1])
        for column in list(df.columns)
    ]

In [None]:
def get_dataframe_to_plot(df):
    """Reshape one rating table into long form for a grouped bar chart.

    The table is melted straight into its final 'rating' / 'count' columns.
    Melting with `value_name="Desagree"` and renaming afterwards collides
    with the 'Desagree' column that already exists at that point, which
    recent pandas releases reject with a ValueError.

    Args:
        df: Table whose column labels are (source id, rated id) pairs and
            whose cells hold the codes 1.0 and 2.0.

    Returns:
        tuple: (long-form DataFrame with the columns 'artwork id', 'rating'
        and 'count', source id taken from the first column label).
    """
    name = list(df.columns)[0][0]

    # Wide table: one row per rated artwork, one column per rating code.
    df_ratings = (
        pd.DataFrame(get_ratings_count(df))
        .fillna(0)  # a code nobody used for an artwork counts as 0
        .reset_index()
        .rename(columns={'index': 'artwork id', 1.0: 'Agree', 2.0: 'Desagree'})
    )

    # Long table: one row per (artwork, rating label) with its count.
    df_ratings = pd.melt(df_ratings, id_vars='artwork id', var_name='rating', value_name='count')
    return df_ratings, name

In [None]:
# Long-form plotting tables and their source artwork ids, kept in matching order.
data_to_plot = []
artworks_reference = []

for df in data_fleiss_kappa:
    # Tables without columns have nothing to plot.
    if df.shape[1] != 0 :
        df_to_plot, name = get_dataframe_to_plot(df)
        data_to_plot.append(df_to_plot)
        artworks_reference.append(name)

# NOTE(review): the hard-coded index 2 raises IndexError when fewer than three tables are non-empty.
data_to_plot[2].head()

In [None]:
# Source artwork id of the first table (first item of its first column label).
# NOTE(review): raises IndexError when that table has no columns.
list(data_fleiss_kappa[0].columns)[0][0]

**Plot figure**

In [None]:
# Size the grid from the number of charts instead of assuming exactly four:
# a fixed 2x2 grid raises IndexError with more charts and leaves blank axes
# with fewer.
ncols = 2
n_plots = len(data_to_plot)
nrows = max(1, (n_plots + ncols - 1) // ncols)  # ceiling division, at least one row

# squeeze=False keeps `axs` two-dimensional even when there is a single row.
# The height scales with the rows; two rows give the previous 15x15 figure.
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 7.5 * nrows), squeeze=False)
sns.set(style="whitegrid")

# Row-major view of the grid: charts fill each row left to right.
flat_axes = axs.ravel()

for df, reference_id, target_ax in zip(data_to_plot, artworks_reference, flat_axes):
    ax = sns.barplot(x='artwork id', y='count', hue='rating', data=df, ax=target_ax)

    ax.set_title('Reference artwork id: '+ str(reference_id), fontdict={'fontsize': 14, 'fontweight': 'medium'})
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

# Hide the cells of the grid that received no chart.
for unused_ax in flat_axes[n_plots:]:
    unused_ax.set_visible(False)

fig.tight_layout()
plt.show()

**Save plot**

In [None]:
# Save the grid of charts as '<experiment type>_hits.png' at 100 dpi.
fig.savefig(experiment_config['type'] +"_hits.png", dpi=100)