In [34]:
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from itertools import cycle, islice
%matplotlib inline  
import warnings
warnings.filterwarnings('ignore')
from people import people

In [35]:
%%javascript
// Set the notebook output area's auto-scroll threshold to a large value.
// NOTE(review): presumably this keeps long outputs (e.g. the styled table
// below) fully expanded instead of collapsing them into a scroll box; confirm
// against the notebook front-end in use.
IPython.OutputArea.auto_scroll_threshold = 9999;

<IPython.core.display.Javascript object>

In [36]:
from IPython.display import display, HTML
def pretty_print(df):
    """Show ``df`` in the notebook output as an HTML table.

    The frame is serialised with ``DataFrame.to_html()`` and every newline
    character in that markup is replaced by a ``<br>`` tag before the result
    is wrapped in ``HTML`` and handed to ``display``.

    Returns whatever ``display`` returns.
    """
    markup = df.to_html()
    markup_with_breaks = markup.replace("\n","<br>")
    rendered = HTML(markup_with_breaks)
    return display(rendered)

In [37]:
results_file = '%s_trial_v4_clusters_results_wup_0.76.csv'

# Ivan's results file is the base frame: it supplies the shared columns
# (artist_name, user_tags, machine_clusters, ...) selected further down.
# NOTE(review): this file is read with a lowercase name while the loop below
# uses the names from `people` verbatim -- confirm the file names on disk
# match on a case-sensitive filesystem.
df = pd.read_csv(results_file % ('ivan'))

df['Ivan_clusters'] = df['human_clusters']
df['Ivan_hits'] = df['hits']
df['Ivan_success'] = df['hits'] > 0

# remove rows where no clusters
df = df.dropna(subset=['human_clusters'])

# Add one clusters/hits/success column triple per remaining annotator.
# The original `set(people).difference('Ivan')` iterated the *string* 'Ivan'
# character by character, so 'Ivan' was never excluded and was read a second
# time. Filtering the sequence directly excludes it and also keeps the
# iteration order of `people` (a set has no defined order).
for person in [name for name in people if name != 'Ivan']:
    df_tmp = pd.read_csv(results_file % (person))
    # pandas aligns the assigned Series on the row index, so rows removed by
    # the dropna above are not re-introduced.
    df['%s_clusters' % (person)] = df_tmp['human_clusters']
    df['%s_hits' % (person)] = df_tmp['hits']
    df['%s_success' % (person)] = df_tmp['hits'] > 0

# Any value still missing (e.g. a work with no machine clusters) is replaced
# by a visible placeholder string.
df=df.fillna('**NONE**')

# How well did the machine do overall?

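The similarity threshold is T = 0.76 (the `wup_0.76` in the results file name). The similarity metric is the Wu-Palmer (`wup`) similarity, which measures how similar two word senses are, based on the depth of the two senses in the taxonomy and that of their Least Common Subsumer (most specific ancestor node).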

In [38]:
# Descriptive columns first, followed by one hit-count column per annotator.
columns = ['artist_name', 'user_tags', 'human_clusters', 'machine_clusters']
columns.extend('%s_hits' % (person) for person in people)
df_summary = df.loc[:, columns]

def custom_style(row, hits_column='Vincent_hits'):
    """Return one background-colour CSS declaration per cell of ``row``.

    Intended for ``DataFrame.style.apply(custom_style, axis=1)``; extra
    keyword arguments given to ``Styler.apply`` are forwarded, so a different
    annotator can be highlighted with ``hits_column='Ivan_hits'``.

    Parameters
    ----------
    row : pandas.Series
        One row of the summary table.
    hits_column : str, optional
        Name of the hit-count column that selects the shade. Defaults to
        ``'Vincent_hits'``, the column the notebook originally hard-coded.

    Returns
    -------
    list of str
        ``'background-color: <colour>'`` repeated once per value in ``row``.
        Hit counts of 3, 2 and 1 get a yellow shade; anything else is white.
    """
    # NOTE(review): the shades are not ordered by intensity (2 hits is the
    # most saturated yellow, 3 hits the palest) -- confirm this is intended.
    shade_by_hits = {3: '#FFFF99', 2: '#FFFF00', 1: '#FFFF66'}
    color = shade_by_hits.get(row[hits_column], 'white')
    return ['background-color: %s' % color]*len(row.values)

# Render the summary table, colouring every row with the CSS returned by
# custom_style (axis=1 means the function is called once per row).
df_summary.style.apply(custom_style, axis=1)



Unnamed: 0,artist_name,user_tags,human_clusters,machine_clusters,Ivan_hits,Vincent_hits,Marie_hits,Kathleen_hits
0,Jeffry Mitchell,"Jesus,glaze,carpenter,freeze,earthenware","Spirituality,History,Economy",**NONE**,0,0,0,0
1,Fernanda Gomes,"self-sufficient,vulnerability,humanness,nature,constructivist,site","Culture,Materiality,Arts","Identity,Power,Familial",0,0,0,0
2,Asier Mendizabal,"crowd,popular,disintegrate,repetition,social_group,ritual","Culture,Identity,Community","Community,Culture,History",2,1,2,2
3,George Pfau,"thereness,disembody,decay,texture,diffuse,human_body","Immaterial,Fantasy,Body","Materiality,Space,Body",1,1,1,1
4,Judy Chicago,"closeness,fabrication,hue,feminism,femininity,polish","Identity,Materiality,Body","Emotion,Mind,Spirituality",0,0,0,0
5,Anna Molska,"Poland,space,confinement,struggle,horizon,dimensionality","Physics,Arts,Body","Land,Urbanization,Community",0,1,1,0
6,Walead Beshty,"abstraction,geometric,rhythm,materiality,color,development","Design,Materiality,Arts","Arts,Materiality,Land",2,2,1,2
7,Pia Camil,"craft,abstraction,digit,space,commercialization,handicraft","Materiality,Land,Economy","Arts,Land,Media",1,1,0,1


## Note that the first work "Jeffry Mitchell" with tags:

- Jesus
- glaze
- carpenter
- freeze
- earthenware

which was clustered into `Spirituality`, `History`, and `Economy`, was not assigned any clusters by the *AI*, even though there is a fairly intuitive connection between `Jesus` and `religion.n.01`. This is because the WordNet similarity threshold is set to 0.76, and the similarity WordNet reports for this pair of senses does not reach that threshold:

`jesus.n.01 -> religion.n.01 == 0.63`

This is despite `Jesus` being resolved to the correct WordNet sense, defined as _"a teacher and prophet born in Bethlehem and active in Nazareth; his life and sermons form the basis for Christianity (circa 4 BC - AD 29)"_. WordNet is also unaware that Jesus was a carpenter.

Because of this, we drop this row from the test.

# Now calculate the multi-label metric

The Hamming score is closely related to the Hamming loss (the fraction of labels that are incorrectly predicted), but allows for different number of labels (clusters) predicted by man and machine.

In [39]:
# Drop the row labelled 0 (the "Jeffry Mitchell" work, which has no machine
# clusters). errors='ignore' keeps this cell safe to re-run: without it a
# second execution raises KeyError because label 0 is already gone from `df`.
df = df.drop(0, axis='index', errors='ignore')

In [40]:
from metrics import hamming_score
# Compare every annotator's cluster labels with the machine's labels and
# print one Hamming score per annotator.
for person in people:
    person_column = '%s_clusters' % (person)
    # Each cell holds a comma-separated string; split it into a list of names.
    y_true = df[person_column].str.split(',').tolist()
    y_pred = df['machine_clusters'].str.split(',').tolist()
    score = hamming_score(y_true, y_pred)
    message = '{0} <--> computer score \t{1}'.format(person, score)
    print(message)
    
    

Ivan <--> computer score 	0.7619047619047619
Vincent <--> computer score 	0.7619047619047619
Marie <--> computer score 	0.8095238095238095
Kathleen <--> computer score 	0.8571428571428571


In [41]:
from metrics import hamming_score
anchor = 'Kathleen'
# Cluster labels of the anchor annotator, one list of names per row. They do
# not depend on the loop variable, so they are built once up front.
y_pred = list(df['%s_clusters' % (anchor)].str.split(','))
# Walk `people` in its own order instead of iterating a set difference: a set
# has no defined order, so the printed lines could be shuffled between runs.
for person in [name for name in people if name != anchor]:
    y_true = list(df['%s_clusters' % (person)].str.split(','))
    score = hamming_score(y_true, y_pred)
    print('{0} <--> {1} \t{2}'.format(person, anchor, score))


Ivan <--> Kathleen 	0.9047619047619048
Vincent <--> Kathleen 	0.9047619047619048
Marie <--> Kathleen 	0.9523809523809524
