In [11]:
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from quadratic_weighted_kappa_edit import onehalf_weighted_kappa, linear_weighted_kappa
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import pandas as pd
import krippendorff

In [12]:
# Load the scored-essay table, then the three prediction vectors (one per
# target: higher / mean / sum). joblib artefacts are pickle-based, so these
# files must come from a trusted source.
asap = joblib.load("asap1_new_adjudication")
pred_higher, pred_mean, pred_sum = map(
    joblib.load,
    (
        'score_model_normalized_asap1_gb_higher',
        'score_model_normalized_asap1_gb_mean',
        'score_model_normalized_asap1_gb_sum',
    ),
)

asap.head()

Unnamed: 0,essay,rater1_domain1,rater2_domain1,domain1_score,higher,mean,sum
0,"Dear local newspaper, I think effects computer...",4,4,8,4,4.0,8
1,"Dear @CAPS1 @CAPS2, I believe that using compu...",5,4,9,5,5.0,9
2,"Dear, @CAPS1 @CAPS2 @CAPS3 More and more peopl...",4,3,7,4,4.0,7
3,"Dear Local Newspaper, @CAPS1 I have found that...",5,5,10,5,5.0,10
4,"Dear @LOCATION1, I know having computers has a...",4,4,8,4,4.0,8


In [13]:
# Pull the three target-score columns out of `asap` as flat 1-D arrays.
score_higher = asap["higher"].to_numpy()
score_mean = asap["mean"].to_numpy()
score_sum = asap["sum"].to_numpy()

In [14]:
# Cast every element to a Python int before building the arrays.
# Note: int() truncates toward zero, it does not round.
pred_higher = np.array([int(value) for value in pred_higher])
pred_mean = np.array([int(value) for value in pred_mean])
score_mean = np.array([int(value) for value in score_mean])
pred_sum = np.array([int(value) for value in pred_sum])

In [15]:
# Quadratic weighted kappa between reference scores and predictions,
# reported once per target.
for _label, _truth, _pred in (
    ("QWK higher :", score_higher, pred_higher),
    ("QWK mean :", score_mean, pred_mean),
    ("QWK sum :", score_sum, pred_sum),
):
    print(_label, quadratic_weighted_kappa(_truth, _pred))

QWK higher : 0.7200701434268408
QWK mean : 0.714324505642753
QWK sum : 0.7840315328830542


In [16]:
# Stack each (reference, prediction) pair into a 2 x n array:
# row 0 holds the reference scores, row 1 the predictions.
concat_higher = np.vstack((score_higher, pred_higher))
concat_mean = np.vstack((score_mean, pred_mean))
concat_sum = np.vstack((score_sum, pred_sum))

In [17]:
# Krippendorff's alpha for each stacked (reference, prediction) pair.
# No level_of_measurement is passed, so the library's default applies.
# The label spelling is corrected ("Krippendorff", previously "Kripppendorff").
for _label, _ratings in (
    ("Krippendorff's alpha higher : ", concat_higher),
    ("Krippendorff's alpha mean : ", concat_mean),
    ("Krippendorff's alpha sum : ", concat_sum),
):
    print(_label, krippendorff.alpha(_ratings))

Kripppendorff's alpha higher :  0.720019779772241
Kripppendorff's alpha mean :  0.714387600740739
Kripppendorff's alpha sum :  0.7840904166004541


In [22]:
# Weighted kappa from `onehalf_weighted_kappa`, once per target.
for _label, _truth, _pred in (
    ("1.5WK higher :", score_higher, pred_higher),
    ("1.5WK mean :", score_mean, pred_mean),
    ("1.5WK sum :", score_sum, pred_sum),
):
    print(_label, onehalf_weighted_kappa(_truth, _pred))

1.5WK higher : 0.6593115196951331
1.5WK mean : 0.6603627454921923
1.5WK sum : 0.7030275264407091


In [20]:
# Linear weighted kappa, once per target.
for _label, _truth, _pred in (
    ("LWK higher :", score_higher, pred_higher),
    ("LWK mean :", score_mean, pred_mean),
    ("LWK sum :", score_sum, pred_sum),
):
    print(_label, linear_weighted_kappa(_truth, _pred))

LWK higher : 0.6002911196821037
LWK mean : 0.608937765714948
LWK sum : 0.599064875557124


In [21]:
# Cohen's kappa via scikit-learn (called without a `weights` argument),
# once per target.
for _label, _truth, _pred in (
    ("Kappa higher :", score_higher, pred_higher),
    ("Kappa mean :", score_mean, pred_mean),
    ("Kappa sum :", score_sum, pred_sum),
):
    print(_label, cohen_kappa_score(_truth, _pred))

Kappa higher : 0.5015805520460006
Kappa mean : 0.5249126806225701
Kappa sum : 0.347492545624009


In [64]:
# Import only the name that is used; the previous wildcard import pulled
# every public pycm name into the notebook namespace.
from pycm import ConfusionMatrix

# Bangdiwala's B (the `B` attribute of a pycm ConfusionMatrix) for each target.
# `cm` is rebound on every iteration and ends up holding the "sum" matrix,
# exactly as the original sequence of three assignments left it.
for _label, _truth, _pred in (
    ("Bangdiwala's B higher :", score_higher, pred_higher),
    ("Bangdiwala's B mean :", score_mean, pred_mean),
    ("Bangdiwala's B sum :", score_sum, pred_sum),
):
    cm = ConfusionMatrix(_truth, _pred)
    print(_label, cm.B)

Bangdiwala's B higher : 0.5298368233341871
Bangdiwala's B mean : 0.5502599969792882
Bangdiwala's B sum : 0.3152256596815375


# TEST

In [77]:
# Confusion matrix for the "mean" target (reference scores vs predictions).
c = confusion_matrix(y_true=score_mean, y_pred=pred_mean)

In [78]:
# Display the "mean" confusion matrix as the cell output.
confusion_matrix(y_true=score_mean, y_pred=pred_mean)

array([[  0,   2,   6,   2,   0,   0],
       [  0,   1,  15,   2,   0,   0],
       [  0,   0,  76,  51,   0,   0],
       [  0,   0,  23, 653, 146,   0],
       [  0,   0,   0, 145, 478,  27],
       [  0,   0,   0,   9,  98,  49]], dtype=int64)

In [67]:
# Confusion matrix for the "sum" target; rebinds `c` and displays it.
c = confusion_matrix(y_true=score_sum, y_pred=pred_sum)
c

array([[  0,   1,   1,   3,   3,   2,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   8,   7,   2,   0,   0,   0,   0,   0],
       [  0,   0,   0,   4,  11,   1,   1,   0,   0,   0,   0],
       [  0,   0,   0,   2,  44,  54,  10,   0,   0,   0,   0],
       [  0,   0,   0,   0,   8,  68,  56,   3,   0,   0,   0],
       [  0,   0,   0,   1,   6,  82, 398, 181,  19,   0,   0],
       [  0,   0,   0,   0,   0,   6,  76, 190,  56,   6,   0],
       [  0,   0,   0,   0,   0,   0,  25, 112, 136,  43,   0],
       [  0,   0,   0,   0,   0,   0,   6,  15,  51,  36,   1],
       [  0,   0,   0,   0,   0,   1,   1,   3,  18,  24,   0]],
      dtype=int64)

In [68]:
# Scratch check: show the Python type of `c`.
type(c)

numpy.ndarray

In [52]:
# Export the matrix currently bound to `c` to an Excel workbook.
# pandas is already imported as `pd` in the notebook's import cell, so the
# duplicate import that used to sit here has been removed.
df = pd.DataFrame(c)

## save to xlsx file

filepath = 'a.xlsx'
df.to_excel(filepath, index=False)  # index=False: do not write the row index

In [69]:
def weighted_matrix(N):
    """Build the N x N quadratic disagreement-weight matrix.

    Entry ``[i][j]`` equals ``(i - j)**2 / (N - 1)**2``: 0 on the diagonal
    and 1 in the two far corners.

    Parameters
    ----------
    N : int
        Number of rating categories (matrix side length). Must be >= 0.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, N)``. For ``N`` of 0 or 1 the result is
        all zeros (empty or ``[[0.0]]`` respectively); the normalising
        denominator ``(N - 1)**2`` would otherwise be zero for ``N == 1``.

    Raises
    ------
    ValueError
        If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("negative dimensions are not allowed")

    categories = np.arange(N)
    # Pairwise squared distance between category indices via broadcasting.
    squared_gap = (categories[:, None] - categories[None, :]) ** 2

    if N <= 1:
        # Nothing to normalise: avoid dividing by (N - 1)**2 == 0.
        return squared_gap.astype(float)
    return squared_gap / (N - 1) ** 2

In [70]:
# 11 x 11 weight matrix; 11 presumably matches the 11 x 11 "sum" confusion
# matrix shown earlier in the notebook (to be confirmed).
w = weighted_matrix(N=11)

In [76]:
# Write the weight matrix `w` to an Excel workbook, without the row index.
filepath = 'w.xlsx'
df = pd.DataFrame(w)
df.to_excel(excel_writer=filepath, index=False)