In [1]:
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from quadratic_weighted_kappa_edit import onehalf_weighted_kappa, linear_weighted_kappa
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import pandas as pd
import krippendorff

In [2]:
# NOTE: joblib.load unpickles its input, so only load files from a trusted source.
asap = joblib.load("asap6_new_adjudication")

# Model predictions; presumably one file per gold-score column
# (higher / mean / sum), judging by the file names.
pred_higher, pred_mean, pred_sum = [
    joblib.load(path)
    for path in (
        'score_model_normalized_asap6_gb_higher',
        'score_model_normalized_asap6_gb_mean',
        'score_model_normalized_asap6_gb_sum',
    )
]

asap.head()

Unnamed: 0,essay,rater1_domain1,rater2_domain1,domain1_score,higher,mean,sum,check
0,There were many obstacles that the builders fa...,2,2,2,2,2.0,4,True
1,"Him from the start, there would have been many...",3,3,3,3,3.0,6,True
2,The builders of the Empire State Building face...,3,4,4,4,4.0,7,True
3,In the passage The Mooring Mast by Marcia Amid...,1,1,1,1,1.0,2,True
4,The builders of the Empire State Building face...,3,3,3,3,3.0,6,True


In [3]:
# Pull each gold-score column out of the frame as a flat 1-D array.
score_higher = asap['higher'].to_numpy()
score_mean = asap['mean'].to_numpy()
score_sum = asap['sum'].to_numpy()

In [4]:
# Cast every element from float to int and rebuild the arrays.
# int() truncates toward zero.
# NOTE(review): confirm the values are already integral; if not, rounding may be what is wanted.
pred_higher = np.array([int(value) for value in pred_higher])
pred_mean = np.array([int(value) for value in pred_mean])
score_mean = np.array([int(value) for value in score_mean])
pred_sum = np.array([int(value) for value in pred_sum])

In [5]:
# Quadratic weighted kappa between gold scores and predictions, one line per scoring scheme.
for label, gold, pred in (
    ("QWK higher :", score_higher, pred_higher),
    ("QWK mean :", score_mean, pred_mean),
    ("QWK sum :", score_sum, pred_sum),
):
    print(label, quadratic_weighted_kappa(gold, pred))

QWK higher : 0.7963945570600047
QWK mean : 0.7930858310626703
QWK sum : 0.8547969417954621


In [6]:
# Stack gold scores (row 0) on top of predictions (row 1): one 2 x n array per scheme.
concat_higher = np.vstack((score_higher, pred_higher))
concat_mean = np.vstack((score_mean, pred_mean))
concat_sum = np.vstack((score_sum, pred_sum))

In [7]:
# Krippendorff's alpha for each 2 x n array built from gold scores and predictions.
# level_of_measurement="interval" is the library default; it is spelled out here
# so the distance metric behind the reported numbers is visible to the reader.
for label, ratings in (
    ("Krippendorff's alpha higher : ", concat_higher),
    ("Krippendorff's alpha mean : ", concat_mean),
    ("Krippendorff's alpha sum : ", concat_sum),
):
    print(
        label,
        krippendorff.alpha(reliability_data=ratings, level_of_measurement="interval"),
    )

Kripppendorff's alpha higher :  0.7964488116758291
Kripppendorff's alpha mean :  0.7931400715880911
Kripppendorff's alpha sum :  0.8548365450294626


In [8]:
# Weighted kappa from onehalf_weighted_kappa, one line per scoring scheme.
for label, gold, pred in zip(
    ("1.5WK higher :", "1.5WK mean :", "1.5WK sum :"),
    (score_higher, score_mean, score_sum),
    (pred_higher, pred_mean, pred_sum),
):
    print(label, onehalf_weighted_kappa(gold, pred))

1.5WK higher : 0.7336313984280348
1.5WK mean : 0.7268880475417661
1.5WK sum : 0.7735532142801064


In [9]:
# Linear weighted kappa, reported for the higher / mean / sum scoring schemes.
for label, gold, pred in (
    ("LWK higher :", score_higher, pred_higher),
    ("LWK mean :", score_mean, pred_mean),
    ("LWK sum :", score_sum, pred_sum),
):
    print(label, linear_weighted_kappa(gold, pred))

LWK higher : 0.6656413257311811
LWK mean : 0.6547692142728581
LWK sum : 0.6563381503161397


In [10]:
# Cohen's kappa (called without weights), reported for each scoring scheme.
for label, gold, pred in zip(
    ("Kappa higher :", "Kappa mean :", "Kappa sum :"),
    (score_higher, score_mean, score_sum),
    (pred_higher, pred_mean, pred_sum),
):
    print(label, cohen_kappa_score(gold, pred))

Kappa higher : 0.5352279118271882
Kappa mean : 0.51549673860558
Kappa sum : 0.3209524348272055


In [11]:
# Import only the class that is used; a star import would spill every public
# pycm name into the notebook namespace.
from pycm import ConfusionMatrix

# Bangdiwala's B for each scoring scheme. `cm` is rebound on every iteration
# and ends up holding the matrix for the "sum" scheme.
for label, gold, pred in (
    ("Bangdiwala's B higher :", score_higher, pred_higher),
    ("Bangdiwala's B mean :", score_mean, pred_mean),
    ("Bangdiwala's B sum :", score_sum, pred_sum),
):
    cm = ConfusionMatrix(actual_vector=gold, predict_vector=pred)
    print(label, cm.B)

Bangdiwala's B higher : 0.510716081545085
Bangdiwala's B mean : 0.49048368590686064
Bangdiwala's B sum : 0.24430978861800015


# Test: confusion matrices and weight-matrix export

In [14]:
# Confusion matrix for the "mean" scheme: gold scores as y_true, predictions as y_pred.
c = confusion_matrix(y_true=score_mean, y_pred=pred_mean)
c

array([[  8,  30,   2,   0,   0],
       [  2,  90,  70,   2,   0],
       [  0,  24, 275, 101,   0],
       [  0,   3, 105, 615,  98],
       [  0,   1,   0, 154, 220]], dtype=int64)

In [16]:
# Confusion matrix for the "sum" scheme: gold scores as y_true, predictions as y_pred.
c = confusion_matrix(y_true=score_sum, y_pred=pred_sum)
c

array([[  5,  22,  11,   2,   0,   0,   0,   0,   0],
       [  0,   4,  13,   7,   0,   0,   0,   0,   0],
       [  0,  11,  52,  49,  24,   4,   0,   0,   0],
       [  0,   3,   9,  55,  47,  10,   1,   0,   0],
       [  0,   0,   3,  53, 121,  80,  18,   0,   0],
       [  0,   0,   1,  10,  53, 108,  81,  10,   0],
       [  0,   0,   2,   2,  18, 122, 299, 114,   1],
       [  0,   0,   1,   0,   0,   7,  93, 133,   8],
       [  0,   0,   0,   0,   0,   0,  21,  96,  16]], dtype=int64)

In [17]:
# Export the confusion matrix currently bound to `c` as an Excel sheet.
# NOTE(review): `c` is assigned by more than one earlier cell, so the exported
# matrix is whichever one was computed last.
# pandas is already imported as `pd` in the first cell, so it is not re-imported here.

## save to xlsx file

filepath = 'a.xlsx'
df = pd.DataFrame(c)
df.to_excel(filepath, index=False)

In [None]:
def weighted_matrix(N):
    """Build the N x N quadratic disagreement-weight matrix.

    Entry ``[i][j]`` is ``(i - j)**2 / (N - 1)**2``: 0 on the diagonal and 1
    in the two corners that are furthest apart.

    Parameters
    ----------
    N : int
        Number of categories (rows and columns of the result).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, N)``.

    Raises
    ------
    ValueError
        If ``N`` is negative (raised by ``np.zeros``).
    """
    if N <= 1:
        # With a single category the only entry is i == j, whose weight is 0.
        # Returning zeros avoids the 0/0 division of the general formula.
        # N == 0 yields an empty (0, 0) array.
        return np.zeros((N, N))

    idx = np.arange(N)
    # Broadcasting a column against a row gives every (i - j) pair at once.
    diff = idx[:, None] - idx[None, :]
    return diff ** 2 / (N - 1) ** 2

In [None]:
# Build the weight matrix for 11 categories and display it.
# NOTE(review): the confusion matrices shown earlier in this notebook are 5x5 and 9x9;
# confirm that 11 categories is the intended size.
w = weighted_matrix(N=11)
w

In [None]:
# Write the weight matrix `w` to an Excel sheet, leaving out the row index.
filepath = 'w.xlsx'
df = pd.DataFrame(data=w)
df.to_excel(filepath, index=False)