In [13]:
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from quadratic_weighted_kappa_edit import onehalf_weighted_kappa, linear_weighted_kappa
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import pandas as pd
import krippendorff

In [14]:
# NOTE(review): joblib.load unpickles its input, so these artifacts should only
# ever come from a trusted source.
asap = joblib.load("asap4_new_adjudication")

# The three `pred_*` artifacts, loaded in the same order as before
# (higher, mean, sum).
pred_higher, pred_mean, pred_sum = map(
    joblib.load,
    (
        'score_model_normalized_asap4_gb_higher',
        'score_model_normalized_asap4_gb_mean',
        'score_model_normalized_asap4_gb_sum',
    ),
)

asap.head()

Unnamed: 0,essay,rater1_domain1,rater2_domain1,domain1_score,higher,mean,sum,check
0,The author concludes the story with this becau...,0,0,0,0,0.0,0,True
1,The narrater has that in with Paragraph becuse...,0,0,0,0,0.0,0,True
2,The author concludes the story with that passa...,3,2,3,3,3.0,5,True
3,The author ended the story with this paragraph...,1,2,2,2,2.0,3,True
4,The author concludes the story with this parag...,2,2,2,2,2.0,4,True


In [15]:
# Pull the `higher`, `mean` and `sum` columns of `asap` out as flat 1-D arrays.
score_higher = asap['higher'].to_numpy()
score_mean = asap['mean'].to_numpy()
score_sum = asap['sum'].to_numpy()

In [16]:
# Cast every element to a Python int (int() truncates any fractional part)
# and rebuild each sequence as a NumPy array.
pred_higher = np.array([int(value) for value in pred_higher])
pred_mean = np.array([int(value) for value in pred_mean])
score_mean = np.array([int(value) for value in score_mean])
pred_sum = np.array([int(value) for value in pred_sum])

In [17]:
# Quadratic weighted kappa between the `score_*` and `pred_*` arrays, one line
# per variant.
for label, y_true, y_pred in (
    ("QWK higher :", score_higher, pred_higher),
    ("QWK mean :", score_mean, pred_mean),
    ("QWK sum :", score_sum, pred_sum),
):
    print(label, quadratic_weighted_kappa(y_true, y_pred))

QWK higher : 0.7648971450219995
QWK mean : 0.7741693549474011
QWK sum : 0.8078743798157526


In [18]:
# Stack each pair of 1-D arrays into a 2 x n array: row 0 holds the `score_*`
# values, row 1 the matching `pred_*` values.
concat_higher = np.vstack((score_higher, pred_higher))
concat_mean = np.vstack((score_mean, pred_mean))
concat_sum = np.vstack((score_sum, pred_sum))

In [19]:
# Krippendorff's alpha for each stacked 2 x n array. alpha() receives only the
# data array, so the library's default level of measurement applies.
for label, reliability_data in (
    ("Kripppendorff's alpha higher : ", concat_higher),
    ("Kripppendorff's alpha mean : ", concat_mean),
    ("Kripppendorff's alpha sum : ", concat_sum),
):
    print(label, krippendorff.alpha(reliability_data))

Kripppendorff's alpha higher :  0.764955094501804
Kripppendorff's alpha mean :  0.7742280427099306
Kripppendorff's alpha sum :  0.8079283700529852


In [20]:
# Weighted kappa from onehalf_weighted_kappa, one line per variant.
for label, y_true, y_pred in (
    ("1.5WK higher :", score_higher, pred_higher),
    ("1.5WK mean :", score_mean, pred_mean),
    ("1.5WK sum :", score_sum, pred_sum),
):
    print(label, onehalf_weighted_kappa(y_true, y_pred))

1.5WK higher : 0.7034696678167944
1.5WK mean : 0.7147017073339794
1.5WK sum : 0.7288574118328832


In [21]:
# Linear weighted kappa, one line per variant.
for label, y_true, y_pred in (
    ("LWK higher :", score_higher, pred_higher),
    ("LWK mean :", score_mean, pred_mean),
    ("LWK sum :", score_sum, pred_sum),
):
    print(label, linear_weighted_kappa(y_true, y_pred))

LWK higher : 0.6374227001987594
LWK mean : 0.6507558822513777
LWK sum : 0.6193643897267859


In [22]:
# Cohen's kappa via scikit-learn (called without a `weights` argument), one
# line per variant.
for label, y_true, y_pred in (
    ("Kappa higher :", score_higher, pred_higher),
    ("Kappa mean :", score_mean, pred_mean),
    ("Kappa sum :", score_sum, pred_sum),
):
    print(label, cohen_kappa_score(y_true, y_pred))

Kappa higher : 0.5099381716706819
Kappa mean : 0.5273457788408802
Kappa sum : 0.31820401055562664


In [23]:
# Import only the class this cell uses. `from pycm import *` would pull every
# public pycm name into the notebook namespace and could silently shadow
# names defined in earlier cells.
from pycm import ConfusionMatrix

# Bangdiwala's B (the `B` attribute of a pycm ConfusionMatrix) for each
# variant. The first positional argument is the `score_*` array, the second
# the `pred_*` array. After the loop `cm` still refers to the matrix of the
# last ("sum") variant, exactly as it did when the three stanzas were written
# out by hand.
for label, y_true, y_pred in (
    ("Bangdiwala's B higher :", score_higher, pred_higher),
    ("Bangdiwala's B mean :", score_mean, pred_mean),
    ("Bangdiwala's B sum :", score_sum, pred_sum),
):
    cm = ConfusionMatrix(y_true, y_pred)
    print(label, cm.B)

Bangdiwala's B higher : 0.45559477950654587
Bangdiwala's B mean : 0.46947983438678076
Bangdiwala's B sum : 0.29391032286032004


# Test

In [26]:
# Confusion matrix for the `mean` variant: `score_mean` is passed as the true
# labels and `pred_mean` as the predicted labels.
c = confusion_matrix(y_true=score_mean, y_pred=pred_mean)
c

array([[156, 146,   9,   1],
       [ 65, 486,  84,   2],
       [  1, 114, 400,  55],
       [  0,   4, 106, 143]], dtype=int64)

In [29]:
# Confusion matrix for the `sum` variant: `score_sum` is passed as the true
# labels and `pred_sum` as the predicted labels.
c = confusion_matrix(y_true=score_sum, y_pred=pred_sum)
c

array([[ 66, 177,  55,  11,   1,   2,   0],
       [  1,   3,   2,   4,   0,   0,   0],
       [ 14, 185, 319,  96,  11,   0,   2],
       [  0,   4,  60, 111,  51,   9,   0],
       [  0,   1,  18,  88, 175,  51,   2],
       [  0,   0,   2,   9,  61,  82,   4],
       [  0,   2,   0,   1,  17,  63,  12]], dtype=int64)

In [31]:
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here.

# Wrap the confusion matrix currently bound to `c` in a DataFrame.
df = pd.DataFrame(c)

# Save to an .xlsx file; index=False leaves the row index out of the sheet.
filepath = 'a.xlsx'
df.to_excel(filepath, index=False)

In [None]:
def weighted_matrix(N):
    """Build the N x N quadratic disagreement-weight matrix.

    Entry ``[i][j]`` is ``(i - j)**2 / (N - 1)**2``: zero on the diagonal and
    1.0 in the two corners furthest from it.

    Parameters
    ----------
    N : int
        Number of rows and columns of the matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, N)``. ``N == 0`` gives an empty array and
        ``N == 1`` gives ``[[0.]]``.

    Raises
    ------
    ValueError, TypeError
        Propagated from ``np.zeros`` when ``N`` is negative or not an integer.
    """
    # Allocating first keeps np.zeros' own validation of N.
    weighted = np.zeros((N, N))
    # For N == 1 the normaliser (N - 1)**2 is zero and the only entry is the
    # diagonal, so leave the zero matrix as is instead of dividing 0 by 0.
    if N > 1:
        idx = np.arange(N)
        # Broadcasting a column against a row yields every (i - j) at once.
        weighted[:] = (idx[:, None] - idx) ** 2 / (N - 1) ** 2
    return weighted

In [None]:
# 11 x 11 weight matrix.
# NOTE(review): 11 is a magic number here; confirm which score scale it is
# meant to represent.
w = weighted_matrix(N=11)
w

In [None]:
# Write the weight matrix `w` to an Excel sheet; index=False leaves the row
# index out of the file.
filepath = 'w.xlsx'
df = pd.DataFrame(w)
df.to_excel(filepath, index=False)