In [1]:
import joblib
import numpy as np
#from quadratic_weighted_kappa import quadratic_weighted_kappa
from quadratic_weighted_kappa_edit import onehalf_weighted_kappa, linear_weighted_kappa, quadratic_weighted_kappa
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import pandas as pd
import krippendorff

In [2]:
# Reference data set plus the three sets of model predictions.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
asap = joblib.load("asap7_new_adjudication")

prediction_files = (
    'score_model_normalized_asap7_gb_higher',
    'score_model_normalized_asap7_gb_mean',
    'score_model_normalized_asap7_gb_sum',
)
pred_higher, pred_mean, pred_sum = map(joblib.load, prediction_files)

asap.head()

Unnamed: 0,essay,rater1_domain1,rater2_domain1,domain1_score,higher,mean,sum
0,Patience is when your waiting .I was patience ...,8,7,15,8,8.0,15
1,"I am not a patience person, like I canÆt sit i...",6,7,13,7,7.0,13
2,One day I was at basketball practice and I was...,7,8,15,8,8.0,15
3,I going to write about a time when I went to t...,8,9,17,9,9.0,17
4,It can be very hard for somebody to be patient...,7,6,13,7,7.0,13


In [3]:
# Pull each reference-score column out of the frame as a flat 1-D array.
score_higher, score_mean, score_sum = (
    asap[[column]].to_numpy().ravel() for column in ('higher', 'mean', 'sum')
)

In [4]:
def _as_int_array(values):
    """Convert every element of ``values`` with ``int()`` and return a NumPy array.

    Note that ``int()`` truncates toward zero; it does not round.
    """
    return np.array([int(value) for value in values])


# Convert the predictions (and the float-valued mean score) to integers.
pred_higher = _as_int_array(pred_higher)
pred_mean = _as_int_array(pred_mean)
score_mean = _as_int_array(score_mean)
pred_sum = _as_int_array(pred_sum)

In [5]:
# Quadratic weighted kappa between the reference scores (score_*) and the
# model predictions (pred_*), one line per scoring scheme.
qwk_cases = (
    ("QWK higher :", score_higher, pred_higher),
    ("QWK mean :", score_mean, pred_mean),
    ("QWK sum :", score_sum, pred_sum),
)
for label, observed, predicted in qwk_cases:
    print(label, quadratic_weighted_kappa(observed, predicted))

QWK higher : 0.7349701207331559
QWK mean : 0.7684750562271044
QWK sum : 0.7780350409677778


In [6]:
# Stack each reference/prediction pair into a 2-row array:
# row 0 holds the reference scores, row 1 the model predictions.
concat_higher = np.vstack((score_higher, pred_higher))
concat_mean = np.vstack((score_mean, pred_mean))
concat_sum = np.vstack((score_sum, pred_sum))

In [7]:
# Krippendorff's alpha for each stacked (reference, prediction) pair.
# NOTE(review): this relies on the library's default level of measurement —
# confirm that default is the one intended for these scores.
alpha_cases = (
    ("Kripppendorff's alpha higher : ", concat_higher),
    ("Kripppendorff's alpha mean : ", concat_mean),
    ("Kripppendorff's alpha sum : ", concat_sum),
)
for label, ratings in alpha_cases:
    print(label, krippendorff.alpha(ratings))

Kripppendorff's alpha higher :  0.7350533910079688
Kripppendorff's alpha mean :  0.7685464304702081
Kripppendorff's alpha sum :  0.7781054802468577


In [8]:
# Weighted kappa from onehalf_weighted_kappa, one line per scoring scheme.
onehalf_cases = (
    ("1.5WK higher :", score_higher, pred_higher),
    ("1.5WK mean :", score_mean, pred_mean),
    ("1.5WK sum :", score_sum, pred_sum),
)
for label, observed, predicted in onehalf_cases:
    print(label, onehalf_weighted_kappa(observed, predicted))

1.5WK higher : 0.643410997854341
1.5WK mean : 0.6746462175282537
1.5WK sum : 0.6848241938090125


In [9]:
# Linear weighted kappa, one line per scoring scheme.
lwk_cases = (
    ("LWK higher :", score_higher, pred_higher),
    ("LWK mean :", score_mean, pred_mean),
    ("LWK sum :", score_sum, pred_sum),
)
for label, observed, predicted in lwk_cases:
    print(label, linear_weighted_kappa(observed, predicted))

LWK higher : 0.5197169115358754
LWK mean : 0.5450860045064283
LWK sum : 0.5477390637070911


In [10]:
# Cohen's kappa via scikit-learn (no weights argument is passed),
# one line per scoring scheme.
kappa_cases = (
    ("Kappa higher :", score_higher, pred_higher),
    ("Kappa mean :", score_mean, pred_mean),
    ("Kappa sum :", score_sum, pred_sum),
)
for label, observed, predicted in kappa_cases:
    print(label, cohen_kappa_score(observed, predicted))

Kappa higher : 0.17540833882626317
Kappa mean : 0.1787025755279279
Kappa sum : 0.0893560109013829


In [11]:
# Import only the name this cell needs instead of `from pycm import *`,
# which would dump every public pycm name into the notebook namespace.
from pycm import ConfusionMatrix

# Bangdiwala's B (the `B` attribute of pycm's ConfusionMatrix) for each
# reference/prediction pair. `cm` is left bound to the last matrix built
# (the "sum" one), exactly as in the unrolled version.
bangdiwala_cases = (
    ("Bangdiwala's B higher :", score_higher, pred_higher),
    ("Bangdiwala's B mean :", score_mean, pred_mean),
    ("Bangdiwala's B sum :", score_sum, pred_sum),
)
for label, observed, predicted in bangdiwala_cases:
    cm = ConfusionMatrix(observed, predicted)
    print(label, cm.B)

Bangdiwala's B higher : 0.10091055510373312
Bangdiwala's B mean : 0.09457596811146088
Bangdiwala's B sum : 0.02928301609555704


# Test

In [25]:
# Confusion matrix for the "mean" scheme: the first argument (reference
# scores) is y_true, the second (model predictions) is y_pred.
c = confusion_matrix(y_true=score_mean, y_pred=pred_mean)
c

array([[ 0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  2,  1,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  2,  6, 11,  4,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  9, 38, 21,  8,  2,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  3, 17, 36, 36, 10,  2,  0,  0,  0,  0],
       [ 0,  0,  0,  1, 18, 45, 43, 23,  8,  3,  1,  0,  0],
       [ 0,  0,  1,  0, 11, 31, 61, 48, 29,  5,  1,  0,  0],
       [ 0,  0,  0,  1,  2, 15, 66, 95, 73, 28,  4,  0,  0],
       [ 0,  0,  0,  0,  1,  6, 22, 73, 98, 62, 15,  1,  0],
       [ 0,  0,  0,  0,  1,  0,  8, 32, 59, 62, 22,  3,  0],
       [ 0,  0,  0,  0,  0,  0,  1,  8, 24, 56, 36,  5,  0],
       [ 0,  0,  0,  0,  0,  0,  2,  7,  8, 49, 58, 24,  1],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]], dtype=int64)

In [29]:
# Confusion matrix for the "sum" scheme: the first argument (reference
# scores) is y_true, the second (model predictions) is y_pred.
# This rebinds `c`, replacing any matrix it held before.
c = confusion_matrix(y_true=score_sum, y_pred=pred_sum)
c

array([[ 0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  0,  0,  1,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  3,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  0,  2,  2,  4,  4,  3,  3,  0,  1,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  3,  1,  6,  5,  7,  2,  2,  0,  1,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  4,  9, 14,  9,  3,  6,  3,  1,  0,  0,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  3,  0,  8, 10,  7, 11,  3,  5,  1,  1,
         0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  1,  3,  5, 11,  8,  7, 13,  4,  2,  0,
         1,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,

In [31]:
# pandas is already imported as `pd` in the notebook's first cell, so it is
# not re-imported here.
# `c` is whichever confusion matrix was computed most recently (the "sum"
# one when the notebook is run top to bottom).
df = pd.DataFrame(c)

# Save to an .xlsx file (without the index column).
filepath = 'a.xlsx'
df.to_excel(filepath, index=False)

In [45]:
def weighted_matrix(N):
    """Return the N x N matrix of normalised quadratic disagreement weights.

    Entry ``[i][j]`` equals ``(i - j)**2 / (N - 1)**2``: 0 on the diagonal and
    1 in the two corners that are furthest from it.

    Parameters
    ----------
    N : int
        Number of rating categories (rows and columns of the result).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, N)``. For ``N <= 1`` there is no
        off-diagonal cell, so an all-zero matrix is returned instead of
        dividing by ``(N - 1)**2 == 0``.
    """
    # Allocating first keeps the original validation of N (a negative or
    # non-integer N still raises from np.zeros).
    weighted = np.zeros((N, N))
    if N <= 1:
        return weighted

    # Vectorised form of the double loop: broadcast the index vector against
    # itself to get every (i - j) difference at once.
    idx = np.arange(N, dtype=float)
    diff = idx[:, None] - idx[None, :]
    return diff ** 2 / (N - 1) ** 2

In [46]:
# Build the 23 x 23 weight matrix and display it.
# NOTE(review): 23 presumably matches the number of classes in the "sum"
# confusion matrix — confirm.
w = weighted_matrix(N=23)
w

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
        13., 14., 15., 16., 17., 18., 19., 20., 21., 22.],
       [ 1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.,
        12., 13., 14., 15., 16., 17., 18., 19., 20., 21.],
       [ 2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.,
        11., 12., 13., 14., 15., 16., 17., 18., 19., 20.],
       [ 3.,  2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.,
        10., 11., 12., 13., 14., 15., 16., 17., 18., 19.],
       [ 4.,  3.,  2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,
         9., 10., 11., 12., 13., 14., 15., 16., 17., 18.],
       [ 5.,  4.,  3.,  2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,
         8.,  9., 10., 11., 12., 13., 14., 15., 16., 17.],
       [ 6.,  5.,  4.,  3.,  2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,  6.,
         7.,  8.,  9., 10., 11., 12., 13., 14., 15., 16.],
       [ 7.,  6.,  5.,  4.,  3.,  2.,  1.,  0.,  1.,  2.,  3.,  4.,  5.,
   

In [51]:
# Write the weight matrix `w` to an Excel workbook (without the index column).
filepath = 'w.xlsx'
df = pd.DataFrame(w)
df.to_excel(filepath, index=False)