In [13]:
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from quadratic_weighted_kappa_edit import onehalf_weighted_kappa, linear_weighted_kappa
from sklearn.metrics import cohen_kappa_score, confusion_matrix
import pandas as pd
import krippendorff

In [2]:
# Load the scored-essay table and the three prediction sets
# (higher / mean / sum) from their joblib files.
# NOTE(review): joblib.load unpickles its input, so only point it at trusted files.
asap = joblib.load("asap5_new_adjudication")
pred_higher, pred_mean, pred_sum = (
    joblib.load(artifact)
    for artifact in (
        'score_model_normalized_asap5_gb_higher',
        'score_model_normalized_asap5_gb_mean',
        'score_model_normalized_asap5_gb_sum',
    )
)

# Last expression of the cell: preview the first rows of the table.
asap.head()

Unnamed: 0,essay,rater1_domain1,rater2_domain1,domain1_score,higher,mean,sum,check
0,"In this memoir of Narciso Rodriguez, @PERSON3'...",2,2,2,2,2.0,4,True
1,Throughout the excerpt from Home the Blueprint...,2,2,2,2,2.0,4,True
2,The mood the author created in the memoir is l...,3,3,3,3,3.0,6,True
3,The mood created by the author is showing how ...,1,0,1,1,1.0,1,True
4,The mood created in the memoir is happiness an...,2,3,3,3,3.0,5,True


In [3]:
# Pull each reference-score column out of the table as a flat 1-D NumPy array.
score_higher, score_mean, score_sum = (
    asap[column].to_numpy() for column in ('higher', 'mean', 'sum')
)

In [4]:
# Cast every element to a Python int and rebuild each vector as a NumPy array.
# int() truncates toward zero, so a fractional part is dropped, not rounded.
pred_higher = np.array([int(value) for value in pred_higher])
pred_mean = np.array([int(value) for value in pred_mean])
score_mean = np.array([int(value) for value in score_mean])
pred_sum = np.array([int(value) for value in pred_sum])

In [5]:
# Quadratic weighted kappa between reference scores and predictions,
# reported once per score-resolution strategy.
for caption, reference, predicted in (
    ("QWK higher :", score_higher, pred_higher),
    ("QWK mean :", score_mean, pred_mean),
    ("QWK sum :", score_sum, pred_sum),
):
    print(caption, quadratic_weighted_kappa(reference, predicted))

QWK higher : 0.8076853895838443
QWK mean : 0.8107576056861439
QWK sum : 0.863937166689001


In [6]:
# Stack each (reference, prediction) pair into a 2 x n array:
# row 0 holds the reference scores, row 1 the predictions.
concat_higher = np.vstack((score_higher, pred_higher))
concat_mean = np.vstack((score_mean, pred_mean))
concat_sum = np.vstack((score_sum, pred_sum))

In [7]:
# Krippendorff's alpha on each stacked 2 x n array (one row per "rater"),
# using the library's default level of measurement.
for caption, ratings in (
    ("Kripppendorff's alpha higher : ", concat_higher),
    ("Kripppendorff's alpha mean : ", concat_mean),
    ("Kripppendorff's alpha sum : ", concat_sum),
):
    print(caption, krippendorff.alpha(ratings))

Kripppendorff's alpha higher :  0.8077160074886721
Kripppendorff's alpha mean :  0.8107872056903545
Kripppendorff's alpha sum :  0.8639747731775991


In [8]:
# Agreement from the project's onehalf_weighted_kappa helper ("1.5WK"),
# one line per score-resolution strategy.
for caption, reference, predicted in (
    ("1.5WK higher :", score_higher, pred_higher),
    ("1.5WK mean :", score_mean, pred_mean),
    ("1.5WK sum :", score_sum, pred_sum),
):
    print(caption, onehalf_weighted_kappa(reference, predicted))

1.5WK higher : 0.7499027047945027
1.5WK mean : 0.7550985918979962
1.5WK sum : 0.789380344768872


In [9]:
# Linear weighted kappa, one line per score-resolution strategy.
for caption, reference, predicted in (
    ("LWK higher :", score_higher, pred_higher),
    ("LWK mean :", score_mean, pred_mean),
    ("LWK sum :", score_sum, pred_sum),
):
    print(caption, linear_weighted_kappa(reference, predicted))

LWK higher : 0.6862176466969833
LWK mean : 0.6941360282996749
LWK sum : 0.6801850028824921


In [10]:
# Cohen's kappa via scikit-learn with its default arguments,
# one line per score-resolution strategy.
for caption, reference, predicted in (
    ("Kappa higher :", score_higher, pred_higher),
    ("Kappa mean :", score_mean, pred_mean),
    ("Kappa sum :", score_sum, pred_sum),
):
    print(caption, cohen_kappa_score(reference, predicted))

Kappa higher : 0.5594144204927043
Kappa mean : 0.5737309630972811
Kappa sum : 0.35334117954753497


In [11]:
# Import only the class this cell uses; a wildcard import would dump every
# public pycm name into the notebook namespace and hide where names come from.
from pycm import ConfusionMatrix

# Bangdiwala's B is read from the `B` attribute of a pycm ConfusionMatrix
# built from (reference scores, predictions) for each strategy.
cm = ConfusionMatrix(score_higher, pred_higher)
print("Bangdiwala's B higher :", cm.B)
cm = ConfusionMatrix(score_mean, pred_mean)
print("Bangdiwala's B mean :", cm.B)
cm = ConfusionMatrix(score_sum, pred_sum)
print("Bangdiwala's B sum :", cm.B)

Bangdiwala's B higher : 0.4785221974348867
Bangdiwala's B mean : 0.4944744107029093
Bangdiwala's B sum : 0.21494879969766292


# Test

In [20]:
# Confusion matrix for the "mean" strategy: reference scores vs. predictions.
c = confusion_matrix(y_true=score_mean, y_pred=pred_mean)
c

array([[  2,  18,   2,   0,   0],
       [  0, 189, 103,   5,   0],
       [  0,  55, 481, 102,   4],
       [  0,   4, 118, 409,  55],
       [  0,   0,   1,  80, 177]], dtype=int64)

In [24]:
# Confusion matrix for the "sum" strategy: reference scores vs. predictions.
# This rebinds `c`, replacing the matrix computed for the "mean" strategy.
c = confusion_matrix(y_true=score_sum, y_pred=pred_sum)
c

array([[  2,  12,   7,   1,   0,   0,   0,   0,   0],
       [  0,  17,  22,   6,   1,   1,   0,   0,   0],
       [  0,  12, 121,  98,  15,   1,   3,   0,   0],
       [  0,   3,  35, 116,  68,  13,   1,   0,   0],
       [  0,   0,  10,  89, 194,  98,  12,   3,   0],
       [  0,   0,   3,   8,  78, 132,  58,   8,   1],
       [  0,   0,   0,   1,  18, 103, 127,  48,   1],
       [  0,   0,   0,   0,   0,   9,  56,  82,   5],
       [  0,   0,   0,   0,   0,   1,  14,  66,  25]], dtype=int64)

In [25]:
# pandas is already imported as `pd` in the notebook's import cell, so the
# duplicate mid-notebook import is not repeated here.
# Wrap the confusion matrix currently held in `c` in a DataFrame.
df = pd.DataFrame(c)

# Save to an xlsx file (the row index is not written).

filepath = 'a.xlsx'
df.to_excel(filepath, index=False)

In [17]:
def weighted_matrix(N):
    """Build the N x N quadratic disagreement-weight matrix.

    Entry ``(i, j)`` equals ``(i - j)**2 / (N - 1)**2``: zero on the diagonal
    and one in the two far corners.

    Parameters
    ----------
    N : int
        Side length of the matrix (number of ordered categories).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(N, N)``. ``N == 1`` yields ``[[0.]]`` and
        ``N == 0`` yields an empty ``(0, 0)`` array.
    """
    # Allocate first so np.zeros performs its own validation of N
    # (for example, negative sizes are rejected).
    weighted = np.zeros((N, N))
    if N < 2:
        # With fewer than two categories there is nothing to normalise by;
        # returning the zeros avoids the 0/0 division of the formula.
        return weighted
    positions = np.arange(N)
    # Broadcasting a column against a row gives every pairwise (i - j).
    gaps = positions[:, None] - positions[None, :]
    weighted[:] = gaps ** 2 / (N - 1) ** 2
    return weighted

In [26]:
# Quadratic weight matrix for 9 categories.
# NOTE(review): 9 presumably matches the 9 x 9 "sum" confusion matrix — confirm.
w = weighted_matrix(N=9)
w

array([[0.      , 0.015625, 0.0625  , 0.140625, 0.25    , 0.390625,
        0.5625  , 0.765625, 1.      ],
       [0.015625, 0.      , 0.015625, 0.0625  , 0.140625, 0.25    ,
        0.390625, 0.5625  , 0.765625],
       [0.0625  , 0.015625, 0.      , 0.015625, 0.0625  , 0.140625,
        0.25    , 0.390625, 0.5625  ],
       [0.140625, 0.0625  , 0.015625, 0.      , 0.015625, 0.0625  ,
        0.140625, 0.25    , 0.390625],
       [0.25    , 0.140625, 0.0625  , 0.015625, 0.      , 0.015625,
        0.0625  , 0.140625, 0.25    ],
       [0.390625, 0.25    , 0.140625, 0.0625  , 0.015625, 0.      ,
        0.015625, 0.0625  , 0.140625],
       [0.5625  , 0.390625, 0.25    , 0.140625, 0.0625  , 0.015625,
        0.      , 0.015625, 0.0625  ],
       [0.765625, 0.5625  , 0.390625, 0.25    , 0.140625, 0.0625  ,
        0.015625, 0.      , 0.015625],
       [1.      , 0.765625, 0.5625  , 0.390625, 0.25    , 0.140625,
        0.0625  , 0.015625, 0.      ]])

In [27]:
# Write the weight matrix `w` to an Excel workbook (the row index is not written).
# This rebinds `df` and `filepath` from the earlier export cell.
filepath = 'w.xlsx'
df = pd.DataFrame(w)
df.to_excel(filepath, index=False)