In [7]:
import sys
import os
import pandas as pd
import numpy as np

# Resolve the sibling `src` directory (relative to the current working
# directory) and put it on the import path; presumably this is where the
# local modules imported further down in this cell live.
directory = os.path.abspath(os.path.join(os.pardir, 'src'))
# Only register the path when it is absent, so re-running the cell does not
# pile up duplicate sys.path entries.
if sys.path.count(directory) == 0:
    sys.path.append(directory)

from qwk import quadratic_weighted_kappa
from read_data import get_num_labels, get_essay_sets

In [9]:
# Score the model's predictions against the `Score1` column with quadratic
# weighted kappa (QWK), one essay set at a time, then report the mean QWK.

# The predictions file is identical for every essay set, so it is located and
# parsed once here instead of on every loop iteration.
data_directory = os.path.join(os.path.abspath('..'),
                              'data',
                              'predictions',
                              'test_set_with_predictions.csv')
test_set = pd.read_csv(data_directory)

qwk_list = []
for essay_set in get_essay_sets():
    # Keep only the rows that belong to the current essay set.
    test_set_filtered = test_set[test_set['EssaySet'] == essay_set]

    # NOTE(review): this value is passed as `max_rating` together with
    # `min_rating=0`. If `get_num_labels` returns a label *count* rather than
    # the highest label, the rating range is one wider than the labels --
    # confirm against `get_num_labels` / `quadratic_weighted_kappa`.
    unique_labels = get_num_labels(essay_set)

    qwk = quadratic_weighted_kappa(
        test_set_filtered['Score1'],
        test_set_filtered['model_prediction'],
        min_rating=0,
        max_rating=unique_labels,
    )

    print(f"Essay Set {essay_set}: QWK = {qwk}")
    qwk_list.append(qwk)

# Unweighted mean over essay sets (each set counts equally regardless of size).
print("Avg QWK:", np.mean(qwk_list))

Essay Set 1.0: QWK = 0.8453992915921513
Essay Set 2.0: QWK = 0.8108073190032461
Essay Set 3.0: QWK = 0.6880394067040793
Essay Set 4.0: QWK = 0.723720329404166
Essay Set 5.0: QWK = 0.7566265060240964
Essay Set 6.0: QWK = 0.8081007590821117
Essay Set 7.0: QWK = 0.6880859469704157
Essay Set 8.0: QWK = 0.5072898887495116
Essay Set 9.0: QWK = 0.8423846514660962
Essay Set 10.0: QWK = 0.770996970474256
Avg QWK: 0.744145106947013


In [11]:
# Score the benchmark predictions against the `Score1` column with quadratic
# weighted kappa (QWK), one essay set at a time, then report the mean QWK.

# The benchmark predictions file is identical for every essay set, so it is
# located and parsed once here instead of on every loop iteration.
data_directory = os.path.join(os.path.abspath('..'),
                              'data',
                              'predictions',
                              'test_set_with_benchmark_predictions.csv')
test_set = pd.read_csv(data_directory)

qwk_benchmark_list = []
for essay_set in get_essay_sets():
    # Keep only the rows that belong to the current essay set.
    test_set_filtered = test_set[test_set['EssaySet'] == essay_set]

    # NOTE(review): this value is passed as `max_rating` together with
    # `min_rating=0`. If `get_num_labels` returns a label *count* rather than
    # the highest label, the rating range is one wider than the labels --
    # confirm against `get_num_labels` / `quadratic_weighted_kappa`.
    unique_labels = get_num_labels(essay_set)

    qwk = quadratic_weighted_kappa(
        test_set_filtered['Score1'],
        test_set_filtered['benchmark_prediction'],
        min_rating=0,
        max_rating=unique_labels,
    )

    print(f"Essay Set {essay_set}: QWK = {qwk}")
    qwk_benchmark_list.append(qwk)

# Unweighted mean over essay sets (each set counts equally regardless of size).
print("Avg QWK:", np.mean(qwk_benchmark_list))

Essay Set 1.0: QWK = 0.003318952180233481
Essay Set 2.0: QWK = -0.0035764665650250826
Essay Set 3.0: QWK = 0.010172235852496159
Essay Set 4.0: QWK = 0.038664151128317004
Essay Set 5.0: QWK = -0.009816906895208533
Essay Set 6.0: QWK = 0.014581341316493801
Essay Set 7.0: QWK = -0.00735237713092074
Essay Set 8.0: QWK = -0.06210502386906147
Essay Set 9.0: QWK = -0.011245811543447282
Essay Set 10.0: QWK = -0.07843500903193257
Avg QWK: -0.010579491455805523
