In [82]:
import joblib
import pandas as pd
from quadratic_weighted_kappa import quadratic_weighted_kappa
from scipy.stats import pearsonr
from sklearn.metrics import accuracy_score, mean_squared_error
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

In [2]:
# Raw ASAP training table (tab-separated, Latin-1 encoded); it is narrowed to
# one essay set and to the rater columns in the following cell.
scores = pd.read_csv(
    '../training_set_rel3.tsv',
    sep='\t',
    encoding='ISO-8859-1',
)

# Target scores that every model below is evaluated against.
# NOTE(review): joblib.load unpickles the file - only load artefacts you trust.
y = joblib.load('score_asap7')

In [3]:
# Keep only the rows of one essay set and only the per-rater score columns.
ESSAY_SET = 7
RATER_COLUMNS = ['rater1_domain1', 'rater2_domain1', 'rater3_domain1']

# The result is stored back into `scores` because later cells read that name.
# After the first run `scores` no longer has an 'essay_set' column, so the
# guard keeps a re-run of this cell from raising KeyError (the cell is now
# idempotent instead of working only once per kernel session).
if 'essay_set' in scores.columns:
    # Single .loc selection: row mask and column list in one step.
    scores = scores.loc[scores['essay_set'] == ESSAY_SET, RATER_COLUMNS]

# Number of essays kept (displayed as the cell output).
len(scores)

1569

### Gradient Boosting: Quantitative Assessment of the AES Model

In [83]:
# Stored predictions of the gradient-boosting model; `model_score` is the name
# the metrics cell below reads.
# NOTE(review): joblib.load unpickles the file - only load artefacts you trust.
GB_SCORES_FILE = 'score_model_gb_normalized_float_laser_whole'
model_score = joblib.load(GB_SCORES_FILE)

In [84]:
# ---- Agreement (quadratic weighted kappa) ----------------------------------
# Model-vs-target QWK next to the QWK between the first two human raters.
# "Degradation" is human QWK minus model QWK, so a negative value means the
# model agrees with `y` more than rater 1 agrees with rater 2.
# NOTE(review): model_score is passed in unrounded here (it is only rounded
# further down) - confirm how quadratic_weighted_kappa discretises it.
rater1_score = scores['rater1_domain1']
rater2_score = scores['rater2_domain1']
qwk_model = quadratic_weighted_kappa(y, model_score)
qwk_human = quadratic_weighted_kappa(rater1_score, rater2_score)

print("QWK Score: ", qwk_model)
print("Human Agreement: ", qwk_human)
print("Degradation: ", qwk_human - qwk_model)

# Plain-array copies of the two rater columns (not used again in this cell).
human1_score = np.array(rater1_score)
human2_score = np.array(rater2_score)

# ---- Standardised mean difference ------------------------------------------
# |mean(model) - mean(y)| divided by the root of the averaged variances.
# np.var is used with its default settings.
model_mean, y_mean = np.mean(model_score), np.mean(y)
model_variance, y_variance = np.var(model_score), np.var(y)
mean_diff = abs(model_mean - y_mean)
pooled_sd = np.sqrt((model_variance + y_variance) / 2)
z = mean_diff / pooled_sd
print("Z : ", z)

# ---- Linear correlation ----------------------------------------------------
corr, p_value = pearsonr(y, model_score)
print(f"Pearson correlation: {corr:.3f}")
print(f"P-value: {p_value:.3f}")

# ---- Accuracy on integer-rounded predictions -------------------------------
n_essays = len(y)
model_score_rounded = np.rint(model_score).astype(int)
errors = np.abs(y - model_score_rounded)

# NOTE(review): "adjacent" here means within 3 score points, not within 1 -
# confirm that this band is the intended definition.
adjacent = np.sum(errors <= 3)
exact_matches = np.sum(y == model_score_rounded)
adjacent_accuracy = adjacent / n_essays
exact_accuracy = exact_matches / n_essays

print(f'Adjacent accuracy: {adjacent_accuracy:.2%}')
print(f'Exact accuracy: {exact_accuracy:.2%}')

# ---- Error magnitude (computed on the unrounded predictions) ---------------
mse = mean_squared_error(y, model_score)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse:.3f}')

QWK Score:  0.767266817272541
Human Agreement:  0.7214784742548883
Degradation:  -0.045788343017652666
Z :  0.0001491376728501522
Pearson correlation: 0.805
P-value: 0.000
Adjacent accuracy: 80.82%
Exact accuracy: 13.70%
RMSE: 2.728


### Random Forest: Quantitative Assessment of the AES Model

In [85]:
# Stored predictions of the random-forest model; `model_score` is the name the
# metrics cell below reads.
# NOTE(review): joblib.load unpickles the file - only load artefacts you trust.
RF_SCORES_FILE = 'score_model_rf_normalized_float_laser_whole'
model_score = joblib.load(RF_SCORES_FILE)

In [86]:
# ---- Agreement (quadratic weighted kappa) ----------------------------------
# Model-vs-target QWK next to the QWK between the first two human raters.
# "Degradation" is human QWK minus model QWK, so a negative value means the
# model agrees with `y` more than rater 1 agrees with rater 2.
# NOTE(review): model_score is passed in unrounded here (it is only rounded
# further down) - confirm how quadratic_weighted_kappa discretises it.
rater1_score = scores['rater1_domain1']
rater2_score = scores['rater2_domain1']
qwk_model = quadratic_weighted_kappa(y, model_score)
qwk_human = quadratic_weighted_kappa(rater1_score, rater2_score)

print("QWK Score: ", qwk_model)
print("Human Agreement: ", qwk_human)
print("Degradation: ", qwk_human - qwk_model)

# Plain-array copies of the two rater columns (not used again in this cell).
human1_score = np.array(rater1_score)
human2_score = np.array(rater2_score)

# ---- Standardised mean difference ------------------------------------------
# |mean(model) - mean(y)| divided by the root of the averaged variances.
# np.var is used with its default settings.
model_mean, y_mean = np.mean(model_score), np.mean(y)
model_variance, y_variance = np.var(model_score), np.var(y)
mean_diff = abs(model_mean - y_mean)
pooled_sd = np.sqrt((model_variance + y_variance) / 2)
z = mean_diff / pooled_sd
print("Z : ", z)

# ---- Linear correlation ----------------------------------------------------
corr, p_value = pearsonr(y, model_score)
print(f"Pearson correlation: {corr:.3f}")
print(f"P-value: {p_value:.3f}")

# ---- Accuracy on integer-rounded predictions -------------------------------
n_essays = len(y)
model_score_rounded = np.rint(model_score).astype(int)
errors = np.abs(y - model_score_rounded)

# NOTE(review): "adjacent" here means within 3 score points, not within 1 -
# confirm that this band is the intended definition.
adjacent = np.sum(errors <= 3)
exact_matches = np.sum(y == model_score_rounded)
adjacent_accuracy = adjacent / n_essays
exact_accuracy = exact_matches / n_essays

print(f'Adjacent accuracy: {adjacent_accuracy:.2%}')
print(f'Exact accuracy: {exact_accuracy:.2%}')

# ---- Error magnitude (computed on the unrounded predictions) ---------------
mse = mean_squared_error(y, model_score)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse:.3f}')

QWK Score:  0.7128991492478383
Human Agreement:  0.7214784742548883
Degradation:  0.008579325007050054
Z :  0.006820330767998374
Pearson correlation: 0.777
P-value: 0.000
Adjacent accuracy: 77.82%
Exact accuracy: 14.60%
RMSE: 2.923


### Ridge Regression: Quantitative Assessment of the AES Model

In [87]:
# Stored predictions of the ridge-regression model; `model_score` is the name
# the metrics cell below reads.
# NOTE(review): joblib.load unpickles the file - only load artefacts you trust.
RR_SCORES_FILE = 'score_model_rr_normalized_float_laser_whole'
model_score = joblib.load(RR_SCORES_FILE)

In [88]:
# ---- Agreement (quadratic weighted kappa) ----------------------------------
# Model-vs-target QWK next to the QWK between the first two human raters.
# "Degradation" is human QWK minus model QWK, so a negative value means the
# model agrees with `y` more than rater 1 agrees with rater 2.
# NOTE(review): model_score is passed in unrounded here (it is only rounded
# further down) - confirm how quadratic_weighted_kappa discretises it.
rater1_score = scores['rater1_domain1']
rater2_score = scores['rater2_domain1']
qwk_model = quadratic_weighted_kappa(y, model_score)
qwk_human = quadratic_weighted_kappa(rater1_score, rater2_score)

print("QWK Score: ", qwk_model)
print("Human Agreement: ", qwk_human)
print("Degradation: ", qwk_human - qwk_model)

# Plain-array copies of the two rater columns (not used again in this cell).
human1_score = np.array(rater1_score)
human2_score = np.array(rater2_score)

# ---- Standardised mean difference ------------------------------------------
# |mean(model) - mean(y)| divided by the root of the averaged variances.
# np.var is used with its default settings.
model_mean, y_mean = np.mean(model_score), np.mean(y)
model_variance, y_variance = np.var(model_score), np.var(y)
mean_diff = abs(model_mean - y_mean)
pooled_sd = np.sqrt((model_variance + y_variance) / 2)
z = mean_diff / pooled_sd
print("Z : ", z)

# ---- Linear correlation ----------------------------------------------------
corr, p_value = pearsonr(y, model_score)
print(f"Pearson correlation: {corr:.3f}")
print(f"P-value: {p_value:.3f}")

# ---- Accuracy on integer-rounded predictions -------------------------------
n_essays = len(y)
model_score_rounded = np.rint(model_score).astype(int)
errors = np.abs(y - model_score_rounded)

# NOTE(review): "adjacent" here means within 3 score points, not within 1 -
# confirm that this band is the intended definition.
adjacent = np.sum(errors <= 3)
exact_matches = np.sum(y == model_score_rounded)
adjacent_accuracy = adjacent / n_essays
exact_accuracy = exact_matches / n_essays

print(f'Adjacent accuracy: {adjacent_accuracy:.2%}')
print(f'Exact accuracy: {exact_accuracy:.2%}')

# ---- Error magnitude (computed on the unrounded predictions) ---------------
mse = mean_squared_error(y, model_score)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse:.3f}')

QWK Score:  0.7579343835000356
Human Agreement:  0.7214784742548883
Degradation:  -0.0364559092451473
Z :  0.002380282391673543
Pearson correlation: 0.781
P-value: 0.000
Adjacent accuracy: 78.78%
Exact accuracy: 14.09%
RMSE: 2.869


In [64]:
# Smallest value among the target scores `y` that the models are evaluated against.
np.min(y)

2

In [65]:
# Largest value among the target scores `y` that the models are evaluated against.
np.max(y)

24