In [1]:
import xgboost
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
x = joblib.load('essay_ease10_sbert768_simbow_langerr_780_normalized_asap7')
x_off = joblib.load('essay_asap7_780_with350offtopic')
y = joblib.load('score_asap7')
y_off = joblib.load('score_asap7_with350offtopic')
off = joblib.load('essay_350_offtopic_780_except7')

In [3]:
print(x.shape)
print(x_off.shape)

(1569, 780)
(1919, 780)


In [4]:
print(y.shape)
print(y_off.shape)

(1569,)
(1919,)


In [5]:
print(off.shape)

(350, 780)


In [6]:
def get_feature_names_extended():
    ease_feats = ['Answer Length', 'Word Counts', 'Average Word Length', 'Good n-gram', 'Prompt Overlap',
              'Prompt Overlap (synonyms)', 'Punctuation Counts', 'Spelling Error', 'Unique Words', 'Prompt Similarity SBert']

    sbert_feats = []
    sbert_dim = 768

    for i in range(0, sbert_dim):
    	fname = "sbert_" + str(i) 
    	sbert_feats.append(fname)
    
    prompt_similarity_bow = ["Prompt Similarity BOW"]
    lang_error = ["Language Error"]
    
    feature_names = ease_feats + prompt_similarity_bow + lang_error + sbert_feats 

    print("len feature names: ", len(feature_names))
    
    return feature_names

feature_names = get_feature_names_extended()

len feature names:  780


### create 5-fold

In [7]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
print(kf)

KFold(n_splits=5, random_state=42, shuffle=True)


In [8]:
model2 = xgboost.XGBRegressor(objective ='reg:squarederror',
                colsample_bytree=0.4,
                 gamma=0,                 
                 learning_rate=0.03,
                 max_depth=3,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42) 

### training using original + off topic data (1919 essays)

In [9]:
qwk_scores = []
qwk_scores_ori = []

acc_scores = []
acc_scores_ori = []
acc_scores_off = []

test_indices = []
test_indices_ori = []
test_indices_off = []

pred_labels = []
pred_labels_ori = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x_off, y_off):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x_off[train_index], x_off[test_index], y_off[train_index], y_off[test_index]
         
    model2.fit(X_train, Y_train)
    
    
    # PREDICT AND EVALUATE ALL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))

    # PREDICT AND EVALUATE ONLY ORIGINAL ESSAY
    test_index_ori = [a for a in test_index if a < 1569]
    x_test_ori = x_off[test_index_ori]
    y_test_ori = y_off[test_index_ori]
    predict_ori = model2.predict(x_test_ori)
    predict_ori = np.round(predict_ori)
    pred_labels_ori.extend(predict_ori)
    
    result_qwk_ori = quadratic_weighted_kappa(y_test_ori, predict_ori)
    print("Qwk original : ", result_qwk_ori)
    qwk_scores_ori.append(result_qwk_ori)
    
    result_acc_ori = accuracy_score(y_test_ori, predict_ori)
    print("Acc original : ", result_acc_ori)
    acc_scores_ori.append(result_acc_ori)
    
    print("len ori : ", len(test_index_ori))
    
    # PREDICT AND EVALUATE ONLY OFF-TOPIC ESSAY
    test_index_off = [a for a in test_index if a > 1568]
    x_test_off = x_off[test_index_off]
    y_test_off = y_off[test_index_off]
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(test_index_off))

print("\nMean QWK : ", np.mean(qwk_scores))
print("\nMean QWK Original : ", np.mean(qwk_scores_ori))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Original : ", np.mean(acc_scores_ori))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.9016426144410878
Acc :  0.171875
len all :  384
Qwk original :  0.7524821832400828
Acc original :  0.14873417721518986
len ori :  316
Acc off topic :  0.27941176470588236
len off :  68

Loop - 2
Qwk :  0.9043371237849961
Acc :  0.1640625
len all :  384
Qwk original :  0.7440852275095724
Acc original :  0.1619047619047619
len ori :  315
Acc off topic :  0.17391304347826086
len off :  69

Loop - 3
Qwk :  0.895320334589317
Acc :  0.15364583333333334
len all :  384
Qwk original :  0.7236140537430722
Acc original :  0.14826498422712933
len ori :  317
Acc off topic :  0.1791044776119403
len off :  67

Loop - 4
Qwk :  0.9038776389059072
Acc :  0.16145833333333334
len all :  384
Qwk original :  0.7342747931717638
Acc original :  0.1536050156739812
len ori :  319
Acc off topic :  0.2
len off :  65

Loop - 5
Qwk :  0.8856273988588923
Acc :  0.16449086161879894
len all :  383
Qwk original :  0.657827529032262
Acc original :  0.1490066225165563
len ori :  302
Acc off topic :  0.

#### also check for minus score!

In [10]:
print(Counter(pred_labels_ori))

Counter({17.0: 166, 16.0: 153, 18.0: 150, 20.0: 132, 14.0: 132, 15.0: 130, 19.0: 124, 13.0: 118, 12.0: 109, 21.0: 81, 11.0: 70, 10.0: 51, 9.0: 47, 22.0: 45, 7.0: 19, 8.0: 18, 23.0: 14, 6.0: 4, 3.0: 2, 24.0: 2, 5.0: 1, 4.0: 1})


In [11]:
print(Counter(pred_labels_off))

Counter({-0.0: 74, -1.0: 57, 1.0: 55, 3.0: 36, 2.0: 35, -2.0: 22, 4.0: 17, 5.0: 14, 7.0: 9, 9.0: 6, 8.0: 5, -3.0: 4, 6.0: 4, 11.0: 4, -4.0: 3, 12.0: 2, 20.0: 1, 10.0: 1, 15.0: 1})


In [18]:
sum(i < 4 for i in pred_labels_off)

286

In [19]:
# SO the Accuracies is 160 / 350
print("\nMean Accuracy Off Topic : ", sum(i < 4 for i in pred_labels_off) / len(pred_labels_off))


Mean Accuracy Off Topic :  0.8171428571428572


### training using original data (1569 essays)

In [13]:
qwk_scores = []

acc_scores = []
acc_scores_off = []

test_indices = []
test_indices_off = []

pred_labels = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x, y):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x[train_index], x[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)    
    
    # PREDICT AND EVALUATE ORIGINAL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))
    
    # PREDICT AND EVALUATE ONLY offBERISH ESSAY
    x_test_off = off
    y_test_off = np.zeros(350)
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(x_test_off))

print("\nMean QWK : ", np.mean(qwk_scores))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.7931416123535302
Acc :  0.16560509554140126
len all :  314
Acc off topic :  0.0
len off :  350

Loop - 2
Qwk :  0.7780216629342351
Acc :  0.15605095541401273
len all :  314
Acc off topic :  0.0
len off :  350

Loop - 3
Qwk :  0.8062406348636821
Acc :  0.14012738853503184
len all :  314
Acc off topic :  0.0
len off :  350

Loop - 4
Qwk :  0.7932437700883994
Acc :  0.16878980891719744
len all :  314
Acc off topic :  0.0
len off :  350

Loop - 5
Qwk :  0.7149977236239904
Acc :  0.1182108626198083
len all :  313
Acc off topic :  0.0
len off :  350

Mean QWK :  0.7771290807727675

Mean Accuracy :  0.1497568222054903

Mean Accuracy Off Topic :  0.0


In [14]:
print(Counter(pred_labels_ori))
print(Counter(pred_labels_off))

Counter({17.0: 166, 16.0: 153, 18.0: 150, 20.0: 132, 14.0: 132, 15.0: 130, 19.0: 124, 13.0: 118, 12.0: 109, 21.0: 81, 11.0: 70, 10.0: 51, 9.0: 47, 22.0: 45, 7.0: 19, 8.0: 18, 23.0: 14, 6.0: 4, 3.0: 2, 24.0: 2, 5.0: 1, 4.0: 1})
Counter({16.0: 240, 18.0: 217, 17.0: 216, 15.0: 197, 14.0: 144, 19.0: 142, 13.0: 111, 20.0: 106, 12.0: 66, 22.0: 64, 23.0: 61, 21.0: 52, 11.0: 46, 10.0: 41, 24.0: 24, 9.0: 9, 25.0: 7, 8.0: 6, 7.0: 1})


In [37]:
model = joblib.load('model_asap6_extended_780_normalized')

d_off = xgboost.DMatrix(off, feature_names=feature_names)
pred = model.predict(d_off)

In [38]:
pred = np.round(pred)
pred

array([1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 2., 1.,
       2., 2., 2., 3., 1., 2., 1., 1., 1., 2., 3., 2., 2., 1., 2., 2., 1.,
       2., 2., 1., 1., 1., 2., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 2.,
       2., 2., 3., 1., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2.,
       2., 3., 1., 3., 2., 0., 1., 2., 2., 2., 2., 1., 1., 2., 2., 2., 2.,
       3., 3., 2., 3., 2., 1., 2., 3., 2., 2., 2., 2., 3., 1., 2., 2., 3.,
       1., 3., 2., 2., 2., 1., 2., 1., 2., 1., 1., 2., 2., 1., 2., 2., 1.,
       1., 2., 2., 2., 1., 1., 1., 1., 3., 2., 2., 3., 3., 2., 1., 2., 3.,
       2., 1., 2., 1., 2., 2., 2., 2., 1., 2., 1., 2., 1., 3., 1., 1., 1.,
       2., 0., 0., 1., 2., 1., 2., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 2., 1., 0.,
       1., 0., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 0., 2.,
       2., 1., 1., 1., 1., 2., 1., 1., 0., 1., 1., 0., 0., 2., 1., 1., 1.,
       1., 0., 2., 1., 1.

In [39]:
from collections import Counter
Counter(pred)

Counter({1.0: 169, 2.0: 142, 3.0: 18, 0.0: 21})

In [42]:
pred_failed = [a for a in pred if a < 2]
acc = (len(pred_failed) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 54.29%


In [43]:
pred_zero = [a for a in pred if a == 0]
acc = (len(pred_zero) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 6.00%


In [44]:
len(y_off)

2150

In [46]:
print(y_off)

[2. 3. 4. ... 0. 0. 0.]
