In [1]:
import xgboost
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
x = joblib.load('essay_ease10_sbert768_simbow_langerr_780_normalized_asap6')
x_off = joblib.load('essay_asap6_780_with350offtopic')
y = joblib.load('score_asap6')
y_off = joblib.load('score_asap6_with350offtopic')
off = joblib.load('essay_350_offtopic_780')

In [3]:
print(x.shape)
print(x_off.shape)

(1800, 780)
(2150, 780)


In [4]:
print(y.shape)
print(y_off.shape)

(1800,)
(2150,)


In [5]:
print(off.shape)

(350, 780)


In [6]:
def get_feature_names_extended():
    ease_feats = ['Answer Length', 'Word Counts', 'Average Word Length', 'Good n-gram', 'Prompt Overlap',
              'Prompt Overlap (synonyms)', 'Punctuation Counts', 'Spelling Error', 'Unique Words', 'Prompt Similarity SBert']

    sbert_feats = []
    sbert_dim = 768

    for i in range(0, sbert_dim):
    	fname = "sbert_" + str(i) 
    	sbert_feats.append(fname)
    
    prompt_similarity_bow = ["Prompt Similarity BOW"]
    lang_error = ["Language Error"]
    
    feature_names = ease_feats + prompt_similarity_bow + lang_error + sbert_feats 

    print("len feature names: ", len(feature_names))
    
    return feature_names

feature_names = get_feature_names_extended()

len feature names:  780


### create 5-fold

In [7]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
print(kf)

KFold(n_splits=5, random_state=42, shuffle=True)


In [8]:
model2 = xgboost.XGBRegressor(objective ='reg:squarederror',
                colsample_bytree=0.4,
                 gamma=0,                 
                 learning_rate=0.05,
                 max_depth=3,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42) 

### training using original + off topic data (2150 essays)

In [9]:
qwk_scores = []
qwk_scores_ori = []
qwk_scores_off = []

acc_scores = []
acc_scores_ori = []
acc_scores_off = []

test_indices = []
test_indices_ori = []
test_indices_off = []

pred_labels = []
pred_labels_ori = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x_off, y_off):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x_off[train_index], x_off[test_index], y_off[train_index], y_off[test_index]
         
    model2.fit(X_train, Y_train)
    
    
    # PREDICT AND EVALUATE ALL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))

    # PREDICT AND EVALUATE ONLY ORIGINAL ESSAY
    test_index_ori = [a for a in test_index if a < 1800]
    x_test_ori = x_off[test_index_ori]
    y_test_ori = y_off[test_index_ori]
    predict_ori = model2.predict(x_test_ori)
    predict_ori = np.round(predict_ori)
    pred_labels_ori.extend(predict_ori)
    
    result_qwk_ori = quadratic_weighted_kappa(y_test_ori, predict_ori)
    print("Qwk original : ", result_qwk_ori)
    qwk_scores_ori.append(result_qwk_ori)
    
    result_acc_ori = accuracy_score(y_test_ori, predict_ori)
    print("Acc original : ", result_acc_ori)
    acc_scores_ori.append(result_acc_ori)
    
    print("len ori : ", len(test_index_ori))
    
    # PREDICT AND EVALUATE ONLY GIBBERISH ESSAY
    test_index_off = [a for a in test_index if a > 1799]
    x_test_off = x_off[test_index_off]
    y_test_off = y_off[test_index_off]
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(test_index_off))

print("\nMean QWK : ", np.mean(qwk_scores))
print("\nMean QWK Original : ", np.mean(qwk_scores_ori))
print("\nMean QWK Off Topic : ", np.mean(qwk_scores_off))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Original : ", np.mean(acc_scores_ori))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.9111760667724871
Acc :  0.7046511627906977
len all :  430
Qwk original :  0.7892781364683599
Acc original :  0.651685393258427
len ori :  356
Acc off topic :  0.9594594594594594
len off :  74

Loop - 2
Qwk :  0.9032109617545744
Acc :  0.7
len all :  430
Qwk original :  0.7650147450190606
Acc original :  0.6446280991735537
len ori :  363
Acc off topic :  1.0
len off :  67

Loop - 3
Qwk :  0.9095905832491764
Acc :  0.7255813953488373
len all :  430
Qwk original :  0.7995895330938944
Acc original :  0.6854838709677419
len ori :  372
Acc off topic :  0.9827586206896551
len off :  58

Loop - 4
Qwk :  0.9095073037880664
Acc :  0.7232558139534884
len all :  430
Qwk original :  0.8006529002201838
Acc original :  0.6815642458100558
len ori :  358
Acc off topic :  0.9305555555555556
len off :  72

Loop - 5
Qwk :  0.9129668729523116
Acc :  0.6906976744186046
len all :  430
Qwk original :  0.7822639372117486
Acc original :  0.6239316239316239
len ori :  351
Acc off topic :  0.98

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


#### also check for minus score!

In [10]:
print(Counter(pred_labels_ori))

Counter({3.0: 837, 2.0: 466, 4.0: 324, 1.0: 157, 0.0: 16})


In [11]:
print(Counter(pred_labels_off))

Counter({-0.0: 340, 1.0: 10})


In [12]:
sum(i < 1 for i in pred_labels_off)

340

In [13]:
# SO the Accuracies is 340 / 350
print("\nMean Accuracy Off Topic : ", sum(i < 1 for i in pred_labels_off) / len(pred_labels_off))


Mean Accuracy Off Topic :  0.9714285714285714


### training using original data (1800 essays)

In [14]:
qwk_scores = []

acc_scores = []
acc_scores_off = []

test_indices = []
test_indices_off = []

pred_labels = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x, y):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x[train_index], x[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)    
    
    # PREDICT AND EVALUATE ORIGINAL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))
    
    # PREDICT AND EVALUATE ONLY offBERISH ESSAY
    x_test_off = off
    y_test_off = np.zeros(350)
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(x_test_off))

print("\nMean QWK : ", np.mean(qwk_scores))
print("\nMean QWK Off Topic : ", np.mean(qwk_scores_off))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.8260045489006823
Acc :  0.7166666666666667
len all :  360
Acc off topic :  0.08
len off :  350

Loop - 2
Qwk :  0.7988882527839849
Acc :  0.6972222222222222
len all :  360
Acc off topic :  0.07428571428571429
len off :  350

Loop - 3
Qwk :  0.8027769012306721
Acc :  0.6611111111111111
len all :  360
Acc off topic :  0.014285714285714285
len off :  350

Loop - 4
Qwk :  0.7795355099293168
Acc :  0.6666666666666666
len all :  360
Acc off topic :  0.05714285714285714
len off :  350

Loop - 5
Qwk :  0.7854984894259819
Acc :  0.6222222222222222
len all :  360
Acc off topic :  0.06285714285714286
len off :  350

Mean QWK :  0.7985407404541276

Mean QWK Off Topic :  nan

Mean Accuracy :  0.6727777777777777

Mean Accuracy Off Topic :  0.05771428571428572


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [15]:
print(Counter(pred_labels_ori))
print(Counter(pred_labels_off))

Counter({3.0: 837, 2.0: 466, 4.0: 324, 1.0: 157, 0.0: 16})
Counter({1.0: 788, 2.0: 762, 0.0: 101, 3.0: 99})


In [37]:
model = joblib.load('model_asap6_extended_780_normalized')

d_off = xgboost.DMatrix(off, feature_names=feature_names)
pred = model.predict(d_off)

In [38]:
pred = np.round(pred)
pred

array([1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 2., 1.,
       2., 2., 2., 3., 1., 2., 1., 1., 1., 2., 3., 2., 2., 1., 2., 2., 1.,
       2., 2., 1., 1., 1., 2., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 2.,
       2., 2., 3., 1., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2.,
       2., 3., 1., 3., 2., 0., 1., 2., 2., 2., 2., 1., 1., 2., 2., 2., 2.,
       3., 3., 2., 3., 2., 1., 2., 3., 2., 2., 2., 2., 3., 1., 2., 2., 3.,
       1., 3., 2., 2., 2., 1., 2., 1., 2., 1., 1., 2., 2., 1., 2., 2., 1.,
       1., 2., 2., 2., 1., 1., 1., 1., 3., 2., 2., 3., 3., 2., 1., 2., 3.,
       2., 1., 2., 1., 2., 2., 2., 2., 1., 2., 1., 2., 1., 3., 1., 1., 1.,
       2., 0., 0., 1., 2., 1., 2., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 2., 1., 0.,
       1., 0., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 0., 2.,
       2., 1., 1., 1., 1., 2., 1., 1., 0., 1., 1., 0., 0., 2., 1., 1., 1.,
       1., 0., 2., 1., 1.

In [39]:
from collections import Counter
Counter(pred)

Counter({1.0: 169, 2.0: 142, 3.0: 18, 0.0: 21})

In [42]:
pred_failed = [a for a in pred if a < 2]
acc = (len(pred_failed) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 54.29%


In [43]:
pred_zero = [a for a in pred if a == 0]
acc = (len(pred_zero) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 6.00%


In [44]:
len(y_off)

2150

In [46]:
print(y_off)

[2. 3. 4. ... 0. 0. 0.]
