In [1]:
import xgboost
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
x = joblib.load('essay_ease10_sbert768_simbow_langerr_780_normalized_asap4')
x_off = joblib.load('essay_asap4_780_with350offtopic')
y = joblib.load('score_asap4')
y_off = joblib.load('score_asap4_with350offtopic')
off = joblib.load('essay_350_offtopic_780_except4')

In [3]:
print(x.shape)
print(x_off.shape)

(1772, 780)
(2122, 780)


In [4]:
print(y.shape)
print(y_off.shape)

(1772,)
(2122,)


In [5]:
print(off.shape)

(350, 780)


In [6]:
def get_feature_names_extended():
    ease_feats = ['Answer Length', 'Word Counts', 'Average Word Length', 'Good n-gram', 'Prompt Overlap',
              'Prompt Overlap (synonyms)', 'Punctuation Counts', 'Spelling Error', 'Unique Words', 'Prompt Similarity SBert']

    sbert_feats = []
    sbert_dim = 768

    for i in range(0, sbert_dim):
    	fname = "sbert_" + str(i) 
    	sbert_feats.append(fname)
    
    prompt_similarity_bow = ["Prompt Similarity BOW"]
    lang_error = ["Language Error"]
    
    feature_names = ease_feats + prompt_similarity_bow + lang_error + sbert_feats 

    print("len feature names: ", len(feature_names))
    
    return feature_names

feature_names = get_feature_names_extended()

len feature names:  780


### create 5-fold

In [10]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
print(kf)

KFold(n_splits=5, random_state=42, shuffle=True)


In [11]:
model2 = xgboost.XGBRegressor(objective ='reg:squarederror',
                colsample_bytree=0.4,
                 gamma=0,                 
                 learning_rate=0.05,
                 max_depth=3,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42)

### training using original + off topic data (2122 essays)

In [12]:
qwk_scores = []
qwk_scores_ori = []

acc_scores = []
acc_scores_ori = []
acc_scores_off = []

test_indices = []
test_indices_ori = []
test_indices_off = []

pred_labels = []
pred_labels_ori = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x_off, y_off):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x_off[train_index], x_off[test_index], y_off[train_index], y_off[test_index]
         
    model2.fit(X_train, Y_train)
    
    
    # PREDICT AND EVALUATE ALL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))

    # PREDICT AND EVALUATE ONLY ORIGINAL ESSAY
    test_index_ori = [a for a in test_index if a < 1772]
    x_test_ori = x_off[test_index_ori]
    y_test_ori = y_off[test_index_ori]
    predict_ori = model2.predict(x_test_ori)
    predict_ori = np.round(predict_ori)
    pred_labels_ori.extend(predict_ori)
    
    result_qwk_ori = quadratic_weighted_kappa(y_test_ori, predict_ori)
    print("Qwk original : ", result_qwk_ori)
    qwk_scores_ori.append(result_qwk_ori)
    
    result_acc_ori = accuracy_score(y_test_ori, predict_ori)
    print("Acc original : ", result_acc_ori)
    acc_scores_ori.append(result_acc_ori)
    
    print("len ori : ", len(test_index_ori))
    
    # PREDICT AND EVALUATE ONLY OFF-TOPIC ESSAY
    test_index_off = [a for a in test_index if a > 1771]
    x_test_off = x_off[test_index_off]
    y_test_off = y_off[test_index_off]
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(test_index_off))

print("\nMean QWK : ", np.mean(qwk_scores))
print("\nMean QWK Original : ", np.mean(qwk_scores_ori))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Original : ", np.mean(acc_scores_ori))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.8165710296643677
Acc :  0.7105882352941176
len all :  425
Qwk original :  0.7482342825557352
Acc original :  0.6598837209302325
len ori :  344
Acc off topic :  0.9259259259259259
len off :  81

Loop - 2
Qwk :  0.8215936945003008
Acc :  0.6847058823529412
len all :  425
Qwk original :  0.7608976398585039
Acc original :  0.6410958904109589
len ori :  365
Acc off topic :  0.95
len off :  60

Loop - 3
Qwk :  0.8353482323475581
Acc :  0.7240566037735849
len all :  424
Qwk original :  0.7881180743085948
Acc original :  0.6948228882833788
len ori :  367
Acc off topic :  0.9122807017543859
len off :  57

Loop - 4
Qwk :  0.8236505086774386
Acc :  0.7004716981132075
len all :  424
Qwk original :  0.7681376805873529
Acc original :  0.6629213483146067
len ori :  356
Acc off topic :  0.8970588235294118
len off :  68

Loop - 5
Qwk :  0.8421453568626103
Acc :  0.714622641509434
len all :  424
Qwk original :  0.7829207032651596
Acc original :  0.6705882352941176
len ori :  340
Acc o

In [13]:
Counter(pred_labels_ori)

Counter({3.0: 200, 1.0: 746, 2.0: 592, 0.0: 234})

In [14]:
Counter(pred_labels_off)

Counter({0.0: 320, 1.0: 30})

### training using original data (1772 essays)

In [15]:
qwk_scores = []

acc_scores = []
acc_scores_off = []

test_indices = []
test_indices_off = []

pred_labels = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x, y):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x[train_index], x[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)    
    
    # PREDICT AND EVALUATE ORIGINAL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))
    
    # PREDICT AND EVALUATE ONLY offBERISH ESSAY
    x_test_off = off
    y_test_off = np.zeros(350)
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(x_test_off))

print("\nMean QWK : ", np.mean(qwk_scores))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.7889320832380516
Acc :  0.6873239436619718
len all :  355
Acc off topic :  0.05714285714285714
len off :  350

Loop - 2
Qwk :  0.7567900610151411
Acc :  0.6450704225352113
len all :  355
Acc off topic :  0.03142857142857143
len off :  350

Loop - 3
Qwk :  0.7580874081394493
Acc :  0.6666666666666666
len all :  354
Acc off topic :  0.05142857142857143
len off :  350

Loop - 4
Qwk :  0.7500549157435592
Acc :  0.652542372881356
len all :  354
Acc off topic :  0.04285714285714286
len off :  350

Loop - 5
Qwk :  0.8139692013175305
Acc :  0.692090395480226
len all :  354
Acc off topic :  0.03428571428571429
len off :  350

Mean QWK :  0.7735667338907464

Mean Accuracy :  0.6687387602450864

Mean Accuracy Off Topic :  0.043428571428571434


In [16]:
print(Counter(pred_labels_ori))
print(Counter(pred_labels_off))

Counter({1.0: 746, 2.0: 592, 0.0: 234, 3.0: 200})
Counter({2.0: 1052, 1.0: 597, 0.0: 76, 3.0: 25})


In [37]:
model = joblib.load('model_asap6_extended_780_normalized')

d_off = xgboost.DMatrix(off, feature_names=feature_names)
pred = model.predict(d_off)

In [38]:
pred = np.round(pred)
pred

array([1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 2., 1.,
       2., 2., 2., 3., 1., 2., 1., 1., 1., 2., 3., 2., 2., 1., 2., 2., 1.,
       2., 2., 1., 1., 1., 2., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 2.,
       2., 2., 3., 1., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2.,
       2., 3., 1., 3., 2., 0., 1., 2., 2., 2., 2., 1., 1., 2., 2., 2., 2.,
       3., 3., 2., 3., 2., 1., 2., 3., 2., 2., 2., 2., 3., 1., 2., 2., 3.,
       1., 3., 2., 2., 2., 1., 2., 1., 2., 1., 1., 2., 2., 1., 2., 2., 1.,
       1., 2., 2., 2., 1., 1., 1., 1., 3., 2., 2., 3., 3., 2., 1., 2., 3.,
       2., 1., 2., 1., 2., 2., 2., 2., 1., 2., 1., 2., 1., 3., 1., 1., 1.,
       2., 0., 0., 1., 2., 1., 2., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 2., 1., 0.,
       1., 0., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 0., 2.,
       2., 1., 1., 1., 1., 2., 1., 1., 0., 1., 1., 0., 0., 2., 1., 1., 1.,
       1., 0., 2., 1., 1.

In [39]:
from collections import Counter
Counter(pred)

Counter({1.0: 169, 2.0: 142, 3.0: 18, 0.0: 21})

In [42]:
pred_failed = [a for a in pred if a < 2]
acc = (len(pred_failed) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 54.29%


In [43]:
pred_zero = [a for a in pred if a == 0]
acc = (len(pred_zero) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 6.00%


In [44]:
len(y_off)

2150

In [46]:
print(y_off)

[2. 3. 4. ... 0. 0. 0.]
