In [1]:
import xgboost
import joblib
import numpy as np
from quadratic_weighted_kappa import quadratic_weighted_kappa
from sklearn.metrics import accuracy_score
import pandas as pd
from sklearn.model_selection import train_test_split
from collections import Counter

In [2]:
x = joblib.load('essay_ease10_sbert768_simbow_langerr_780_normalized_asap5')
x_off = joblib.load('essay_asap5_780_with350offtopic')
y = joblib.load('score_asap5')
y_off = joblib.load('score_asap5_with350offtopic')
off = joblib.load('essay_350_offtopic_780_except5')

In [3]:
print(x.shape)
print(x_off.shape)

(1805, 780)
(2155, 780)


In [4]:
print(y.shape)
print(y_off.shape)

(1805,)
(2155,)


In [5]:
print(off.shape)

(350, 780)


In [6]:
def get_feature_names_extended():
    ease_feats = ['Answer Length', 'Word Counts', 'Average Word Length', 'Good n-gram', 'Prompt Overlap',
              'Prompt Overlap (synonyms)', 'Punctuation Counts', 'Spelling Error', 'Unique Words', 'Prompt Similarity SBert']

    sbert_feats = []
    sbert_dim = 768

    for i in range(0, sbert_dim):
    	fname = "sbert_" + str(i) 
    	sbert_feats.append(fname)
    
    prompt_similarity_bow = ["Prompt Similarity BOW"]
    lang_error = ["Language Error"]
    
    feature_names = ease_feats + prompt_similarity_bow + lang_error + sbert_feats 

    print("len feature names: ", len(feature_names))
    
    return feature_names

feature_names = get_feature_names_extended()

len feature names:  780


### create 5-fold

In [7]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
print(kf)

KFold(n_splits=5, random_state=42, shuffle=True)


In [8]:
model2 = xgboost.XGBRegressor(objective ='reg:squarederror',
                colsample_bytree=0.4,
                 gamma=0,                 
                 learning_rate=0.07,
                 max_depth=3,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42) 

### training using original + off topic data (2155 essays)

In [9]:
qwk_scores = []
qwk_scores_ori = []

acc_scores = []
acc_scores_ori = []
acc_scores_off = []

test_indices = []
test_indices_ori = []
test_indices_off = []

pred_labels = []
pred_labels_ori = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x_off, y_off):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x_off[train_index], x_off[test_index], y_off[train_index], y_off[test_index]
         
    model2.fit(X_train, Y_train)
    
    
    # PREDICT AND EVALUATE ALL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))

    # PREDICT AND EVALUATE ONLY ORIGINAL ESSAY
    test_index_ori = [a for a in test_index if a < 1805]
    x_test_ori = x_off[test_index_ori]
    y_test_ori = y_off[test_index_ori]
    predict_ori = model2.predict(x_test_ori)
    predict_ori = np.round(predict_ori)
    pred_labels_ori.extend(predict_ori)
    
    result_qwk_ori = quadratic_weighted_kappa(y_test_ori, predict_ori)
    print("Qwk original : ", result_qwk_ori)
    qwk_scores_ori.append(result_qwk_ori)
    
    result_acc_ori = accuracy_score(y_test_ori, predict_ori)
    print("Acc original : ", result_acc_ori)
    acc_scores_ori.append(result_acc_ori)
    
    print("len ori : ", len(test_index_ori))
    
    # PREDICT AND EVALUATE ONLY OFF-TOPIC ESSAY
    test_index_off = [a for a in test_index if a > 1804]
    x_test_off = x_off[test_index_off]
    y_test_off = y_off[test_index_off]
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(test_index_off))

print("\nMean QWK : ", np.mean(qwk_scores))
print("\nMean QWK Original : ", np.mean(qwk_scores_ori))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Original : ", np.mean(acc_scores_ori))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.8967100229533282
Acc :  0.7215777262180975
len all :  431
Qwk original :  0.8142253149696728
Acc original :  0.6927374301675978
len ori :  358
Acc off topic :  0.863013698630137
len off :  73

Loop - 2
Qwk :  0.8937118527893929
Acc :  0.7122969837587007
len all :  431
Qwk original :  0.7995202961217021
Acc original :  0.6721311475409836
len ori :  366
Acc off topic :  0.9384615384615385
len off :  65

Loop - 3
Qwk :  0.8861467706838719
Acc :  0.691415313225058
len all :  431
Qwk original :  0.8128063272683697
Acc original :  0.6720867208672087
len ori :  369
Acc off topic :  0.8064516129032258
len off :  62

Loop - 4
Qwk :  0.8816255047154332
Acc :  0.6844547563805105
len all :  431
Qwk original :  0.7567683870317979
Acc original :  0.637883008356546
len ori :  359
Acc off topic :  0.9166666666666666
len off :  72

Loop - 5
Qwk :  0.8997434456388306
Acc :  0.6960556844547564
len all :  431
Qwk original :  0.7923955863408078
Acc original :  0.660056657223796
len ori :

#### also check for minus score!

In [10]:
print(Counter(pred_labels_ori))

Counter({2.0: 693, 3.0: 582, 1.0: 301, 4.0: 222, -0.0: 7})


In [11]:
print(Counter(pred_labels_off))

Counter({-0.0: 307, 1.0: 41, 2.0: 1, -1.0: 1})


In [12]:
sum(i < 1 for i in pred_labels_off)

308

In [13]:
# SO the Accuracies is 194 / 350
print("\nMean Accuracy Off Topic : ", sum(i < 1 for i in pred_labels_off) / len(pred_labels_off))


Mean Accuracy Off Topic :  0.88


### training using original data (1805 essays)

In [16]:
qwk_scores = []

acc_scores = []
acc_scores_off = []

test_indices = []
test_indices_off = []

pred_labels = []
pred_labels_off = []

counter = 1

for train_index, test_index in kf.split(x, y):
    
    print()
    print("Loop -", counter)
    print("========")
    counter = counter + 1
    
    X_train, X_test, Y_train, Y_test = x[train_index], x[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)    
    
    # PREDICT AND EVALUATE ORIGINAL ESSAYS
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result_qwk = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result_qwk)
    qwk_scores.append(result_qwk)
    
    result_acc = accuracy_score(Y_test, predict)
    print("Acc : ", result_acc)
    acc_scores.append(result_acc)
    
    print("len all : ", len(test_index))
    
    # PREDICT AND EVALUATE ONLY offBERISH ESSAY
    x_test_off = off
    y_test_off = np.zeros(350)
    predict_off = model2.predict(x_test_off)
    predict_off = np.round(predict_off)
    pred_labels_off.extend(predict_off)
    
    result_acc_off = accuracy_score(y_test_off, predict_off)
    print("Acc off topic : ", result_acc_off)
    acc_scores_off.append(result_acc_off)
    
    print("len off : ", len(x_test_off))

print("\nMean QWK : ", np.mean(qwk_scores))

print("\nMean Accuracy : ", np.mean(acc_scores))
print("\nMean Accuracy Off Topic : ", np.mean(acc_scores_off))


Loop - 1
Qwk :  0.7838152757725856
Acc :  0.7008310249307479
len all :  361
Acc off topic :  0.0
len off :  350

Loop - 2
Qwk :  0.7969935540186665
Acc :  0.6703601108033241
len all :  361
Acc off topic :  0.0
len off :  350

Loop - 3
Qwk :  0.8273125645997591
Acc :  0.6980609418282548
len all :  361
Acc off topic :  0.02
len off :  350

Loop - 4
Qwk :  0.7991864015252848
Acc :  0.6537396121883656
len all :  361
Acc off topic :  0.002857142857142857
len off :  350

Loop - 5
Qwk :  0.8252928789114105
Acc :  0.6925207756232687
len all :  361
Acc off topic :  0.005714285714285714
len off :  350

Mean QWK :  0.8065201349655412

Mean Accuracy :  0.6831024930747922

Mean Accuracy Off Topic :  0.005714285714285714


In [17]:
print(Counter(pred_labels_ori))
print(Counter(pred_labels_off))

Counter({2.0: 693, 3.0: 582, 1.0: 301, 4.0: 222, -0.0: 7})
Counter({2.0: 871, 1.0: 452, 3.0: 416, 0.0: 10, 4.0: 1})


In [37]:
model = joblib.load('model_asap6_extended_780_normalized')

d_off = xgboost.DMatrix(off, feature_names=feature_names)
pred = model.predict(d_off)

In [38]:
pred = np.round(pred)
pred

array([1., 1., 2., 2., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., 2., 1.,
       2., 2., 2., 3., 1., 2., 1., 1., 1., 2., 3., 2., 2., 1., 2., 2., 1.,
       2., 2., 1., 1., 1., 2., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 2.,
       2., 2., 3., 1., 2., 2., 2., 2., 2., 2., 2., 2., 1., 2., 2., 2., 2.,
       2., 3., 1., 3., 2., 0., 1., 2., 2., 2., 2., 1., 1., 2., 2., 2., 2.,
       3., 3., 2., 3., 2., 1., 2., 3., 2., 2., 2., 2., 3., 1., 2., 2., 3.,
       1., 3., 2., 2., 2., 1., 2., 1., 2., 1., 1., 2., 2., 1., 2., 2., 1.,
       1., 2., 2., 2., 1., 1., 1., 1., 3., 2., 2., 3., 3., 2., 1., 2., 3.,
       2., 1., 2., 1., 2., 2., 2., 2., 1., 2., 1., 2., 1., 3., 1., 1., 1.,
       2., 0., 0., 1., 2., 1., 2., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 2., 1., 0.,
       1., 0., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 0., 2.,
       2., 1., 1., 1., 1., 2., 1., 1., 0., 1., 1., 0., 0., 2., 1., 1., 1.,
       1., 0., 2., 1., 1.

In [39]:
from collections import Counter
Counter(pred)

Counter({1.0: 169, 2.0: 142, 3.0: 18, 0.0: 21})

In [42]:
pred_failed = [a for a in pred if a < 2]
acc = (len(pred_failed) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 54.29%


In [43]:
pred_zero = [a for a in pred if a == 0]
acc = (len(pred_zero) / len(pred)) * 100
print('Acc {:.2f}%'.format(round(acc,2)))

Acc 6.00%


In [44]:
len(y_off)

2150

In [46]:
print(y_off)

[2. 3. 4. ... 0. 0. 0.]
