In [1]:
import xgboost
import joblib
import numpy as np
from sklearn.metrics import accuracy_score
from quadratic_weighted_kappa import quadratic_weighted_kappa
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
#x = joblib.load('essay_ease10_sbert768_simbow_langerr_780')
x = joblib.load('essay_ease10_sbert768_simbow_langerr_780_normalized_asap5')
x_par = joblib.load('essay_ease10_sbert768_simbow_langerr_780_paraphrase_normalized_asap5')
y = joblib.load("score_asap5")

In [4]:
x.shape

(1805, 780)

In [5]:
def get_feature_names_extended():
    ease_feats = ['Answer Length', 'Word Counts', 'Average Word Length', 'Good n-gram', 'Prompt Overlap',
              'Prompt Overlap (synonyms)', 'Punctuation Counts', 'Spelling Error', 'Unique Words', 'Prompt Similarity SBert']

    sbert_feats = []
    sbert_dim = 768

    for i in range(0, sbert_dim):
    	fname = "sbert_" + str(i) 
    	sbert_feats.append(fname)
    
    prompt_similarity_bow = ["Prompt Similarity BOW"]
    lang_error = ["Language Error"]
    
    feature_names = ease_feats + prompt_similarity_bow + lang_error + sbert_feats 

    print("len feature names: ", len(feature_names))
    
    return feature_names

feature_names = get_feature_names_extended()[:12]

len feature names:  780


In [6]:
feature_names = get_feature_names_extended()

len feature names:  780


### create model

In [15]:
indices = np.arange(1800)
X_train, X_test, Y_train, Y_test, idx1, idx2 = train_test_split(x, y, indices, test_size=0.2, random_state=42)
X_train.shape

(1440, 780)

In [16]:
d_train = xgboost.DMatrix(X_train, label=Y_train, feature_names=feature_names)
d_test = xgboost.DMatrix(X_test, label=Y_test, feature_names=feature_names)

In [23]:
model = xgboost.train({"learning_rate": 0.05, "max_depth":3}, d_train, 200, evals = [(d_test, "test")], early_stopping_rounds=20)

[0]	test-rmse:2.30035
Will train until test-rmse hasn't improved in 20 rounds.
[1]	test-rmse:2.19388
[2]	test-rmse:2.09097
[3]	test-rmse:1.99648
[4]	test-rmse:1.90653
[5]	test-rmse:1.82066
[6]	test-rmse:1.73829
[7]	test-rmse:1.66124
[8]	test-rmse:1.58844
[9]	test-rmse:1.51933
[10]	test-rmse:1.45390
[11]	test-rmse:1.39255
[12]	test-rmse:1.33493
[13]	test-rmse:1.27968
[14]	test-rmse:1.22821
[15]	test-rmse:1.17958
[16]	test-rmse:1.13228
[17]	test-rmse:1.08909
[18]	test-rmse:1.04884
[19]	test-rmse:1.01011
[20]	test-rmse:0.97415
[21]	test-rmse:0.94110
[22]	test-rmse:0.91000
[23]	test-rmse:0.88061
[24]	test-rmse:0.85217
[25]	test-rmse:0.82629
[26]	test-rmse:0.80264
[27]	test-rmse:0.77939
[28]	test-rmse:0.75911
[29]	test-rmse:0.73957
[30]	test-rmse:0.72091
[31]	test-rmse:0.70416
[32]	test-rmse:0.68817
[33]	test-rmse:0.67364
[34]	test-rmse:0.65978
[35]	test-rmse:0.64668
[36]	test-rmse:0.63553
[37]	test-rmse:0.62461
[38]	test-rmse:0.61422
[39]	test-rmse:0.60461
[40]	test-rmse:0.59608
[41]	test-

In [24]:
yxgb_pred = model.predict(d_test)
yxgb_pred = np.round(yxgb_pred)
print("accuracy: ", accuracy_score(yxgb_pred, Y_test))
print("qwk: ", quadratic_weighted_kappa(yxgb_pred, Y_test))

accuracy:  0.7138888888888889
qwk:  0.8244318181818182


In [25]:
joblib.dump(model, "model_asap6_extended_780")

['model_asap6_extended_780']

### create model 5-fold

In [7]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
#kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
#kf.get_n_splits(x, y)
print(kf)

KFold(n_splits=5, random_state=42, shuffle=True)


In [10]:
model2 = xgboost.XGBRegressor(objective ='reg:squarederror',
                colsample_bytree=0.4,
                 gamma=0,                 
                 learning_rate=0.07,
                 max_depth=3,
                 min_child_weight=1.5,
                 n_estimators=1000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.45,
                 subsample=0.6,
                 seed=42) 

### training using original essays --> predict paraphrased essays

In [13]:
qwk_scores = []
qwk_scores_par = []
qwk_between_predictions = []

test_indices = []
pred_labels = []
pred_labels_par = []

for train_index, test_index in kf.split(x, y):
    
    X_train, X_test, Y_train, Y_test = x[train_index], x[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)
    
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result)
    qwk_scores.append(result)
    
    
    # PREDICT FOR PARAPHRASED ESSAY
    x_par_test = x_par[test_index]
    predict_par = model2.predict(x_par_test)
    predict_par = np.round(predict_par)
    
    pred_labels_par.extend(predict_par)
    
    result_par = quadratic_weighted_kappa(Y_test, predict_par)
    print("Qwk par : ", result_par)
    qwk_scores_par.append(result_par)
    
    result_between_preds = quadratic_weighted_kappa(predict, predict_par)
    qwk_between_predictions.append(result_between_preds)
    print("qwk between prediction: ", result_between_preds)
    
print("\nMean QWK : ", np.mean(qwk_scores))
print("Mean QWK paraphrase : ", np.mean(qwk_scores_par))
print("Mean QWK between predictions : ", np.mean(qwk_between_predictions))

Qwk :  0.7838152757725856
Qwk par :  0.7212269104837216
qwk between prediction:  0.8342910235210185
Qwk :  0.7969935540186665
Qwk par :  0.753657866517782
qwk between prediction:  0.8690400321590698
Qwk :  0.8273125645997591
Qwk par :  0.775579104420513
qwk between prediction:  0.8619205005737376
Qwk :  0.7991864015252848
Qwk par :  0.7816752343513759
qwk between prediction:  0.8844128527587719
Qwk :  0.8252928789114105
Qwk par :  0.7981323044231952
qwk between prediction:  0.8727685070258508

Mean QWK :  0.8065201349655412
Mean QWK paraphrase :  0.7660542840393175
Mean QWK between predictions :  0.8644865832076898


### Training using paraphrased essays --> predict original essays

In [12]:
qwk_scores = []
qwk_scores_ori = []
qwk_between_predictions = []

test_indices = []
pred_labels = []
pred_labels_ori = []

print("Training using paraphrase")
print("=========================")

for train_index, test_index in kf.split(x, y):
    
    X_train, X_test, Y_train, Y_test = x_par[train_index], x_par[test_index], y[train_index], y[test_index]
         
    model2.fit(X_train, Y_train)
    
    predict = model2.predict(X_test)
    predict = np.round(predict)
    
    pred_labels.extend(predict)
    test_indices.extend(test_index)
    
    result = quadratic_weighted_kappa(Y_test, predict)
    print("Qwk : ", result)
    qwk_scores.append(result)
    
    
    # PREDICT FOR ORIGINAL ESSAY
    x_ori_test = x[test_index]
    predict_ori = model2.predict(x_ori_test)
    predict_ori = np.round(predict_ori)
    
    pred_labels_ori.extend(predict_ori)
    
    result_ori = quadratic_weighted_kappa(Y_test, predict_ori)
    print("Qwk ori : ", result_ori)
    qwk_scores_ori.append(result_ori)
    
    result_between_preds = quadratic_weighted_kappa(predict, predict_ori)
    qwk_between_predictions.append(result_between_preds)
    print("qwk between prediction: ", result_between_preds)
        

print("\nMean QWK Paraphrase: ", np.mean(qwk_scores))
print("Mean QWK ori : ", np.mean(qwk_scores_ori))
print("Mean QWK between predictions : ", np.mean(qwk_between_predictions))

Training using paraphrase
Qwk :  0.7520328221883934
Qwk ori :  0.765958252293947
qwk between prediction:  0.8488709327603512
Qwk :  0.7809593470256253
Qwk ori :  0.7702788695489425
qwk between prediction:  0.8516623694465391
Qwk :  0.7940420078819637
Qwk ori :  0.7596269433756531
qwk between prediction:  0.8848057075635696
Qwk :  0.782041235789068
Qwk ori :  0.780809877843213
qwk between prediction:  0.8810685164679289
Qwk :  0.8274588525644369
Qwk ori :  0.7964741151729384
qwk between prediction:  0.857065600525345

Mean QWK Paraphrase:  0.7873068530898975
Mean QWK ori :  0.7746296116469388
Mean QWK between predictions :  0.8646946253527468


In [14]:
print(len(pred_labels))
print(len(test_indices))

1805
1805


In [15]:
pred_labels_int = list(map(int, pred_labels))
print(pred_labels)
print(pred_labels_int)

[3.0, 3.0, 4.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 3.0, 3.0, 2.0, 4.0, 3.0, 3.0, 1.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 4.0, 3.0, 1.0, 1.0, 4.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 1.0, 1.0, 3.0, 3.0, 2.0, 3.0, 1.0, 1.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 4.0, 3.0, 4.0, 3.0, 3.0, 1.0, 3.0, 3.0, 4.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 2.0, 4.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 3.0, 1.0, 3.0, 3.0, 2.0, 1.0, 4.0, 3.0, 3.0, 1.0, 2.0, 4.0, 2.0, 4.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 1.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 3.0, 2.0, 3.0, 1.0, 3.0, 3.0, 1.0, 4.0, 3.0, 3.0, 3.0, 1.0, 3.0, 4.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 2.0, 2.0, 3.0, 2.0, 4.0, 4.0, 4.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 3.0, 4.0, 2.0, 2.0, 2.0, 3.0, 4.0, 2.0, 3.0, 2.0, 4.0,

In [16]:
pred_labels_par_int = list(map(int, pred_labels_par))
print(pred_labels_par)
print(pred_labels_par_int)

[3.0, 3.0, 4.0, 2.0, 1.0, 2.0, 3.0, 3.0, 2.0, 2.0, 3.0, 2.0, 3.0, 3.0, 1.0, 2.0, 2.0, 3.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 1.0, 3.0, 3.0, 2.0, 3.0, 3.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 2.0, 4.0, 3.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 3.0, 1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 3.0, 1.0, 3.0, 2.0, 3.0, 3.0, 4.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 2.0, 0.0, 3.0, 1.0, 3.0, 3.0, 2.0, 1.0, 3.0, 3.0, 3.0, 1.0, 2.0, 4.0, 2.0, 3.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 2.0, 3.0, 3.0, 2.0, 1.0, 3.0, 2.0, 4.0, 2.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 1.0, 4.0, 2.0, 3.0, 3.0, 2.0, 2.0, 4.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 1.0, 2.0, 3.0, 2.0, 4.0, 3.0, 4.0, 2.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0, 3.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 2.0, 3.0, 2.0, 4.0,

In [17]:
new_score = np.zeros(1805)
new_score_par = np.zeros(1805)
new_score

array([0., 0., 0., ..., 0., 0., 0.])

In [18]:
new_score[test_indices] = pred_labels_int
new_score_par[test_indices] = pred_labels_par_int

In [19]:
new_score

array([3., 3., 3., ..., 4., 3., 2.])

In [20]:
new_score_par

array([3., 3., 2., ..., 3., 2., 2.])

In [21]:
new_score[60]

1.0

In [22]:
joblib.dump(new_score, 'model_score_normalized')

['model_score_normalized']

In [23]:
joblib.dump(new_score, 'model_score_paraphrase_normalized')

['model_score_paraphrase_normalized']

## Model performance to predict paraphrased essays

In [24]:
# Accuracy
accuracy_score(new_score, new_score_par)

0.7955678670360111

In [25]:
# QWK
quadratic_weighted_kappa(new_score, new_score_par)

0.8657992835455449