In [1]:
import pandas as pd

# for graph plotting
import matplotlib.pyplot as plt
import seaborn as sns

# for adaboost
from sklearn.ensemble import AdaBoostClassifier

# for xgboost
from xgboost import XGBClassifier

# for evaluation metrics
%run -i helper_functions.py

# for multi-label classification
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.problem_transform import ClassifierChain
from skmultilearn.problem_transform import LabelPowerset

# for hyperparameters tuning
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV 

# for model export
import joblib

import warnings
warnings.filterwarnings("ignore")

## Read Data

In [2]:
selected_train = pd.read_csv('Data\selected_train.csv')

In [3]:
print(selected_train.shape)
selected_train.head()

(159571, 48)


Unnamed: 0,toxic,severe_toxic,obscene,threat,insult,identity_hate,none,min_length_scaled,num_words_vs_length,exclamation_marks_vs_length,...,105,114,132,135,139,143,156,157,170,198
0,0,0,0,0,0,0,1,0.009393,0.181132,0.0,...,-0.09132,-0.017755,0.003997,-0.199211,-0.109984,0.089084,0.012651,-0.019501,-0.091946,-0.113697
1,0,0,0,0,0,0,1,0.000723,0.160714,0.008929,...,-0.118445,0.039541,0.017522,0.001397,9.4e-05,0.021135,0.039308,-0.067295,-0.079366,-0.044959
2,0,0,0,0,0,0,1,0.007225,0.188841,0.0,...,-0.131555,-0.060714,0.026459,-0.029582,-0.146134,0.109907,0.036539,-0.053628,0.02753,-0.21123
3,0,0,0,0,0,0,1,0.007948,0.175719,0.0,...,-0.145382,-0.004285,-0.000706,-0.134984,-0.214832,0.17756,0.007675,-0.087473,0.106848,-0.015987
4,0,0,0,0,0,0,1,0.003613,0.208955,0.0,...,0.007073,-0.204627,0.162032,0.013798,-0.221076,0.163578,-0.06831,-0.031184,-0.01728,0.038566


In [16]:
selected_test = pd.read_csv('Data\selected_test.csv')

In [17]:
selected_test.head()

Unnamed: 0,toxic,severe_toxic,obscene,threat,insult,identity_hate,min_length_scaled,num_words_vs_length,exclamation_marks_vs_length,num_unique_words_scaled,...,114,132,135,139,143,156,157,170,198,none
0,0,0,0,0,0,0,0.00289,0.166667,0.0,0.032911,...,0.040738,-0.010083,-0.158238,-0.204194,0.116565,0.098296,-0.20616,0.046779,-0.036748,1
1,0,0,0,0,0,0,0.005058,0.1875,0.0,0.01519,...,-0.013467,0.00355,-0.117174,-0.075475,0.043945,-0.148617,-0.35074,0.113815,-0.070907,1
2,0,0,0,0,0,0,0.000723,0.149554,0.002232,0.118987,...,-0.12508,0.045087,-0.119656,-0.274707,0.102546,0.100782,-0.107205,-0.05078,-0.098343,1
3,0,0,0,0,0,0,0.007225,0.169661,0.0,0.070886,...,-0.023304,-0.005002,-0.281848,-0.157011,0.184323,0.067625,-0.066242,0.023737,-0.111269,1
4,0,0,0,0,0,0,0.003613,0.167665,0.0,0.060759,...,0.055657,0.024936,-0.181419,-0.165707,0.194511,0.143557,-0.059017,0.136676,-0.00453,1


In [None]:
features = selected_train.columns[7:]

In [31]:
labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

## Adaboost

For Adaboost, it deals with class imbalance by constructing successive training sets based on incorrectly classified examples. So it seems not to be necessary to introduce a kind of class_weight parameter.

### Baseline Adaboost using BinaryRelevance

In [86]:
classifier_ab = BinaryRelevance(AdaBoostClassifier(random_state=0))
# train
classifier_ab.fit(selected_train[features], selected_train[labels])
# predict
predictions_ab = classifier_ab.predict(selected_train[features])

In [194]:
# predict probability
predictions_proba_ab = classifier_ab.predict_proba(selected_train[features])

In [184]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_ab, predictions_proba_ab)

Accuracy score:  0.9125404992135162
Precision score:  0.7770655169870151
Recall score:  0.6020286056185538
F1 score:  0.677373812686023
Confusion matrix for label toxic:
[[142505   1772]
 [  5597   9697]]
Confusion matrix for label severe_toxic:
[[157317    659]
 [   992    603]]
Confusion matrix for label obscene:
[[149889   1233]
 [  2689   5760]]
Confusion matrix for label threat:
[[158920    173]
 [   369    109]]
Confusion matrix for label insult:
[[150138   1556]
 [  3143   4734]]
Confusion matrix for label identity_hate:
[[157765    401]
 [  1178    227]]
Logarithmic Loss:  0.3880227138605389
ROC AUC score:  0.9677564790633872


In [195]:
# evaluation on test data
predictions_ab_test = classifier_ab.predict(selected_test[features])
predictions_proba_ab_test = classifier_ab.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_ab_test, predictions_proba_ab_test)

Accuracy score:  0.8834130482353308
Precision score:  0.5983698968268324
Recall score:  0.6262243068009381
F1 score:  0.6075640597512085
Confusion matrix for label toxic:
[[55070  2818]
 [ 1740  4350]]
Confusion matrix for label severe_toxic:
[[63216   395]
 [  199   168]]
Confusion matrix for label obscene:
[[58814  1473]
 [ 1260  2431]]
Confusion matrix for label threat:
[[63683    84]
 [  142    69]]
Confusion matrix for label insult:
[[59461  1090]
 [ 1518  1909]]
Confusion matrix for label identity_hate:
[[63051   215]
 [  560   152]]
Logarithmic Loss:  0.3994778359106472
ROC AUC score:  0.953405666218833


#### Export Model

In [88]:
classifier_ab

In [93]:
joblib.dump(classifier_ab, 'ada_br.sav')

['ada_br.sav']

### Baseline Adaboost using ClassifierChain

In [98]:
classifier_chain_ab = ClassifierChain(AdaBoostClassifier(random_state=0))
# train
classifier_chain_ab.fit(selected_train[features], selected_train[labels])
# predict
predictions_chain_ab = classifier_chain_ab.predict(selected_train[features])

In [198]:
# predict probability
predictions_proba_chain_ab = classifier_chain_ab.predict_proba(selected_train[features])

In [199]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_chain_ab, predictions_proba_chain_ab)

Accuracy score:  0.9135745216862713
Precision score:  0.7460722264367039
Recall score:  0.631431990426805
F1 score:  0.6779039904300239
Confusion matrix for label toxic:
[[142505   1772]
 [  5597   9697]]
Confusion matrix for label severe_toxic:
[[157653    323]
 [  1181    414]]
Confusion matrix for label obscene:
[[149151   1971]
 [  2112   6337]]
Confusion matrix for label threat:
[[158962    131]
 [   391     87]]
Confusion matrix for label insult:
[[149050   2644]
 [  2452   5425]]
Confusion matrix for label identity_hate:
[[157704    462]
 [  1203    202]]
Logarithmic Loss:  0.38481131371737654
ROC AUC score:  0.965623896018629


In [200]:
# evaluation on test data
predictions_chain_ab_test = classifier_chain_ab.predict(selected_test[features])
predictions_proba_chain_ab_test = classifier_chain_ab.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_chain_ab_test, predictions_proba_chain_ab_test)

Accuracy score:  0.8864453405858264
Precision score:  0.5464551013009245
Recall score:  0.6650572492757622
F1 score:  0.5970707628466516
Confusion matrix for label toxic:
[[55070  2818]
 [ 1740  4350]]
Confusion matrix for label severe_toxic:
[[63392   219]
 [  241   126]]
Confusion matrix for label obscene:
[[58038  2249]
 [ 1012  2679]]
Confusion matrix for label threat:
[[63692    75]
 [  140    71]]
Confusion matrix for label insult:
[[58352  2199]
 [ 1153  2274]]
Confusion matrix for label identity_hate:
[[62989   277]
 [  570   142]]
Logarithmic Loss:  0.39687511432535383
ROC AUC score:  0.9507304676178021


#### Export Model

In [99]:
classifier_chain_ab

In [100]:
joblib.dump(classifier_chain_ab, 'ada_cc.sav')

['ada_cc.sav']

### Baseline Adaboost using LabelPowerset

Model with the worst performance.

In [102]:
classifier_powerset_ab = LabelPowerset(AdaBoostClassifier(random_state=0))
# train
classifier_powerset_ab.fit(selected_train[features], selected_train[labels])
# predict
predictions_powerset_ab = classifier_powerset_ab.predict(selected_train[features])

In [207]:
# predict probability
predictions_proba_powerset_ab = classifier_powerset_ab.predict_proba(selected_train[features])

In [208]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_powerset_ab, predictions_proba_powerset_ab)

Accuracy score:  0.9075019897099097
Precision score:  0.6413750895352156
Recall score:  0.5611145934241267
F1 score:  0.5842075791281165
Confusion matrix for label toxic:
[[142556   1721]
 [  7340   7954]]
Confusion matrix for label severe_toxic:
[[157971      5]
 [  1595      0]]
Confusion matrix for label obscene:
[[147780   3342]
 [  2121   6328]]
Confusion matrix for label threat:
[[159088      5]
 [   478      0]]
Confusion matrix for label insult:
[[147436   4258]
 [  2465   5412]]
Confusion matrix for label identity_hate:
[[158161      5]
 [  1405      0]]
Logarithmic Loss:  0.35122484907961943
ROC AUC score:  0.7772460721531839


In [209]:
# evaluation on test data
predictions_powerset_ab_test = classifier_powerset_ab.predict(selected_test[features])
predictions_proba_powerset_ab_test = classifier_powerset_ab.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_powerset_ab_test, predictions_proba_powerset_ab_test)

Accuracy score:  0.8843821313576542
Precision score:  0.4468084789092727
Recall score:  0.6292592081666437
F1 score:  0.5135609130324246
Confusion matrix for label toxic:
[[55198  2690]
 [ 2260  3830]]
Confusion matrix for label severe_toxic:
[[63601    10]
 [  367     0]]
Confusion matrix for label obscene:
[[56590  3697]
 [  878  2813]]
Confusion matrix for label threat:
[[63757    10]
 [  211     0]]
Confusion matrix for label insult:
[[56521  4030]
 [  947  2480]]
Confusion matrix for label identity_hate:
[[63256    10]
 [  712     0]]
Logarithmic Loss:  0.3607347370852993
ROC AUC score:  0.7795176830340286


#### Export Model

In [103]:
classifier_powerset_ab

In [104]:
joblib.dump(classifier_powerset_ab, 'ada_ps.sav')

['ada_ps.sav']

### Hyperparameters Tuning

- base_estimator: the type of base learners to use, stick with the default DecisionTreeClassifier(max_depth=1)
- n_estimators [default=50]: the number of base estimators to use
- learning_rate [default=1.0]: the contribution of each base estimator, there is a trade-off between the learning_rate and n_estimators parameters
- algorithm: the boosting algorithm to use, stick with the default SAMME.R, as it typically converges faster than SAMME, achieving a lower test error with fewer boosting iterations

#### n_estimators & learning_rate

In [12]:
param_test = {
 'classifier__n_estimators': [50, 100, 500],
 'classifier__learning_rate': np.arange(0.6, 1.5, 0.2)
}

ab_tuned = BinaryRelevance(AdaBoostClassifier(random_state=0))
gsearch_ab = GridSearchCV(estimator=ab_tuned, param_grid=param_test, scoring='f1_weighted', cv=5, verbose=3)
gsearch_ab.fit(selected_train[features], selected_train[labels])

Fitting 5 folds for each of 15 candidates, totalling 75 fits
[CV 1/5] END classifier__learning_rate=0.6, classifier__n_estimators=50;, score=0.663 total time= 5.8min
[CV 2/5] END classifier__learning_rate=0.6, classifier__n_estimators=50;, score=0.665 total time= 5.6min
[CV 3/5] END classifier__learning_rate=0.6, classifier__n_estimators=50;, score=0.671 total time= 5.8min
[CV 4/5] END classifier__learning_rate=0.6, classifier__n_estimators=50;, score=0.671 total time= 5.7min
[CV 5/5] END classifier__learning_rate=0.6, classifier__n_estimators=50;, score=0.657 total time= 5.6min
[CV 1/5] END classifier__learning_rate=0.6, classifier__n_estimators=100;, score=0.674 total time=11.3min
[CV 2/5] END classifier__learning_rate=0.6, classifier__n_estimators=100;, score=0.675 total time=11.3min
[CV 3/5] END classifier__learning_rate=0.6, classifier__n_estimators=100;, score=0.679 total time=11.2min
[CV 4/5] END classifier__learning_rate=0.6, classifier__n_estimators=100;, score=0.679 total tim

[CV 4/5] END classifier__learning_rate=1.4000000000000004, classifier__n_estimators=500;, score=0.679 total time=62.1min
[CV 5/5] END classifier__learning_rate=1.4000000000000004, classifier__n_estimators=500;, score=0.669 total time=83.0min


In [13]:
gsearch_ab.best_params_, gsearch_ab.best_score_

({'classifier__learning_rate': 0.8, 'classifier__n_estimators': 500},
 0.6823464428272762)

In [14]:
pd.DataFrame({'mean': gsearch_ab.cv_results_['mean_test_score'], 'std': gsearch_ab.cv_results_['std_test_score']})

Unnamed: 0,mean,std
0,0.665522,0.00512
1,0.67488,0.004248
2,0.681556,0.003535
3,0.668795,0.004512
4,0.67718,0.004762
5,0.682346,0.004356
6,0.669831,0.003561
7,0.676308,0.003902
8,0.680495,0.002908
9,0.671802,0.004781


### Tuned Adaboost using BinaryRelevance

In [19]:
classifier_ab_tuned = BinaryRelevance(AdaBoostClassifier(random_state=0, learning_rate=0.8, n_estimators=500))
# train
classifier_ab_tuned.fit(selected_train[features], selected_train[labels])
# predict
predictions_ab_tuned = classifier_ab_tuned.predict(selected_train[features])

In [20]:
# predict probability
predictions_proba_ab_tuned = classifier_ab_tuned.predict_proba(selected_train[features])

In [21]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_ab_tuned, predictions_proba_ab_tuned)

Accuracy score:  0.9164259169899293
Precision score:  0.7918405542208801
Recall score:  0.6246224856117157
F1 score:  0.6965734077694495
Confusion matrix for label toxic:
[[142540   1737]
 [  5190  10104]]
Confusion matrix for label severe_toxic:
[[157498    478]
 [   979    616]]
Confusion matrix for label obscene:
[[149832   1290]
 [  2531   5918]]
Confusion matrix for label threat:
[[158998     95]
 [   351    127]]
Confusion matrix for label insult:
[[150140   1554]
 [  3011   4866]]
Confusion matrix for label identity_hate:
[[157839    327]
 [  1113    292]]
Logarithmic Loss:  0.39293675460241834
ROC AUC score:  0.9757786945238773


In [22]:
# evaluation on test data
predictions_ab_tuned_test = classifier_ab_tuned.predict(selected_test[features])
predictions_proba_ab_tuned_test = classifier_ab_tuned.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_ab_tuned_test, predictions_proba_ab_tuned_test)

Accuracy score:  0.8831785926412205
Precision score:  0.5963603823674458
Recall score:  0.6378810870464892
F1 score:  0.6120293693033916
Confusion matrix for label toxic:
[[54943  2945]
 [ 1628  4462]]
Confusion matrix for label severe_toxic:
[[63284   327]
 [  222   145]]
Confusion matrix for label obscene:
[[58768  1519]
 [ 1232  2459]]
Confusion matrix for label threat:
[[63681    86]
 [  149    62]]
Confusion matrix for label insult:
[[59428  1123]
 [ 1485  1942]]
Confusion matrix for label identity_hate:
[[63052   214]
 [  534   178]]
Logarithmic Loss:  0.4046655868859925
ROC AUC score:  0.9518584469796296


The tuned model shows a small performance improvement, but considering its long training time, we decided to stick with the baseline model.

## XGboost

In [4]:
features = selected_train.columns[7:]
labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

scale_pos_weight works in a similar manner as class_weight, to use this parameter, we treat this multi-label classification problem as multiple binary classfication problems.

### OneVsRest XGboost with scale_pos_weight

In [30]:
for label in labels:
    print(f'fitting for label {label}')
    weight = len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1])
    xgb = XGBClassifier(random_state=0, scale_pos_weight=weight)
    xgb.fit(selected_train[features], selected_train[label])
    xgb_predictions = xgb.predict(selected_train[features])
    xgb_predictions_proba = xgb.predict_proba(selected_train[features])
    print('Model Performance on training set:')
    get_evaluation_score_single_class(selected_train[label], xgb_predictions, xgb_predictions_proba[:, 1])
    
    xgb_predictions_test = xgb.predict(selected_test[features])
    xgb_predictions_proba_test = xgb.predict_proba(selected_test[features])
    print('Model Performance on test set:')
    get_evaluation_score_single_class(selected_test[label], xgb_predictions_test, xgb_predictions_proba_test[:, 1])

fitting for label toxic
Model Performance on training set:
Accuracy score:  0.9516077482750626
Precision score:  0.673558265334189
Recall score:  0.9607035438734144
F1 score:  0.7919047105745392
Confusion matrix:
[[137156   7121]
 [   601  14693]]
Logarithmic Loss:  0.1336701716089512
ROC AUC score:  0.9917456562103583
Model Performance on test set:
Accuracy score:  0.8784269592672481
Precision score:  0.4313374552554507
Recall score:  0.870607553366174
F1 score:  0.5768686758785768
Confusion matrix:
[[50898  6990]
 [  788  5302]]
Logarithmic Loss:  0.3300764179026478
ROC AUC score:  0.9471287712822496
fitting for label severe_toxic
Model Performance on training set:
Accuracy score:  0.9945729487187521
Precision score:  0.6481105241771637
Recall score:  1.0
F1 score:  0.7864891518737672
Confusion matrix:
[[157110    866]
 [     0   1595]]
Logarithmic Loss:  0.013561043013184353
ROC AUC score:  0.9999787892863532
Model Performance on test set:
Accuracy score:  0.9844634092969459
Precisi

Recalls are generally higher than precisions, but the overall performance does not stand out.

In [35]:
for label in labels:
    print(f'fitting for label {label}')
    weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
    xgb = XGBClassifier(random_state=0, scale_pos_weight=weight)
    xgb.fit(selected_train[features], selected_train[label])
    xgb_predictions = xgb.predict(selected_train[features])
    xgb_predictions_proba = xgb.predict_proba(selected_train[features])
    print('Model Performance on training set:')
    get_evaluation_score_single_class(selected_train[label], xgb_predictions, xgb_predictions_proba[:, 1])
    
    xgb_predictions_test = xgb.predict(selected_test[features])
    xgb_predictions_proba_test = xgb.predict_proba(selected_test[features])
    print('Model Performance on test set:')
    get_evaluation_score_single_class(selected_test[label], xgb_predictions_test, xgb_predictions_proba_test[:, 1])

fitting for label toxic
Model Performance on training set:
Accuracy score:  0.9719121895582531
Precision score:  0.8296341463414634
Recall score:  0.8896299202301556
F1 score:  0.85858522117751
Confusion matrix:
[[141483   2794]
 [  1688  13606]]
Logarithmic Loss:  0.08499322363867028
ROC AUC score:  0.9911264009906027
Model Performance on test set:
Accuracy score:  0.9086092094157366
Precision score:  0.5124628166991486
Recall score:  0.8203612479474548
F1 score:  0.63084790706484
Confusion matrix:
[[53135  4753]
 [ 1094  4996]]
Logarithmic Loss:  0.23979647137692156
ROC AUC score:  0.948740474499878
fitting for label severe_toxic
Model Performance on training set:
Accuracy score:  0.9987779734412895
Precision score:  0.8910614525139665
Recall score:  1.0
F1 score:  0.9423929098966027
Confusion matrix:
[[157781    195]
 [     0   1595]]
Logarithmic Loss:  0.006376671501702909
ROC AUC score:  0.999995864615283
Model Performance on test set:
Accuracy score:  0.9890587389415112
Precision

By setting the scale_pos_weight as $\sqrt{\frac{\text{number of negative class}}{\text{number of positive class}}}$, an improvement on ROC AUC score can be achieved.

In [36]:
for label in labels:
    print(np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1])))

3.071411519116151
9.952111037693614
4.229228899698583
18.243643932423545
4.388375471505747
10.610073773561947


### Baseline XGboost using BinaryRelevance

In [106]:
classifier = BinaryRelevance(XGBClassifier(random_state=0))
# train
classifier.fit(selected_train[features], selected_train[labels])
# predict
predictions = classifier.predict(selected_train[features])

In [151]:
# predict probability
predictions_proba = classifier.predict_proba(selected_train[features])

In [176]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions, predictions_proba)

Accuracy score:  0.9615782316335675
Precision score:  0.9465859395512345
Recall score:  0.831870761866773
F1 score:  0.8850346534024727
Confusion matrix for label toxic:
[[143520    757]
 [  3228  12066]]
Confusion matrix for label severe_toxic:
[[157976      0]
 [    54   1541]]
Confusion matrix for label obscene:
[[150778    344]
 [  1035   7414]]
Confusion matrix for label threat:
[[159093      0]
 [     1    477]]
Confusion matrix for label insult:
[[151161    533]
 [  1366   6511]]
Confusion matrix for label identity_hate:
[[158165      1]
 [   217   1188]]
Logarithmic Loss:  0.22531139021776553
ROC AUC score:  0.9936558406726059


In [189]:
# evaluation on test data
predictions_test = classifier.predict(selected_test[features])
predictions_proba_test = classifier.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_test, predictions_proba_test)

Accuracy score:  0.8815217731095064
Precision score:  0.6005631210353756
Recall score:  0.6479514415781487
F1 score:  0.6141097887551091
Confusion matrix for label toxic:
[[54820  3068]
 [ 1529  4561]]
Confusion matrix for label severe_toxic:
[[63375   236]
 [  243   124]]
Confusion matrix for label obscene:
[[58701  1586]
 [ 1127  2564]]
Confusion matrix for label threat:
[[63732    35]
 [  176    35]]
Confusion matrix for label insult:
[[59397  1154]
 [ 1445  1982]]
Confusion matrix for label identity_hate:
[[63159   107]
 [  584   128]]
Logarithmic Loss:  0.30727356342304224
ROC AUC score:  0.9570723263839552


#### Export Model

In [107]:
classifier

In [108]:
joblib.dump(classifier, 'xgb_br.sav')

['xgb_br.sav']

### Baseline XGboost using ClassifierChain

In [110]:
classifier_chain = ClassifierChain(XGBClassifier(random_state=0))
# train
classifier_chain.fit(selected_train[features], selected_train[labels])
# predict
predictions_chain = classifier_chain.predict(selected_train[features])

In [178]:
# predict probability
predictions_proba_chain = classifier_chain.predict_proba(selected_train[features])

In [179]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_chain, predictions_proba_chain)

Accuracy score:  0.9647178998690238
Precision score:  0.9406440480754606
Recall score:  0.8365433927859137
F1 score:  0.8849359261159032
Confusion matrix for label toxic:
[[143520    757]
 [  3228  12066]]
Confusion matrix for label severe_toxic:
[[157971      5]
 [    86   1509]]
Confusion matrix for label obscene:
[[150666    456]
 [   951   7498]]
Confusion matrix for label threat:
[[159093      0]
 [    42    436]]
Confusion matrix for label insult:
[[151076    618]
 [  1245   6632]]
Confusion matrix for label identity_hate:
[[158149     17]
 [   185   1220]]
Logarithmic Loss:  0.2354450901087629
ROC AUC score:  0.9922865898808163


In [190]:
# evaluation on test data
predictions_chain_test = classifier_chain.predict(selected_test[features])
predictions_proba_chain_test = classifier_chain.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_chain_test, predictions_proba_chain_test)

Accuracy score:  0.8849448247835193
Precision score:  0.5788293041985081
Recall score:  0.661884397847979
F1 score:  0.6097843305431346
Confusion matrix for label toxic:
[[54820  3068]
 [ 1529  4561]]
Confusion matrix for label severe_toxic:
[[63390   221]
 [  231   136]]
Confusion matrix for label obscene:
[[58416  1871]
 [ 1081  2610]]
Confusion matrix for label threat:
[[63727    40]
 [  169    42]]
Confusion matrix for label insult:
[[58962  1589]
 [ 1321  2106]]
Confusion matrix for label identity_hate:
[[63156   110]
 [  571   141]]
Logarithmic Loss:  0.321184540196078
ROC AUC score:  0.9550026143474462


#### Export Model

In [111]:
classifier_chain

In [112]:
joblib.dump(classifier_chain, 'xgb_cc.sav')

['xgb_cc.sav']

### Baseline XGboost using LabelPowerset

Model with the best performance.

In [114]:
classifier_powerset = LabelPowerset(XGBClassifier(random_state=0))
# train
classifier_powerset.fit(selected_train[features], selected_train[labels])
# predict
predictions_powerset = classifier_powerset.predict(selected_train[features])

In [204]:
# predict probability
predictions_proba_powerset = classifier_powerset.predict_proba(selected_train[features])

In [205]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_powerset, predictions_proba_powerset)

Accuracy score:  0.9740053017152239
Precision score:  0.9690263930592887
Recall score:  0.867200410279788
F1 score:  0.9136343692292415
Confusion matrix for label toxic:
[[143960    317]
 [  3219  12075]]
Confusion matrix for label severe_toxic:
[[157975      1]
 [    13   1582]]
Confusion matrix for label obscene:
[[150812    310]
 [   653   7796]]
Confusion matrix for label threat:
[[159092      1]
 [     0    478]]
Confusion matrix for label insult:
[[151346    348]
 [   771   7106]]
Confusion matrix for label identity_hate:
[[158163      3]
 [     5   1400]]
Logarithmic Loss:  0.2166358224623783
ROC AUC score:  0.9961318578428442


In [206]:
# evaluation on test data
predictions_powerset_test = classifier_powerset.predict(selected_test[features])
predictions_proba_powerset_test = classifier_powerset.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_powerset_test, predictions_proba_powerset_test)

Accuracy score:  0.8912438650786207
Precision score:  0.6118931051023679
Recall score:  0.6142226514001932
F1 score:  0.6027923751631025
Confusion matrix for label toxic:
[[55392  2496]
 [ 1866  4224]]
Confusion matrix for label severe_toxic:
[[63420   191]
 [  259   108]]
Confusion matrix for label obscene:
[[58768  1519]
 [ 1203  2488]]
Confusion matrix for label threat:
[[63748    19]
 [  191    20]]
Confusion matrix for label insult:
[[59300  1251]
 [ 1455  1972]]
Confusion matrix for label identity_hate:
[[63199    67]
 [  619    93]]
Logarithmic Loss:  0.304448959899994
ROC AUC score:  0.957436038285783


#### Export Model

In [115]:
classifier_powerset

In [116]:
joblib.dump(classifier_powerset, 'xgb_ps.sav')

['xgb_ps.sav']

## Oversampled Data

In [164]:
oversampled_train = pd.read_csv('Data/train_oversampled.csv')

In [165]:
print(oversampled_train.shape)
oversampled_train.head()

(421989, 49)


Unnamed: 0,toxic,obscene,insult,none,min_length_scaled,num_words_vs_length,exclamation_marks_vs_length,num_unique_words_scaled,verbs_vs_length,num_uppercase_scaled,...,139,143,156,157,170,198,clean_text,threat,identity_hate,severe_toxic
0,0,0,0,1,0.009393,0.181132,0.0,0.106329,0.041509,0.003426,...,-0.109984,0.089084,0.012651,-0.019501,-0.091946,-0.113697,explanation why the edits made under my userna...,0,0,0
1,0,0,0,1,0.000723,0.160714,0.008929,0.043038,0.026786,0.001612,...,9.4e-05,0.021135,0.039308,-0.067295,-0.079366,-0.044959,d'aww! he matches this background colour i am ...,0,0,0
2,0,0,0,1,0.007225,0.188841,0.0,0.091139,0.038627,0.000806,...,-0.146134,0.109907,0.036539,-0.053628,0.02753,-0.21123,"hey man, i am really not trying to edit war. i...",0,0,0
3,0,0,0,1,0.007948,0.175719,0.0,0.141772,0.036741,0.002217,...,-0.214832,0.17756,0.007675,-0.087473,0.106848,-0.015987,""" more i cannot make any real suggestions on i...",0,0,0
4,0,0,0,1,0.003613,0.208955,0.0,0.027848,0.059701,0.000403,...,-0.221076,0.163578,-0.06831,-0.031184,-0.01728,0.038566,"you, sir, are my hero. any chance you remember...",0,0,0


In [4]:
features = selected_train.columns[7:]
labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

### Baseline XGboost using Oversampled Data

In [167]:
classifier_over = BinaryRelevance(XGBClassifier(random_state=0))
# train
classifier_over.fit(oversampled_train[features], oversampled_train[labels])
# predict
predictions_over = classifier_over.predict(oversampled_train[features])

In [168]:
# predict probability
predictions_proba_over = classifier_over.predict_proba(oversampled_train[features])

In [169]:
# model evaluation
get_evaluation_score(oversampled_train[labels], predictions_over, predictions_proba_over)

Accuracy score:  0.9816369621009079
Precision score:  0.9976503470444362
Recall score:  0.9948778556633235
F1 score:  0.9962594671338085
Confusion matrix for label toxic:
[[148545   1080]
 [  3434 268930]]
Confusion matrix for label severe_toxic:
[[281192    134]
 [   158 140505]]
Confusion matrix for label obscene:
[[196894    654]
 [  1245 223196]]
Confusion matrix for label threat:
[[171289     12]
 [     0 250688]]
Confusion matrix for label insult:
[[194034    929]
 [  1601 225425]]
Confusion matrix for label identity_hate:
[[247465    207]
 [   167 174150]]
Logarithmic Loss:  4.867162926431723
ROC AUC score:  0.9997509309201982


In [170]:
# evaluation on test data
predictions_over_test = classifier_over.predict(selected_test[features])
predictions_proba_over_test = classifier_over.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_over_test, predictions_proba_over_test)

Accuracy score:  0.8719090937509769
Precision score:  0.5578638439034245
Recall score:  0.6776107049248172
F1 score:  0.6099251686299402
Confusion matrix for label toxic:
[[54383  3505]
 [ 1449  4641]]
Confusion matrix for label severe_toxic:
[[63206   405]
 [  218   149]]
Confusion matrix for label obscene:
[[58483  1804]
 [ 1093  2598]]
Confusion matrix for label threat:
[[63655   112]
 [  132    79]]
Confusion matrix for label insult:
[[58987  1564]
 [ 1303  2124]]
Confusion matrix for label identity_hate:
[[62907   359]
 [  479   233]]
Logarithmic Loss:  0.3176202046462899
ROC AUC score:  0.9531835703680447


Improved performance on training set, but no improvement for testing set performance.

### Hyperparameters Tuning

Official guideline: https://xgboost.readthedocs.io/en/stable/tutorials/param_tuning.html

Useful notes: https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
- booster is choosen to be gbtree as it usually overperforms gblinear.
- min_child_weight [default=1] defines the minimum sum of weights of all observations required in a child. It is used to control over-fitting. Higher values prevent a model from learning relations that might be highly specific to the particular sample selected for a tree, but too high values can lead to under-fitting.
- max_depth [default=6] defines the maximum depth of a tree. It is used to control over-fitting as higher depth will allow the model to learn relations very specific to a particular sample.Typical values: 3-10
- gamma [default=0] specifies the minimum loss reduction required to make a split.
- subsample [default=1] denotes the fraction of observations to be random samples for each tree. Lower values make the algorithm more conservative and prevent overfitting, but too small values might lead to under-fitting. Typical values: 0.5-1
- colsample_bytree [default=1] denotes the fraction of columns to be random samples for each tree. Typical values: 0.5-1
- scale_pos_weight [default=1] A value greater than 0 should be used in case of high-class imbalance as it helps in faster convergence.

### XGBoost using BinaryRelevance

#### Tune scale_pos_weight

In [52]:
param_test = {
 'classifier__scale_pos_weight':range(1,18,4)
}

xgb_tuned = BinaryRelevance(XGBClassifier(random_state=0))
gsearch = GridSearchCV(estimator=xgb_tuned, param_grid=param_test, scoring='f1_weighted', cv=5, verbose=3)
gsearch.fit(selected_train[features], selected_train[labels])

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV 1/5] END ....classifier__scale_pos_weight=1;, score=0.681 total time=10.3min
[CV 2/5] END ....classifier__scale_pos_weight=1;, score=0.684 total time= 3.8min
[CV 3/5] END ....classifier__scale_pos_weight=1;, score=0.687 total time= 3.7min
[CV 4/5] END ....classifier__scale_pos_weight=1;, score=0.686 total time= 3.7min
[CV 5/5] END ....classifier__scale_pos_weight=1;, score=0.680 total time= 3.6min
[CV 1/5] END ....classifier__scale_pos_weight=5;, score=0.687 total time= 3.7min
[CV 2/5] END ....classifier__scale_pos_weight=5;, score=0.689 total time= 3.7min
[CV 3/5] END ....classifier__scale_pos_weight=5;, score=0.694 total time= 3.6min
[CV 4/5] END ....classifier__scale_pos_weight=5;, score=0.682 total time= 3.7min
[CV 5/5] END ....classifier__scale_pos_weight=5;, score=0.681 total time= 3.7min
[CV 1/5] END ....classifier__scale_pos_weight=9;, score=0.672 total time= 3.7min
[CV 2/5] END ....classifier__scale_pos_weight=9;,

In [53]:
gsearch.best_params_, gsearch.best_score_

({'classifier__scale_pos_weight': 5}, 0.6867683858527105)

In [69]:
pd.DataFrame({'mean': gsearch.cv_results_['mean_test_score'], 'std': gsearch.cv_results_['std_test_score']})

Unnamed: 0,mean,std
0,0.683671,0.002551
1,0.686768,0.004814
2,0.670584,0.005683
3,0.655703,0.004298
4,0.642776,0.004863


scale_pos_weight=5 will be used for the following tuning process.

#### Tune min_child_weight and max_depth

In [54]:
param_test1 = {
 'classifier__max_depth':range(4,10,2),
 'classifier__min_child_weight':range(1,6,2)
}

xgb_tuned1 = BinaryRelevance(XGBClassifier(random_state=0, scale_pos_weight=5))
gsearch1 = GridSearchCV(estimator=xgb_tuned1, param_grid=param_test1, scoring='f1_weighted', cv=5, verbose=3)
gsearch1.fit(selected_train[features], selected_train[labels])

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5] END classifier__max_depth=4, classifier__min_child_weight=1;, score=0.688 total time= 2.9min
[CV 2/5] END classifier__max_depth=4, classifier__min_child_weight=1;, score=0.680 total time= 2.9min
[CV 3/5] END classifier__max_depth=4, classifier__min_child_weight=1;, score=0.688 total time= 2.6min
[CV 4/5] END classifier__max_depth=4, classifier__min_child_weight=1;, score=0.681 total time= 2.0min
[CV 5/5] END classifier__max_depth=4, classifier__min_child_weight=1;, score=0.681 total time= 2.0min
[CV 1/5] END classifier__max_depth=4, classifier__min_child_weight=3;, score=0.689 total time= 2.6min
[CV 2/5] END classifier__max_depth=4, classifier__min_child_weight=3;, score=0.681 total time= 3.1min
[CV 3/5] END classifier__max_depth=4, classifier__min_child_weight=3;, score=0.690 total time= 3.0min
[CV 4/5] END classifier__max_depth=4, classifier__min_child_weight=3;, score=0.677 total time= 3.0min
[CV 5/5] END classifie

In [55]:
gsearch1.best_params_, gsearch1.best_score_

({'classifier__max_depth': 8, 'classifier__min_child_weight': 5},
 0.6947792321121928)

In [63]:
pd.DataFrame({'mean': gsearch1.cv_results_['mean_test_score'], 'std': gsearch1.cv_results_['std_test_score']})

Unnamed: 0,mean,std
0,0.683644,0.003856
1,0.683588,0.004968
2,0.683543,0.004077
3,0.686768,0.004814
4,0.687648,0.003125
5,0.6881,0.004246
6,0.690663,0.006194
7,0.692115,0.003451
8,0.694779,0.005637


Although max_depth=8, min_child_weight=5 gives the best mean cross validation score, it has a relatively large standard deviation on performance. max_depth=8, min_child_weight=3 admits good results in terms of both mean and std, so it will be used as the parameters for the following tuning process.

#### Tune gamma

In [66]:
param_test2 = {
 'classifier__gamma':[i/10.0 for i in range(0,5)]
}

xgb_tuned2 = BinaryRelevance(XGBClassifier(random_state=0, scale_pos_weight=5, max_depth=8, min_child_weight=3))
gsearch2 = GridSearchCV(estimator=xgb_tuned2, param_grid=param_test2, scoring='f1_weighted', cv=5, verbose=3)
gsearch2.fit(selected_train[features], selected_train[labels])

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV 1/5] END .............classifier__gamma=0.0;, score=0.693 total time= 4.5min
[CV 2/5] END .............classifier__gamma=0.0;, score=0.691 total time= 3.6min
[CV 3/5] END .............classifier__gamma=0.0;, score=0.698 total time= 3.6min
[CV 4/5] END .............classifier__gamma=0.0;, score=0.690 total time= 3.6min
[CV 5/5] END .............classifier__gamma=0.0;, score=0.688 total time= 3.5min
[CV 1/5] END .............classifier__gamma=0.1;, score=0.693 total time= 3.5min
[CV 2/5] END .............classifier__gamma=0.1;, score=0.690 total time= 4.0min
[CV 3/5] END .............classifier__gamma=0.1;, score=0.698 total time= 4.9min
[CV 4/5] END .............classifier__gamma=0.1;, score=0.690 total time= 5.1min
[CV 5/5] END .............classifier__gamma=0.1;, score=0.691 total time= 6.0min
[CV 1/5] END .............classifier__gamma=0.2;, score=0.691 total time= 6.0min
[CV 2/5] END .............classifier__gamma=0.2;,

In [67]:
gsearch2.best_params_, gsearch2.best_score_

({'classifier__gamma': 0.4}, 0.6943846925898665)

In [68]:
pd.DataFrame({'mean': gsearch2.cv_results_['mean_test_score'], 'std': gsearch2.cv_results_['std_test_score']})

Unnamed: 0,mean,std
0,0.692115,0.003451
1,0.692493,0.003107
2,0.692417,0.004925
3,0.692221,0.003616
4,0.694385,0.002866


gamma=0.4 will be used for the following tuning process.

#### Tune subsample and colsample_bytree

In [72]:
param_test3 = {
 'classifier__subsample':[i/100.0 for i in range(70,105,10)],
 'classifier__colsample_bytree':[i/100.0 for i in range(70,105,10)]
}

xgb_tuned3 = BinaryRelevance(XGBClassifier(random_state=0, scale_pos_weight=5, max_depth=8, min_child_weight=3, gamma=0.4))
gsearch3 = GridSearchCV(estimator=xgb_tuned3, param_grid=param_test3, scoring='f1_weighted', cv=5, verbose=3)
gsearch3.fit(selected_train[features], selected_train[labels])

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.7;, score=0.686 total time= 3.2min
[CV 2/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.7;, score=0.686 total time= 3.2min
[CV 3/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.7;, score=0.697 total time= 3.2min
[CV 4/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.7;, score=0.682 total time= 3.2min
[CV 5/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.7;, score=0.684 total time= 3.2min
[CV 1/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.8;, score=0.690 total time= 3.4min
[CV 2/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.8;, score=0.691 total time= 3.3min
[CV 3/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.8;, score=0.695 total time= 3.3min
[CV 4/5] END classifier__colsample_bytree=0.7, classifier__subsample=0.8;, score=0.691 tota

[CV 3/5] END classifier__colsample_bytree=1.0, classifier__subsample=1.0;, score=0.700 total time= 4.7min
[CV 4/5] END classifier__colsample_bytree=1.0, classifier__subsample=1.0;, score=0.694 total time= 4.8min
[CV 5/5] END classifier__colsample_bytree=1.0, classifier__subsample=1.0;, score=0.692 total time= 4.8min


In [73]:
gsearch3.best_params_, gsearch3.best_score_

({'classifier__colsample_bytree': 0.8, 'classifier__subsample': 1.0},
 0.6944879828419817)

In [74]:
pd.DataFrame({'mean': gsearch3.cv_results_['mean_test_score'], 'std': gsearch3.cv_results_['std_test_score']})

Unnamed: 0,mean,std
0,0.68686,0.005304
1,0.690191,0.003273
2,0.691299,0.004652
3,0.69383,0.004474
4,0.688849,0.003028
5,0.689381,0.004028
6,0.693191,0.007019
7,0.694488,0.003801
8,0.687232,0.003617
9,0.689621,0.003875


Although colsample_bytree=0.8, subsample=1.0 gives the best mean cross validation score, it has a relatively large standard deviation on performance. colsample_bytree=1.0, subsample=1.0 admits good results in terms of both mean and std, so it will be used as the parameters.

### Tuned XGboost using BinaryRelevance

#### with scale_pos_weight

In [75]:
classifier_tuned = BinaryRelevance(XGBClassifier(random_state=0, scale_pos_weight=5, max_depth=8, min_child_weight=3, gamma=0.4, colsample_bytree=1.0, subsample=1.0))
# train
classifier_tuned.fit(selected_train[features], selected_train[labels])
# predict
predictions_tuned = classifier_tuned.predict(selected_train[features])

In [76]:
# predict probability
predictions_proba_tuned = classifier_tuned.predict_proba(selected_train[features])

In [77]:
# model evaluation
get_evaluation_score(selected_train[labels], predictions_tuned, predictions_proba_tuned)

Accuracy score:  0.9844833961058087
Precision score:  0.9390043200884849
Recall score:  0.9917374209356659
F1 score:  0.9644395176022822
Confusion matrix for label toxic:
[[142657   1620]
 [   288  15006]]
Confusion matrix for label severe_toxic:
[[157965     11]
 [     0   1595]]
Confusion matrix for label obscene:
[[150941    181]
 [     0   8449]]
Confusion matrix for label threat:
[[159091      2]
 [     0    478]]
Confusion matrix for label insult:
[[151214    480]
 [     2   7875]]
Confusion matrix for label identity_hate:
[[158158      8]
 [     0   1405]]
Logarithmic Loss:  0.21402482739880424
ROC AUC score:  0.9995545851728009


In [78]:
# evaluation on test data
predictions_tuned_test = classifier_tuned.predict(selected_test[features])
predictions_proba_tuned_test = classifier_tuned.predict_proba(selected_test[features])
get_evaluation_score(selected_test[labels], predictions_tuned_test, predictions_proba_tuned_test)

Accuracy score:  0.8584982337678577
Precision score:  0.5234314088780061
Recall score:  0.7230652503793626
F1 score:  0.599135780139154
Confusion matrix for label toxic:
[[53145  4743]
 [ 1124  4966]]
Confusion matrix for label severe_toxic:
[[63280   331]
 [  216   151]]
Confusion matrix for label obscene:
[[58001  2286]
 [  904  2787]]
Confusion matrix for label threat:
[[63719    48]
 [  154    57]]
Confusion matrix for label insult:
[[58548  2003]
 [ 1075  2352]]
Confusion matrix for label identity_hate:
[[63105   161]
 [  542   170]]
Logarithmic Loss:  0.31633182603332327
ROC AUC score:  0.9542429058723496


Tuned model is more overfitting, and does not perform better than baseline model.

#### Export Model

In [158]:
joblib.dump(classifier_tuned, 'xgb_tuned.sav')

['xgb_tuned.sav']

### Tuned XGBoost on Different Categories

In [123]:
labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

#### toxic Comment

In [121]:
param_test_toxic = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[0]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_toxic = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_toxic = GridSearchCV(estimator=xgb_tuned_toxic, param_grid=param_test_toxic, scoring='f1', cv=5, verbose=3)
gsearch_toxic.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.747 total time=  25.8s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.748 total time=  23.2s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.742 total time=  18.2s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.739 total time=  18.4s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.741 total time=  18.0s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.748 total time=  21.4s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.748 total time=  28.7s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.736 total time=  26.5s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.737 total time=  28.0s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.742 total time=  26.8s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.748 total time=  27.7s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.742 total time=  27.5s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.739 total time=  27.0s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.741 total time=  26.6s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.748 total time=  26.5s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.748 total time=  28.3s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.737 total time=  28.9s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.737 total time=  35.1s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.742 total time=  26.8s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.747 total time=  38.0s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.745 total time=  29.1s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.739 total time=  27.1s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.741 total time=  25.9s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.748 total time=  25.9s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.750 total time=  26.1s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.737 total time=  26.9s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.737 total time=  28.9s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.742 total time=  18.4s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.747 total time=  18.1s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.745 total time=  18.1s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.741 total time=  19.1s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.741 total time=  18.8s
[CV 5/5] E

In [122]:
gsearch_toxic.best_params_, gsearch_toxic.best_score_

({'gamma': 0.01, 'max_depth': 8, 'min_child_weight': 1}, 0.7477322377616027)

In [128]:
toxic_predictions = gsearch_toxic.predict(selected_train[features])
toxic_predictions_proba = gsearch_toxic.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], toxic_predictions, toxic_predictions_proba[:, 1])
    
toxic_predictions_test = gsearch_toxic.predict(selected_test[features])
toxic_predictions_proba_test = gsearch_toxic.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], toxic_predictions_test, toxic_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9932381197084683
Precision score:  0.9592298249014667
Recall score:  0.9707074669805152
F1 score:  0.9649345162653147
Confusion matrix:
[[143646    631]
 [   448  14846]]
Logarithmic Loss:  0.04001238080197113
ROC AUC score:  0.9993606477740298

Model Performance on test set:
Accuracy score:  0.9169401981931289
Precision score:  0.5437725631768953
Recall score:  0.7914614121510674
F1 score:  0.6446435736257857
Confusion matrix:
[[53844  4044]
 [ 1270  4820]]
Logarithmic Loss:  0.24281046430479108
ROC AUC score:  0.9465444554163138


#### Export Model

In [132]:
joblib.dump(gsearch_toxic, 'xgb_toxic.sav')

['xgb_toxic.sav']

#### severe_toxic Comment

In [133]:
param_test_severe_toxic = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[1]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_severe_toxic = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_severe_toxic = GridSearchCV(estimator=xgb_tuned_severe_toxic, param_grid=param_test_severe_toxic, scoring='f1', cv=5, verbose=3)
gsearch_severe_toxic.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.444 total time=  12.9s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.418 total time=  24.9s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.421 total time=  27.4s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.457 total time=  27.3s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.457 total time=  26.5s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.460 total time=  25.6s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.427 total time=  26.7s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.426 total time=  26.5s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.456 total time=  26.0s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.465 total time=  25.8s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.418 total time=  27.8s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.421 total time=  26.6s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.449 total time=  27.1s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.479 total time=  26.9s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.460 total time=  26.4s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.427 total time=  25.6s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.426 total time=  25.7s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.456 total time=  26.3s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.465 total time=  25.6s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.451 total time=  27.8s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.442 total time=  27.0s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.449 total time=  25.7s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.479 total time=  25.8s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.460 total time=  27.0s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.424 total time=  26.1s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.423 total time=  26.1s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.456 total time=  26.1s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.465 total time=  25.1s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.451 total time=  25.1s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.442 total time=  25.0s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.431 total time=  24.8s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.445 total time=  26.8s
[CV 5/5] E

In [134]:
gsearch_severe_toxic.best_params_, gsearch_severe_toxic.best_score_

({'gamma': 0.04, 'max_depth': 4, 'min_child_weight': 1}, 0.4500480874627436)

In [136]:
severe_toxic_predictions = gsearch_severe_toxic.predict(selected_train[features])
severe_toxic_predictions_proba = gsearch_severe_toxic.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], severe_toxic_predictions, severe_toxic_predictions_proba[:, 1])
    
severe_toxic_predictions_test = gsearch_severe_toxic.predict(selected_test[features])
severe_toxic_predictions_proba_test = gsearch_severe_toxic.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], severe_toxic_predictions_test, severe_toxic_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9893151011148642
Precision score:  0.4830665024630542
Recall score:  0.9836990595611286
F1 score:  0.6479454883336775
Confusion matrix:
[[156297   1679]
 [    26   1569]]
Logarithmic Loss:  0.023529759130557874
ROC AUC score:  0.9982318095062414

Model Performance on test set:
Accuracy score:  0.982228265966426
Precision score:  0.20610687022900764
Recall score:  0.7356948228882834
F1 score:  0.3220035778175313
Confusion matrix:
[[62571  1040]
 [   97   270]]
Logarithmic Loss:  0.040437376725231164
ROC AUC score:  0.9816045345780812


#### Export Model

In [137]:
joblib.dump(gsearch_severe_toxic, 'xgb_severe_toxic.sav')

['xgb_severe_toxic.sav']

#### obscene Comment

In [138]:
param_test_obscene = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[2]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_obscene = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_obscene = GridSearchCV(estimator=xgb_tuned_obscene, param_grid=param_test_obscene, scoring='f1', cv=5, verbose=3)
gsearch_obscene.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.743 total time=  26.0s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.749 total time=  25.9s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.762 total time=  26.4s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.756 total time=  26.4s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.740 total time=  26.3s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.751 total time=  27.0s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.748 total time=  26.7s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.763 total time=  26.5s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.760 total time=  26.7s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.741 total time=  25.1s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.749 total time=  25.8s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.762 total time=  26.2s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.758 total time=  27.4s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.740 total time=  26.3s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.751 total time=  26.9s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.748 total time=  25.5s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.763 total time=  26.2s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.760 total time=  26.2s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.741 total time=  28.7s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.745 total time=  27.0s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.743 total time=  27.0s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.760 total time=  21.3s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.740 total time=  19.5s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.751 total time=  18.4s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.748 total time=  18.2s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.763 total time=  18.3s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.760 total time=  18.6s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.741 total time=  18.4s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.747 total time=  18.6s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.743 total time=  18.5s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.761 total time=  18.3s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.754 total time=  18.5s
[CV 5/5] E

In [139]:
gsearch_obscene.best_params_, gsearch_obscene.best_score_

({'gamma': 0.02, 'max_depth': 8, 'min_child_weight': 3}, 0.766795657650886)

In [144]:
obscene_predictions = gsearch_obscene.predict(selected_train[features])
obscene_predictions_proba = gsearch_obscene.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], obscene_predictions, obscene_predictions_proba[:, 1])
    
obscene_predictions_test = gsearch_obscene.predict(selected_test[features])
obscene_predictions_proba_test = gsearch_obscene.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], obscene_predictions_test, obscene_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9991539816132003
Precision score:  0.9848378819687427
Recall score:  0.9994082139898213
F1 score:  0.992069552957763
Confusion matrix:
[[150992    130]
 [     5   8444]]
Logarithmic Loss:  0.011528874662963144
ROC AUC score:  0.9999929540333763

Model Performance on test set:
Accuracy score:  0.9501860014379944
Precision score:  0.5501392757660167
Recall score:  0.7491194798157681
F1 score:  0.6343925662498566
Confusion matrix:
[[58026  2261]
 [  926  2765]]
Logarithmic Loss:  0.13549020425412472
ROC AUC score:  0.9606467828588562


#### Export Model

In [145]:
joblib.dump(gsearch_obscene, 'xgb_obscene.sav')

['xgb_obscene.sav']

#### threat Comment

In [146]:
param_test_threat = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[3]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_threat = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_threat = GridSearchCV(estimator=xgb_tuned_threat, param_grid=param_test_threat, scoring='f1', cv=5, verbose=3)
gsearch_threat.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.375 total time=  23.5s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.310 total time=  28.5s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.342 total time=  25.0s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.264 total time=  21.5s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.321 total time=  18.3s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.337 total time=  18.5s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.320 total time=  18.4s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.365 total time=  18.5s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.294 total time=  18.3s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.319 total time=  18.9s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.310 total time=  18.4s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.342 total time=  18.6s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.290 total time=  17.9s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.321 total time=  18.1s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.337 total time=  18.1s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.320 total time=  17.6s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.365 total time=  17.8s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.294 total time=  17.6s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.305 total time=  17.6s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.394 total time=  17.7s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.348 total time=  17.9s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.290 total time=  17.4s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.320 total time=  17.4s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.337 total time=  17.5s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.297 total time=  16.9s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.342 total time=  17.2s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.294 total time=  17.0s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.305 total time=  17.2s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.394 total time=  17.2s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.348 total time=  17.0s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.324 total time=  17.1s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.305 total time=  16.8s
[CV 5/5] E

In [148]:
gsearch_threat.best_params_, gsearch_threat.best_score_

({'gamma': 0.02, 'max_depth': 4, 'min_child_weight': 5}, 0.34594524852521835)

In [149]:
threat_predictions = gsearch_threat.predict(selected_train[features])
threat_predictions_proba = gsearch_threat.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], threat_predictions, threat_predictions_proba[:, 1])
    
threat_predictions_test = gsearch_threat.predict(selected_test[features])
threat_predictions_proba_test = gsearch_threat.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], threat_predictions_test, threat_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9987466394269635
Precision score:  0.7050147492625368
Recall score:  1.0
F1 score:  0.8269896193771626
Confusion matrix:
[[158893    200]
 [     0    478]]
Logarithmic Loss:  0.006278288907850132
ROC AUC score:  0.9999253548364004

Model Performance on test set:
Accuracy score:  0.9953108881177904
Precision score:  0.33935018050541516
Recall score:  0.44549763033175355
F1 score:  0.38524590163934425
Confusion matrix:
[[63584   183]
 [  117    94]]
Logarithmic Loss:  0.01526678238104188
ROC AUC score:  0.9719604555595879


#### Export Model

In [151]:
joblib.dump(gsearch_threat, 'xgb_threat.sav')

['xgb_threat.sav']

#### insult Comment

In [152]:
param_test_insult = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[4]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_insult = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_insult = GridSearchCV(estimator=xgb_tuned_insult, param_grid=param_test_insult, scoring='f1', cv=5, verbose=3)
gsearch_insult.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.682 total time=  24.4s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.679 total time=  24.7s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.695 total time=  25.1s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.683 total time=  25.1s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.683 total time=  24.7s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.679 total time=  24.9s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.682 total time=  24.7s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.697 total time=  28.1s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.680 total time=  26.9s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.680 total time=  33.0s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.679 total time=  25.5s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.695 total time=  25.4s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.683 total time=  25.4s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.686 total time=  25.3s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.679 total time=  25.2s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.682 total time=  25.1s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.697 total time=  25.5s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.680 total time=  25.5s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.680 total time=  26.7s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.678 total time=  26.7s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.680 total time=  25.1s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.683 total time=  17.5s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.686 total time=  17.6s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.679 total time=  17.6s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.682 total time=  17.3s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.697 total time=  17.6s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.680 total time=  17.6s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.680 total time=  17.9s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.678 total time=  18.3s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.680 total time=  18.4s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.692 total time=  18.0s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.682 total time=  18.0s
[CV 5/5] E

In [153]:
gsearch_insult.best_params_, gsearch_insult.best_score_

({'gamma': 0.01, 'max_depth': 6, 'min_child_weight': 3}, 0.6927503840714309)

In [155]:
insult_predictions = gsearch_insult.predict(selected_train[features])
insult_predictions_proba = gsearch_insult.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], insult_predictions, insult_predictions_proba[:, 1])
    
insult_predictions_test = gsearch_insult.predict(selected_test[features])
insult_predictions_proba_test = gsearch_insult.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], insult_predictions_test, insult_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9841136547367629
Precision score:  0.7763865894039735
Recall score:  0.9525199949219246
F1 score:  0.8554814434752865
Confusion matrix:
[[149533   2161]
 [   374   7503]]
Logarithmic Loss:  0.0482352058150126
ROC AUC score:  0.9967075266158543

Model Performance on test set:
Accuracy score:  0.9454031073181406
Precision score:  0.49357226334242305
Recall score:  0.7394222351911293
F1 score:  0.5919869174161896
Confusion matrix:
[[57951  2600]
 [  893  2534]]
Logarithmic Loss:  0.12881057571564947
ROC AUC score:  0.9586416280638289


#### Export Model

In [156]:
joblib.dump(gsearch_insult, 'xgb_insult.sav')

['xgb_insult.sav']

#### identity_hate Comment

In [159]:
param_test_identity_hate = {
 'max_depth':range(4,10,2),
 'min_child_weight':range(1,6,2),
 'gamma':[i/100.0 for i in range(0,5)]
}

label = labels[5]
weight = np.sqrt(len(selected_train[selected_train[label]==0]) / len(selected_train[selected_train[label]==1]))
xgb_tuned_identity_hate = XGBClassifier(random_state=0, scale_pos_weight=weight)
gsearch_identity_hate = GridSearchCV(estimator=xgb_tuned_identity_hate, param_grid=param_test_identity_hate, scoring='f1', cv=5, verbose=3)
gsearch_identity_hate.fit(selected_train[features], selected_train[label])

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.364 total time=  13.0s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.331 total time=  22.8s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.343 total time=  31.3s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.336 total time=  29.9s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=1;, score=0.348 total time=  26.2s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.360 total time=  26.2s
[CV 2/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.343 total time=  25.8s
[CV 3/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.327 total time=  25.8s
[CV 4/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.316 total time=  26.0s
[CV 5/5] END gamma=0.0, max_depth=4, min_child_weight=3;, score=0.354 total time=  25.7s
[CV 1/5] END gamma=0.0, max_depth=4, min_child_w

[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.326 total time=  26.2s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.328 total time=  24.9s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.332 total time=  24.6s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=1;, score=0.348 total time=  25.5s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.360 total time=  24.7s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.343 total time=  25.0s
[CV 3/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.329 total time=  24.8s
[CV 4/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.316 total time=  24.8s
[CV 5/5] END gamma=0.02, max_depth=4, min_child_weight=3;, score=0.354 total time=  25.2s
[CV 1/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.362 total time=  24.6s
[CV 2/5] END gamma=0.02, max_depth=4, min_child_weight=5;, score=0.331 total time=  24.3s
[CV 3/5] E

[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.338 total time=  24.7s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=1;, score=0.348 total time=  24.3s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.360 total time=  24.3s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.343 total time=  24.3s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.329 total time=  24.2s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.316 total time=  24.8s
[CV 5/5] END gamma=0.04, max_depth=4, min_child_weight=3;, score=0.354 total time=  24.1s
[CV 1/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.362 total time=  24.0s
[CV 2/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.323 total time=  24.2s
[CV 3/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.325 total time=  24.0s
[CV 4/5] END gamma=0.04, max_depth=4, min_child_weight=5;, score=0.339 total time=  24.4s
[CV 5/5] E

In [160]:
gsearch_identity_hate.best_params_, gsearch_identity_hate.best_score_

({'gamma': 0.02, 'max_depth': 6, 'min_child_weight': 5}, 0.3494472830253128)

In [162]:
identity_hate_predictions = gsearch_identity_hate.predict(selected_train[features])
identity_hate_predictions_proba = gsearch_identity_hate.predict_proba(selected_train[features])
print('Model Performance on training set:')
get_evaluation_score_single_class(selected_train[label], identity_hate_predictions, identity_hate_predictions_proba[:, 1])
    
identity_hate_predictions_test = gsearch_identity_hate.predict(selected_test[features])
identity_hate_predictions_proba_test = gsearch_identity_hate.predict_proba(selected_test[features])
print('\nModel Performance on test set:')
get_evaluation_score_single_class(selected_test[label], identity_hate_predictions_test, identity_hate_predictions_proba_test[:, 1])

Model Performance on training set:
Accuracy score:  0.9983894316636481
Precision score:  0.8457831325301205
Recall score:  0.999288256227758
F1 score:  0.9161500815660685
Confusion matrix:
[[157910    256]
 [     1   1404]]
Logarithmic Loss:  0.010622041982084454
ROC AUC score:  0.9999615341744424

Model Performance on test set:
Accuracy score:  0.9866204007627623
Precision score:  0.3994413407821229
Recall score:  0.40168539325842695
F1 score:  0.40056022408963576
Confusion matrix:
[[62836   430]
 [  426   286]]
Logarithmic Loss:  0.040029493046281235
ROC AUC score:  0.9531268148360214


#### Export Model

In [163]:
joblib.dump(gsearch_identity_hate, 'xgb_identity_hate.sav')

['xgb_identity_hate.sav']