### Logistic Regression with ELMo encodings

In [7]:
import numpy as np
import pandas as pd
import time
# for model:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
# for scoring:
from sklearn.metrics import f1_score
from sklearn import metrics

In [2]:
# Targets: positional columns 2 through 7 of the pre-cleaned training CSV. The text
# was already cleaned when that file was produced, so no pre-processing happens here.
df = pd.read_csv('toxic-train-clean.csv')
y = df.iloc[:, 2:8]
# Features: the ELMo encoding matrix stored as comma-separated text
# (presumably one row per CSV row, in the same order -- TODO confirm).
x = np.loadtxt("toxic_elmo_matrix.out", delimiter=",")

In [3]:
# Hold out 20% of the rows for the final evaluation. The seed is fixed so that a
# Restart & Run All reproduces the same partition and therefore the same scores;
# outputs saved from earlier, unseeded runs will not match exactly.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [4]:
# Display the shape of every split; features and labels should share row counts.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((127656, 128), (31915, 128), (127656, 6), (31915, 6))

In [28]:
# Solver comparison for a one-vs-rest logistic regression. Earlier experiments found
# that sag did not converge and that liblinear suits the one-vs-rest setup better; both
# stay in the grid so the comparison itself is re-runnable. class_weight='balanced' is
# used because the data is imbalanced.
start_time = time.perf_counter()  # monotonic clock, unaffected by system clock changes

# random_state pins the data shuffling performed by the sag and liblinear solvers so
# repeated runs give the same fitted models.
base_clf = LogisticRegression(max_iter=500, class_weight='balanced', random_state=42)
# The single-step pipeline is kept so the parameter path in param_grid (and the
# reported best_params_ key) stays the same.
pipe = make_pipeline(OneVsRestClassifier(base_clf))
param_grid = {'onevsrestclassifier__estimator__solver': ['liblinear', 'sag']}
grid = GridSearchCV(pipe, param_grid, cv=5, scoring='roc_auc', n_jobs=-1, verbose=3)

# fit() returns the fitted search object; later cells refer to it as grid2.
grid2 = grid.fit(X_train, y_train)

end_time = time.perf_counter()
print("total time", end_time - start_time)

Fitting 5 folds for each of 2 candidates, totalling 10 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed:  9.5min remaining:  4.1min
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 14.8min finished


total time 903.7850787639618


In [30]:
# Mean cross-validated score (scoring='roc_auc', cv=5) of the best solver candidate.
grid2.best_score_

0.8746263198008044

In [29]:
# Parameter setting (here: the solver) that achieved the best mean cross-validation score.
grid2.best_params_

{'onevsrestclassifier__estimator__solver': 'liblinear'}

In [31]:
# Hard label predictions for the held-out rows, produced through the fitted grid search;
# the first row is displayed as a spot check.
predicted_y_test = grid2.predict(X_test)
predicted_y_test[:1]

array([[1, 1, 1, 1, 1, 0]])

In [32]:
# Per-label probabilities for the same held-out rows (one column per label); these are
# the scores later passed to roc_auc_score. The first row is displayed as a spot check.
y_pred_prob = grid2.predict_proba(X_test)
y_pred_prob[:1]

array([[0.72423033, 0.55257321, 0.72538485, 0.63278861, 0.71740703,
        0.11654211]])

We focused on ROC AUC and F1 scores because the classes are heavily imbalanced; the main difference in scores across our different models is the F1 score.

In [33]:
# ROC AUC on the held-out set, averaged over the labels with equal weight
# ('macro', which is also roc_auc_score's default).
auc_score = metrics.roc_auc_score(y_test, y_pred_prob, average='macro')
auc_score

0.8827557582595694

In [34]:
# Micro-averaged F1 on the held-out set: counts are pooled over all labels
# before the score is computed.
metrics.f1_score(y_test, predicted_y_test, average='micro')

0.23584470371102764