In [1]:
import numpy as np
import sklearn as sk
import pandas as pd
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

In [2]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of lingering until garbage collection.

    NOTE(security): ``pickle.load`` can execute arbitrary code embedded in the
    file; only load pickles that come from a trusted source.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# Pre-split features and labels, read from the local ./data directory.
X_train = _load_pickle('./data/X_train.pckl')
X_test = _load_pickle('./data/X_test.pckl')
y_train = _load_pickle('./data/y_train.pckl')
y_test = _load_pickle('./data/y_test.pckl')

# The label pickles expose .to_numpy() (presumably pandas objects -- confirm);
# convert them to plain NumPy arrays before fitting.
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [3]:
# Baseline model: logistic regression, allowing the solver up to 1000
# iterations. All other hyper-parameters are left at the library defaults.
lr = LogisticRegression(max_iter=1000)
# Kept as the cell's last expression so the fitted estimator is still displayed.
lr.fit(X=X_train, y=y_train)

In [10]:
# Predict with the fitted logistic regression on each split ...
y_train_estimated = lr.predict(X_train)
y_test_estimated = lr.predict(X_test)

# ... and build the per-class precision / recall / f1 text reports.
train_class_report = classification_report(y_train, y_train_estimated)
test_class_report = classification_report(y_test, y_test_estimated)

# Header and report are separated by a newline, exactly as two print() calls would do.
print('Train set results:', train_class_report, sep='\n')
print('Test set results:', test_class_report, sep='\n')

Train set results:
              precision    recall  f1-score   support

     cheater       0.63      0.05      0.10     45058
       legit       0.84      0.99      0.91    224942

    accuracy                           0.84    270000
   macro avg       0.73      0.52      0.50    270000
weighted avg       0.80      0.84      0.77    270000

Test set results:
              precision    recall  f1-score   support

     cheater       0.63      0.05      0.10     14942
       legit       0.84      0.99      0.91     75058

    accuracy                           0.84     90000
   macro avg       0.74      0.52      0.51     90000
weighted avg       0.81      0.84      0.78     90000



In [11]:
# Cross-validated variant of the logistic regression, again allowing up to
# 1000 solver iterations. The search grid and fold settings are the library
# defaults, since nothing else is passed.
lrcv = LogisticRegressionCV(max_iter=1000)
# Kept as the cell's last expression so the fitted estimator is still displayed.
lrcv.fit(X=X_train, y=y_train)

In [12]:
# Predict with the cross-validated logistic regression on each split ...
y_train_estimated_cv = lrcv.predict(X_train)
y_test_estimated_cv = lrcv.predict(X_test)

# ... and build the per-class precision / recall / f1 text reports.
train_class_report_cv = classification_report(y_train, y_train_estimated_cv)
test_class_report_cv = classification_report(y_test, y_test_estimated_cv)

# Header and report are separated by a newline, exactly as two print() calls would do.
print('Train set results:', train_class_report_cv, sep='\n')
print('Test set results:', test_class_report_cv, sep='\n')

Train set results:
              precision    recall  f1-score   support

     cheater       0.63      0.05      0.10     45058
       legit       0.84      0.99      0.91    224942

    accuracy                           0.84    270000
   macro avg       0.74      0.52      0.50    270000
weighted avg       0.80      0.84      0.77    270000

Test set results:
              precision    recall  f1-score   support

     cheater       0.63      0.05      0.10     14942
       legit       0.84      0.99      0.91     75058

    accuracy                           0.84     90000
   macro avg       0.74      0.52      0.50     90000
weighted avg       0.81      0.84      0.78     90000



In [9]:
# Multi-layer perceptron with logistic (sigmoid) hidden activations, allowing
# up to 1000 training iterations.
# MLPClassifier initialises its weights (and shuffles batches) randomly, so
# without a fixed seed every re-run produces a different model and different
# metrics. Pinning random_state makes Restart & Run All reproducible.
mlpc = MLPClassifier(activation='logistic', max_iter=1000, random_state=42)
# Kept as the cell's last expression so the fitted estimator is still displayed.
mlpc.fit(X_train, y_train)

In [8]:
# Predict with the fitted MLP on each split ...
y_train_estimated_mlp = mlpc.predict(X_train)
y_test_estimated_mlp = mlpc.predict(X_test)

# ... and build the per-class precision / recall / f1 text reports.
# NOTE: train_class_report / test_class_report are rebound here; the
# logistic-regression evaluation cell assigns the same two names, so after
# this cell runs they hold the MLP reports.
train_class_report = classification_report(y_train, y_train_estimated_mlp)
test_class_report = classification_report(y_test, y_test_estimated_mlp)

# Header and report are separated by a newline, exactly as two print() calls would do.
print('Train set results:', train_class_report, sep='\n')
print('Test set results:', test_class_report, sep='\n')

Train set results:
              precision    recall  f1-score   support

     cheater       0.75      0.19      0.31     45058
       legit       0.86      0.99      0.92    224942

    accuracy                           0.85    270000
   macro avg       0.80      0.59      0.61    270000
weighted avg       0.84      0.85      0.82    270000

Test set results:
              precision    recall  f1-score   support

     cheater       0.43      0.11      0.18     14942
       legit       0.85      0.97      0.90     75058

    accuracy                           0.83     90000
   macro avg       0.64      0.54      0.54     90000
weighted avg       0.78      0.83      0.78     90000

