In [34]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler

In [2]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import ADASYN

In [3]:
df = pd.read_csv('creditcard.csv')

In [4]:
y0 = df[df.Class==0].Class
y1 = df[df.Class==1].Class

In [5]:
X0 = df[df.Class==0].drop(columns=['Class'])
X1 = df[df.Class==1].drop(columns=['Class'])

In [6]:
X0_train, X0_test, y0_train, y0_test = train_test_split(X0, y0)

In [7]:
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1)

In [8]:
X_train = pd.concat([X0_train, X1_train])
X_test = pd.concat([X0_test, X1_test])
y_train = y0_train.append(y1_train)
y_test = y0_test.append(y1_test)

In [22]:
def evaluate(classifier, X_train, y_train):
    classifier.fit(X_train, y_train)
    prediction = classifier.predict(X_test)
    print('Score: ' + str(classifier.score(X_test, y_test)))
    print(classification_report(y_test, prediction))

In [10]:
time_scaler = RobustScaler()
amount_scaler = RobustScaler()
X_train['time_scaled'] = time_scaler.fit_transform(X_train.Time.values.reshape(-1,1))
X_train['amount_scaled'] = amount_scaler.fit_transform(X_train.Amount.values.reshape(-1,1))
X_train = X_train.drop(columns=['Time', 'Amount'])

In [11]:
X_test['time_scaled'] = time_scaler.transform(X_test.Time.values.reshape(-1,1))
X_test['amount_scaled'] = amount_scaler.transform(X_test.Amount.values.reshape(-1,1))
X_test = X_test.drop(columns=['Time', 'Amount'])

In [12]:
undersampler = RandomUnderSampler(sampling_strategy=0.5)
X_train_under, y_train_under = undersampler.fit_resample(X_train, y_train)

In [13]:
oversampler = RandomOverSampler(sampling_strategy=0.5)
X_train_over, y_train_over = oversampler.fit_resample(X_train, y_train)

In [14]:
adasyn_sampler = ADASYN(sampling_strategy=0.5)
X_train_adasyn, y_train_adasyn = adasyn_sampler.fit_resample(X_train, y_train)

In [24]:
perceptron = Perceptron()
evaluate(perceptron, X_train, y_train)

Score: 0.9989887924496503
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     71079
           1       0.68      0.78      0.73       123

    accuracy                           1.00     71202
   macro avg       0.84      0.89      0.86     71202
weighted avg       1.00      1.00      1.00     71202



In [25]:
perceptron_under = Perceptron()
evaluate(perceptron_under, X_train_under, y_train_under)

Score: 0.941967922249375
              precision    recall  f1-score   support

           0       1.00      0.94      0.97     71079
           1       0.03      0.86      0.05       123

    accuracy                           0.94     71202
   macro avg       0.51      0.90      0.51     71202
weighted avg       1.00      0.94      0.97     71202



In [27]:
perceptron_over = Perceptron()
evaluate(perceptron_over, X_train_over, y_train_over)

Score: 0.9764332462571276
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     71079
           1       0.06      0.85      0.11       123

    accuracy                           0.98     71202
   macro avg       0.53      0.92      0.55     71202
weighted avg       1.00      0.98      0.99     71202



In [28]:
prediction = perceptron_adasyn.predict(X_test)

In [29]:
perceptron_adasyn = Perceptron()
evaluate(perceptron_adasyn, X_train_adasyn, y_train_adasyn)

Score: 0.9587511586753181
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     71079
           1       0.03      0.85      0.07       123

    accuracy                           0.96     71202
   macro avg       0.52      0.91      0.52     71202
weighted avg       1.00      0.96      0.98     71202



In [30]:
logit = LogisticRegression()
evaluate(logit, X_train, y_train)

Score: 0.9991432824920649
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     71079
           1       0.84      0.62      0.71       123

    accuracy                           1.00     71202
   macro avg       0.92      0.81      0.86     71202
weighted avg       1.00      1.00      1.00     71202



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html.
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [31]:
logit_over = LogisticRegression()
evaluate(logit_over, X_train_over, y_train_over)

Score: 0.9885817814106346
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     71079
           1       0.12      0.87      0.21       123

    accuracy                           0.99     71202
   macro avg       0.56      0.93      0.60     71202
weighted avg       1.00      0.99      0.99     71202



In [32]:
logit_under = LogisticRegression()
evaluate(logit_under, X_train_under, y_train_under)

Score: 0.9810819920788741
              precision    recall  f1-score   support

           0       1.00      0.98      0.99     71079
           1       0.08      0.89      0.14       123

    accuracy                           0.98     71202
   macro avg       0.54      0.93      0.56     71202
weighted avg       1.00      0.98      0.99     71202



In [33]:
logit_adasyn = LogisticRegression()
evaluate(logit_adasyn, X_train_adasyn, y_train_adasyn)

Score: 0.9595516979860116
              precision    recall  f1-score   support

           0       1.00      0.96      0.98     71079
           1       0.04      0.89      0.07       123

    accuracy                           0.96     71202
   macro avg       0.52      0.92      0.52     71202
weighted avg       1.00      0.96      0.98     71202

