In [6]:
!pip install imblearn



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import SMOTE
from collections import Counter



In [3]:
# Read the training set from the notebook's working directory.
train_csv = 'train.csv'
train = pd.read_csv(train_csv)

In [4]:
# Preview the first five rows to sanity-check the columns that were loaded.
train.head(n=5)

Unnamed: 0,id,Time,V1,V2,V3,V4,V5,V6,V7,V8,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,0.0,2.074329,-0.129425,-1.137418,0.412846,-0.192638,-1.210144,0.110697,-0.263477,...,-0.334701,-0.88784,0.336701,-0.110835,-0.291459,0.207733,-0.076576,-0.059577,1.98,0
1,1,0.0,1.998827,-1.250891,-0.520969,-0.894539,-1.122528,-0.270866,-1.029289,0.050198,...,0.054848,-0.038367,0.133518,-0.461928,-0.465491,-0.464655,-0.009413,-0.038238,84.0,0
2,2,0.0,0.091535,1.004517,-0.223445,-0.435249,0.667548,-0.988351,0.948146,-0.084789,...,-0.326725,-0.803736,0.154495,0.951233,-0.506919,0.085046,0.224458,0.087356,2.69,0
3,3,0.0,1.979649,-0.184949,-1.064206,0.120125,-0.215238,-0.648829,-0.087826,-0.035367,...,-0.095514,-0.079792,0.167701,-0.042939,0.000799,-0.096148,-0.05778,-0.073839,1.0,0
4,4,0.0,1.025898,-0.171827,1.203717,1.2439,-0.636572,1.099074,-0.938651,0.569239,...,0.099157,0.608908,0.027901,-0.262813,0.257834,-0.252829,0.108338,0.021051,1.0,0


In [5]:
# Feature matrix: everything except the target and row identifiers.
# The CSV also carries an 'Unnamed: 0' column (a saved row index that mirrors
# 'id' in the preview above); like 'id' it is an identifier, not a signal, so
# it is removed too. errors='ignore' keeps this working if the file is later
# re-exported without that column.
X = (
    train
    .drop(columns=['Class', 'id'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
# Binary target (0 / 1).
y = train['Class']

In [6]:
# Tally how many rows fall in each class before any resampling.
class_counts = Counter(y)
print(class_counts)

Counter({0: 218660, 1: 469})


In [7]:
# Oversample the minority class with SMOTE (seeded for repeatability).
# NOTE(review): X_res / y_res are synthesized from ALL rows, so they should not
# be the source of a held-out evaluation split -- synthetic points would be
# interpolated from rows that end up in the test set.
sm = SMOTE(random_state=42)
resampled = sm.fit_resample(X, y)
X_res, y_res = resampled

In [8]:
# Confirm the class balance after SMOTE.
resampled_counts = Counter(y_res)
print(resampled_counts)

Counter({0: 218660, 1: 218660})


In [9]:
# Split the ORIGINAL data first, so the held-out set contains only real rows at
# the true class ratio. Splitting after SMOTE leaks information: synthetic
# minority points are interpolated from rows that land in the test set, which
# inflates every downstream score.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=42
)

# Oversample the minority class in the training portion only.
# NOTE(review): cross-validation run on X_train / y_train still mixes synthetic
# neighbours across folds; for leak-free CV scores, resample inside an
# imblearn.pipeline.Pipeline instead.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train_raw, y_train_raw)

In [None]:
# Standardize features, then fit a logistic-regression classifier.
scaler = StandardScaler()
# The default 'lbfgs' solver rejects penalty='l1', so half of the grid below
# would fail to fit and score NaN. 'saga' supports both 'l1' and 'l2' and suits
# standardized features; max_iter is raised to give it room to converge.
clf = LogisticRegression(solver='saga', max_iter=1000, random_state=42)

# Scaling lives inside the pipeline so each CV fold is scaled using statistics
# from its own training split only.
pipe = Pipeline([
    ('scaler', scaler),
    ('classifier', clf)
])

# Regularization strength and penalty type to search over.
param_grid = {
    'classifier__C': [0.1, 1, 10],
    'classifier__penalty': ['l1', 'l2']
}

# 5-fold cross-validated search, ranked by ROC AUC.
grid = GridSearchCV(pipe, param_grid, cv=5, scoring='roc_auc')
grid.fit(X_train, y_train)

In [None]:
# Report the winning hyper-parameters and their mean cross-validated score.
search_summary = (
    ("Best Parameters:", grid.best_params_),
    ("Best Score:", grid.best_score_),
)
for label, value in search_summary:
    print(label, value)

In [None]:
# ROC AUC must be computed from continuous scores, not hard 0/1 predictions:
# thresholded labels collapse the ROC curve to a single operating point and
# make this number incomparable with the 'roc_auc' score used during the search.
# Column 1 of predict_proba is the probability of the positive class (label 1).
test_scores = grid.best_estimator_.predict_proba(X_test)[:, 1]
roc_auc_score(y_test, test_scores)