In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn below.
sns.set_theme()

In [None]:
# Read the credit-card transactions CSV and preview the first rows.
csv_path = '../input/creditcardfraud/creditcard.csv'
data = pd.read_csv(csv_path)
data.head(5)

In [None]:
# Print the DataFrame summary (column names, dtypes and non-null counts).
data.info()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# (number of rows, number of columns) of the loaded frame.
data.shape

In [None]:
# Summary statistics for every numeric column, rounded to two decimals.
data.describe().round(2)

In [None]:
# Distribution of the "Time" column.
sns.histplot(data=data, x='Time');

In [None]:
# Summary statistics for the "Amount" column only.
data['Amount'].describe()

In [None]:
# Line plot of "Amount" against the row index.
data['Amount'].plot();

In [None]:
# Pairwise correlations between all columns, rounded so the annotations stay
# readable, drawn on a large canvas.
corr_matrix = data.corr().round(2)
plt.figure(figsize=(25, 15))
sns.heatmap(corr_matrix, annot=True);

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix = every column except the label; target = the "Class" column.
X = data.drop(columns=["Class"]).values
y = data["Class"].values

# Hold out 25% of the rows for evaluation. The split is stratified on the label
# so train and test keep the same class ratio; the positive class is expected
# to be very rare here, and an unstratified split leaves the number of positive
# test rows to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
# NOTE: X_train / X_test are overwritten, so re-running this cell on its own
# rescales the already-scaled arrays.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline: logistic regression with default settings on the scaled splits.
model = LogisticRegression().fit(X_train, y_train)

# Accuracy on the data the model was fitted on vs. the held-out split.
train_accuracy = model.score(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print('Training Accuracy: ', train_accuracy)
print('Testing Accuracy: ', test_accuracy)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the baseline model on the test split.
baseline_pred = model.predict(X_test)
print(classification_report(y_test, baseline_pred))

In [None]:
# Share of rows belonging to each class (fractions, not counts).
data['Class'].value_counts(normalize=True)

In [None]:
from imblearn.over_sampling import SMOTE

# Split FIRST so the held-out set contains only real transactions. Resampling
# the whole dataset before splitting leaks information: synthetic rows end up
# in the test set, and rows interpolated from test points end up in the
# training set, which inflates every metric computed afterwards.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.25, stratify=y
)

# Fit the scaler on the real training rows only and apply it to both splits.
# Scaling is done before SMOTE because SMOTE selects neighbours by distance.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Oversample the minority class in the training split only. The seed makes the
# synthetic rows reproducible under Restart & Run All.
oversample = SMOTE(random_state=0)
X_new, y_new = oversample.fit_resample(X_train, y_train)

# X_new / y_new stay defined because later cells reference them; the training
# names now point at the balanced training data, X_test / y_test stay real.
X_train, y_train = X_new, y_new

In [None]:
# Second logistic regression (liblinear solver), fitted on the current
# X_train / y_train produced by the resampling cell above.
model2 = LogisticRegression(solver='liblinear').fit(X_train, y_train)

# Accuracy on the data the model was fitted on vs. the held-out split.
train_accuracy = model2.score(X_train, y_train)
test_accuracy = model2.score(X_test, y_test)
print('Training Accuracy: ', train_accuracy)
print('Testing Accuracy: ', test_accuracy)

In [None]:
# Per-class precision / recall / F1 of model2 on the test split.
model2_pred = model2.predict(X_test)
print(classification_report(y_test, model2_pred))

In [None]:
from sklearn.model_selection import cross_val_score, cross_validate
from imblearn.pipeline import Pipeline

# Cross-validate on the ORIGINAL data with scaling and SMOTE inside the
# pipeline. They are then re-fitted on the training folds only, and every
# validation fold holds nothing but real, untouched transactions.
# Cross-validating on data that was oversampled beforehand scores the model on
# synthetic rows whose neighbours sit in the training folds, which inflates
# the scores.
cv_pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=0)),
    ('clf', LogisticRegression(solver='liblinear')),
])

# scorer name -> label used in the printed summary
metric_labels = {
    'roc_auc': 'ROC AUC',
    'f1': 'F1 Score',
    'precision': 'Precision',
    'recall': 'Recall',
}

# One 5-fold pass computes all four metrics instead of re-fitting the model
# once per metric.
cv_results = cross_validate(
    cv_pipeline, X, y, scoring=list(metric_labels), cv=5, n_jobs=-1
)

for metric, label in metric_labels.items():
    scores = cv_results['test_' + metric]
    print(scores)
    print('Mean %s: %.3f' % (label, np.mean(scores)))

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2,
# so importing it fails on current versions. ConfusionMatrixDisplay.from_estimator
# is the replacement: it predicts on X_test with model2 and draws the matrix
# against y_test.
ConfusionMatrixDisplay.from_estimator(model2, X_test, y_test);