In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor

warnings.filterwarnings('ignore')


In [None]:
# Read creditcard.csv from the Kaggle input directory into `df`, the frame
# every later cell works from, and preview its first rows.
df = pd.read_csv('/kaggle/input/creditcardfraud/creditcard.csv')
df.head()

In [None]:
# Show the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Descriptive summary statistics for the columns of df.
df.describe()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Correlation of every column with the Class label, as a one-column frame
# (index named 'Index', column 'Correlations') ordered from the most
# positive correlation down to the most negative.
corr = (
    df.corrwith(df['Class'])
    .rename_axis('Index')
    .to_frame('Correlations')
    .sort_values(by=['Correlations'], ascending=False)
)

# Tall, narrow figure: one annotated heatmap row per column of df.
plt.figure(figsize=(4, 15))
fig = sns.heatmap(corr, annot=True, fmt="g", cmap='YlGnBu')
plt.title("Correlation of Variables with Class")
plt.show()

In [None]:
# Histogram of the Time column across all transactions.
# sns.distplot is deprecated in seaborn and only ran quietly here because
# warnings are suppressed at the top of the notebook; sns.histplot is the
# documented replacement for a plain (kde=False) count histogram.
plt.figure(figsize=(15,4))
# NOTE: despite its name, `fig` holds the Axes returned by seaborn; the name
# is kept so the notebook's variables stay the same.
fig = sns.histplot(df['Time'], kde=False, color="green")
plt.title("Distribution of Transaction Time")
plt.xlabel("Time")
plt.show()

In [None]:
# Scatter of Amount against Time, restricted to rows labelled Class == 1.
is_fraud = df['Class'] == 1
plt.figure(figsize=(8, 4))
fig = plt.scatter(
    x=df.loc[is_fraud, 'Time'],
    y=df.loc[is_fraud, 'Amount'],
    color="c",
)
plt.title("Time vs Transaction Amount in Fraud Cases")
plt.show()

In [None]:
# Scatter of Amount against Time, restricted to rows labelled Class == 0.
is_legit = df['Class'] == 0
plt.figure(figsize=(8, 4))
fig = plt.scatter(
    x=df.loc[is_legit, 'Time'],
    y=df.loc[is_legit, 'Amount'],
    color="dodgerblue",
)
plt.title("Time vs Transaction Amount in Legit Cases")
plt.show()

In [None]:
# Grid of histograms, one per (numeric) column of df, on a single large figure.
df.hist(figsize=(20,20), color = "salmon")
plt.show()

In [None]:
# Bar chart of the number of rows per Class value (the label used below as
# 0 = legit, 1 = fraud), to inspect how the two labels are balanced.
plt.figure(figsize=(7,5))
fig = sns.countplot(x="Class", data=df)
plt.show()

In [None]:
# Separate the rows by label and drop the label column, leaving feature-only
# frames: `inliers` (Class == 0) and `outliers` (Class == 1).
inliers = df.loc[df['Class'] == 0].drop(columns=['Class'])
outliers = df.loc[df['Class'] == 1].drop(columns=['Class'])

# Hold out 30% of the inlier rows for evaluation; the fixed seed keeps the
# split reproducible. The outlier rows are not split.
inliers_train, inliers_test = train_test_split(
    inliers,
    test_size=0.30,
    random_state=42,
)

In [None]:
# Fit an Isolation Forest on the training inliers only, then predict on the
# held-out inliers and on all outlier rows. predict() returns +1 for points
# judged inliers and -1 for points judged outliers.
# random_state is fixed (same seed as the train/test split) because the
# forest is built from random sub-samples and random splits; without it the
# reported rates change on every run.
model = IsolationForest(random_state=42)
model.fit(inliers_train)
inlier_pred_test = model.predict(inliers_test)
outlier_pred = model.predict(outliers)

In [None]:
# Fraction of held-out inlier rows predicted +1, and fraction of outlier rows
# predicted -1, for the predictions currently held in
# inlier_pred_test / outlier_pred.
print(
    "Accuracy in Detecting Legit Cases:",
    np.count_nonzero(inlier_pred_test == 1) / len(inlier_pred_test),
)
print(
    "Accuracy in Detecting Fraud Cases:",
    np.count_nonzero(outlier_pred == -1) / len(outlier_pred),
)

In [None]:
# Local Outlier Factor, fitted on the training inliers only.
# (LocalOutlierFactor is imported with the other dependencies in the first
# cell rather than mid-notebook.)
# novelty=True is what makes predict() available for data the model was not
# fitted on; predict() returns +1 for inliers and -1 for outliers.
# NOTE: this rebinds `model`, `inlier_pred_test` and `outlier_pred`,
# replacing the Isolation Forest results computed earlier.
model = LocalOutlierFactor(novelty=True)
model.fit(inliers_train)
inlier_pred_test = model.predict(inliers_test)
outlier_pred = model.predict(outliers)

In [None]:
# Fraction of held-out inlier rows predicted +1, and fraction of outlier rows
# predicted -1, for the predictions currently held in
# inlier_pred_test / outlier_pred.
print(
    "Accuracy in Detecting Legit Cases:",
    np.count_nonzero(inlier_pred_test == 1) / len(inlier_pred_test),
)
print(
    "Accuracy in Detecting Fraud Cases:",
    np.count_nonzero(outlier_pred == -1) / len(outlier_pred),
)