In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the credit-card transactions CSV into a DataFrame.
DATA_PATH = "/kaggle/input/creditcardfraud/creditcard.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()

In [None]:
# Preview the last rows as well, to check the end of the file.
df.tail()


In [None]:
# Column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Count missing values per column. Any non-zero count would mean an extra
# cleaning/imputation step is needed before modelling.
df.isna().sum()

## Univariate analysis

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

In [None]:
# Separate the transactions by label:
#   Class == 0 -> normal (legitimate) transaction
#   Class == 1 -> fraudulent transaction
legit = df.loc[df['Class'].eq(0)]
fraud = df.loc[df['Class'].eq(1)]

In [None]:
# Print the (rows, columns) shape of each subset, legit first.
for subset in (legit, fraud):
    print(subset.shape)

In [None]:
# Summary statistics of the transaction amount for legitimate transactions.
legit['Amount'].describe()

In [None]:
# Summary statistics of the transaction amount for fraudulent transactions.
fraud['Amount'].describe()

In [None]:
# Per-class mean of every column, to compare legit vs. fraud side by side.
df.groupby('Class').agg('mean')

In [None]:
# Remove the 'Time' column before the correlation analysis and modelling.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell idempotent: re-running it after 'Time' is already gone no
# longer raises KeyError.
df = df.drop(columns=['Time'], errors='ignore')

## Bivariate  Analysis

In [None]:
# Pairwise correlation matrix of all columns currently in df.
df.corr()

In [None]:
# Annotated heatmap of the pairwise correlations (labels shown to 2 decimals).
corr_matrix = df.corr()
plt.figure(figsize=(17, 9))
sns.heatmap(corr_matrix, annot=True, fmt='0.2f')

## Imbalanced class detection

In [None]:
# Number of transactions per class:
#   0 -> normal transaction
#   1 -> fraud transaction
class_counts = df['Class'].value_counts()
class_counts

In [None]:
# Share of each class as a percentage of all transactions.
class_share_pct = df['Class'].value_counts().div(len(df)).mul(100)
class_share_pct

In [None]:
# Bar chart of the number of transactions in each class.
sns.countplot(x='Class', data=df)

## Handling the imbalanced class problem

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

x = df.drop('Class', axis=1)  # input features
y = df['Class']               # target label (0 = normal, 1 = fraud)

# Hold out 25% of the rows for testing; train on the remaining 75%.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Oversample the minority class with SMOTE on the TRAINING split only.
# The test split must keep its real, imbalanced distribution: resampling it
# would evaluate the models on synthetic fraud samples and an artificial
# 50/50 class ratio, which inflates every reported metric.
sm = SMOTE(random_state=27)
x_train, y_train = sm.fit_resample(x_train, y_train)



In [None]:
# Inspect the training labels produced by the SMOTE resampling step.
y_train

In [None]:
# Inspect the training features produced by the SMOTE resampling step.
x_train

In [None]:
# Inspect the test features.
x_test

In [None]:
# Class counts of the training labels after SMOTE. `df` itself is never
# resampled, so counting df['Class'] here would only repeat the original
# imbalanced counts and say nothing about the effect of oversampling.
y_train.value_counts()

In [None]:
# Bar chart of the training labels after SMOTE. Plotting `df` here would
# just redraw the original imbalanced distribution, because `df` is not
# modified by the resampling step.
sns.countplot(x=y_train)

## Classical Machine Learning Models

## Linear Models

# **1.Logistic Regression** 

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit a logistic-regression classifier with default hyper-parameters on the
# training data. fit() returns the estimator itself, so `model` is the
# fitted model and is displayed as the cell output.
model = LogisticRegression().fit(x_train, y_train)
model

In [None]:
# Precision / recall / F1 per class on the training data.
y_pre = model.predict(x_train)
train_report = classification_report(y_train, y_pre)
print(train_report)

In [None]:
# Precision / recall / F1 per class on the test data.
y_pre = model.predict(x_test)
test_report = classification_report(y_test, y_pre)
print(test_report)

In [None]:
# Test-set predictions for the confusion matrix.
# The model fitted in the training cell above is reused; building and
# fitting a second LogisticRegression here only repeated the same training
# and doubled the run time.
from sklearn.metrics import confusion_matrix, classification_report

y_pre = model.predict(x_test)


In [None]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pre)
print(cm)

# **2.Passive Aggressive Classifier**

In [None]:
from sklearn.linear_model import PassiveAggressiveClassifier

# PassiveAggressiveClassifier shuffles the training data between epochs.
# Without a fixed random_state every run yields a different model and
# different metrics, so the seed is pinned for reproducibility.
model = PassiveAggressiveClassifier(random_state=42)
model.fit(x_train, y_train)

In [None]:
# Precision / recall / F1 per class on the training data.
y_pre = model.predict(x_train)
train_report = classification_report(y_train, y_pre)
print(train_report)

In [None]:
# Precision / recall / F1 per class on the test data.
y_pre = model.predict(x_test)
test_report = classification_report(y_test, y_pre)
print(test_report)

In [None]:
# Test-set predictions for the confusion matrix.
# The model fitted in the training cell above is reused. Re-fitting a fresh
# PassiveAggressiveClassifier here was redundant and, because the estimator
# shuffles the data, could yield a different model than the one whose
# classification reports are shown above.
from sklearn.metrics import confusion_matrix, classification_report

y_pre = model.predict(x_test)

In [None]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pre)
print(cm)

# **3.Perceptron**

In [None]:
from sklearn.linear_model import Perceptron

# Fit a Perceptron with default hyper-parameters on the training data.
# fit() returns the estimator itself, so `model` is the fitted model and is
# displayed as the cell output.
model = Perceptron().fit(x_train, y_train)
model

In [None]:
# Precision / recall / F1 per class on the training data.
y_pre = model.predict(x_train)
train_report = classification_report(y_train, y_pre)
print(train_report)

In [None]:
# Precision / recall / F1 per class on the test data.
y_pre = model.predict(x_test)
test_report = classification_report(y_test, y_pre)
print(test_report)

In [None]:
# Test-set predictions for the confusion matrix.
# The model fitted in the training cell above is reused; building and
# fitting a second Perceptron here only repeated the same training and
# doubled the run time.
from sklearn.metrics import confusion_matrix, classification_report

y_pre = model.predict(x_test)

In [None]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pre)
print(cm)

# **4.RidgeClassifier**

In [None]:
from sklearn.linear_model import RidgeClassifier

# Fit a ridge classifier with default hyper-parameters on the training data.
# fit() returns the estimator itself, so `model` is the fitted model and is
# displayed as the cell output.
model = RidgeClassifier().fit(x_train, y_train)
model

In [None]:
# Precision / recall / F1 per class on the training data.
y_pre = model.predict(x_train)
train_report = classification_report(y_train, y_pre)
print(train_report)

In [None]:
# Precision / recall / F1 per class on the test data.
y_pre = model.predict(x_test)
test_report = classification_report(y_test, y_pre)
print(test_report)

In [None]:
# Test-set predictions for the confusion matrix.
# The model fitted in the training cell above is reused; building and
# fitting a second RidgeClassifier here only repeated the same training and
# doubled the run time.
from sklearn.metrics import confusion_matrix, classification_report

y_pre = model.predict(x_test)

In [None]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pre)
print(cm)

# **5.SGDClassifier**

In [None]:
from sklearn.linear_model import SGDClassifier

# SGDClassifier shuffles the training data after each epoch. Without a
# fixed random_state every run yields a different model and different
# metrics, so the seed is pinned for reproducibility.
model = SGDClassifier(random_state=42)
model.fit(x_train, y_train)


In [None]:
# Precision / recall / F1 per class on the training data.
y_pre = model.predict(x_train)
train_report = classification_report(y_train, y_pre)
print(train_report)

In [None]:
# Precision / recall / F1 per class on the test data.
y_pre = model.predict(x_test)
test_report = classification_report(y_test, y_pre)
print(test_report)

In [None]:
# Test-set predictions for the confusion matrix.
# The model fitted in the training cell above is reused. Re-fitting a fresh
# SGDClassifier here was redundant and, because the estimator shuffles the
# data, could yield a different model than the one whose classification
# reports are shown above.
from sklearn.metrics import confusion_matrix, classification_report

y_pre = model.predict(x_test)

In [None]:
# Confusion matrix on the test set (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pre)
print(cm)