# Credit Card Fraud Detection using random forest

Steps
1. Import libraries
2. Import the dataset
3. Perform data analysis (data cleaning, data manipulation, data visualization, EDA)
4. Data Preprocessing - feature engineering (encoders, scaling, feature selection, hyper-parameter tuning)
5. Splitting the dataset 
6. Model selection
7. Train the model
8. Test the model
9. Performance metric

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier #for feature selection

import warnings
# Silences every warning category for the rest of the session, including
# deprecation and convergence warnings from the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
# Load the dataset from creditcard.csv (path is relative to the working directory).
df = pd.read_csv('creditcard.csv')

In [None]:
# Preview the first rows.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# List the column labels.
df.columns

In [None]:
# Count rows per value of the Class column to see how balanced the target is.
df['Class'].value_counts()

In [None]:
# Annotated heatmap of the pairwise column correlations; the canvas is large
# so that the per-cell annotations have room.
plt.figure(figsize=(25, 25))
sns.heatmap(data=df.corr(), cmap='Greens', annot=True)

# Handling imbalanced dataset

In [None]:
pip install imbalanced-learn 

In [None]:
from imblearn.over_sampling import SMOTE

In [None]:
# Target vector: the Class column.
y = df['Class']
# Feature matrix: every column except the target.
x = df.drop(columns='Class')

In [None]:
# Oversample the minority class with SMOTE so both classes end up with the
# same number of rows. random_state pins the synthetic rows so that reruns
# produce the same x_sm / y_sm.
# NOTE: x_sm / y_sm contain synthetic rows; scores measured on rows drawn
# from them are optimistic.
smote = SMOTE(sampling_strategy='minority', random_state=42)
x_sm, y_sm = smote.fit_resample(x, y)

# Confirm the class counts after resampling.
y_sm.value_counts()

# Feature Selection/Feature Importance

In [None]:
# Fit an extra-trees ensemble that is used only to rank features by importance.
# random_state keeps that ranking stable between runs.
model = ExtraTreesClassifier(random_state=42)
model.fit(x_sm, y_sm)

In [None]:
# Raw importance scores, one per column of x_sm (same order).
model.feature_importances_

In [None]:
# Label each importance score with its column name, then draw the 18
# highest-scoring features as a horizontal bar chart.
feat = pd.Series(data=model.feature_importances_, index=x_sm.columns)
plt.figure(figsize=(10, 10))
feat.nlargest(18).plot.barh()

In [None]:
# Keep the 18 highest-importance features (sorted in descending order).
plot = feat.nlargest(18)

In [None]:
# Show the names of the selected features.
plot.index

In [None]:
# Take the selected feature names from the current importance ranking rather
# than from a pasted literal list, so the selection cannot go stale when the
# importances change between runs.
cols = plot.index.tolist()

In [None]:
# Restrict the resampled feature matrix to the selected columns.
x_new = x_sm.loc[:, cols]

In [None]:
# Shape before feature selection.
x_sm.shape

In [None]:
# Shape after keeping only the selected columns.
x_new.shape

# Train Test Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out the test set from the ORIGINAL (un-resampled) rows first, keeping
# the class ratio with stratify. Splitting x_sm / y_sm instead would place
# SMOTE's synthetic rows -- interpolated from rows that end up in the training
# set -- into the test set and inflate every score.
X_train, X_test, y_train, y_test = train_test_split(
    x[cols], y, test_size=0.2, random_state=42, stratify=y
)
# Oversample the minority class in the training portion only; X_test / y_test
# stay untouched, real data.
X_train, y_train = SMOTE(sampling_strategy='minority', random_state=42).fit_resample(X_train, y_train)
# NOTE(review): `cols` was ranked by a model fitted on x_sm (all rows), so
# feature selection still saw the held-out rows; rank on the training split
# for a fully clean estimate.

In [None]:
# Final classifier; seeded so that the reported metrics are reproducible.
randomf = RandomForestClassifier(random_state=42)

In [None]:
# Train the random forest on the training split.
randomf.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test split.
y_pred = randomf.predict(X_test)

In [None]:
# Accuracy alone hides how the minority class is handled, so print the
# confusion matrix (rows = true class, columns = predicted class) first.
# Accuracy stays the last expression and therefore the cell's output.
print(confusion_matrix(y_test, y_pred))
accuracy_score(y_test, y_pred)