# Credit Card Fraud Detection using Isolation Forest Algorithm & Local Outlier Factor(LOF) Algorithm


In [None]:
#import libraries
import numpy as np
import pandas as pd
import sklearn
from sklearn.metrics import classification_report,accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor


In [None]:
data=pd.read_csv('../input/creditcardfraud/creditcard.csv')
data.head()

In [None]:
data.shape

In [None]:
data.info() # generalized info


In [None]:
data.describe() #statistical info

In [None]:
# Check for Null values
data.isnull().sum()

In [None]:
X=data.drop("Class",axis=1)
Y=data["Class"]
# Print the shapes of X & Y
print(X.shape)
print(Y.shape)

In [None]:
#Determine the number of fraud and valid transactions in the dataset

Fraud = data[data['Class']==1]

Valid = data[data['Class']==0]

outlier_fraction = len(Fraud)/float(len(Valid))

In [None]:
print(outlier_fraction)

print("Fraud Cases : {}".format(len(Fraud)))

print("Valid Cases : {}".format(len(Valid)))

In [82]:
##Define the outlier detection methods
state = np.random.RandomState(40)
classifiers = {
    "Isolation Forest":IsolationForest(n_estimators=100, max_samples=len(X), 
                                       contamination=outlier_fraction,random_state=state, verbose=0),
    "Local Outlier Factor":LocalOutlierFactor(n_neighbors=50, algorithm='auto', 
                                              leaf_size=30, metric='minkowski',
                                              p=2, metric_params=None, contamination=outlier_fraction)       
}

In [None]:
n_outliers = len(Fraud)

for i, (clf_name,clf) in enumerate(classifiers.items()):
    #Fit the data and tag outliers
    if clf_name == "Local Outlier Factor":
        y_pred = clf.fit_predict(X)
        scores_prediction = clf.negative_outlier_factor_
    elif clf_name=="Isolation Forest":   
        clf.fit(X)
        scores_prediction = clf.decision_function(X)
        y_pred = clf.predict(X)
        #Reshape the prediction values to 0 for Valid transactions , 1 for Fraud transactions
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1
    
    # Run Classification Metrics
    print("{}".format(clf_name))
    print("Accuracy Score :")
    print(accuracy_score(Y,y_pred))
    print("Classification Report :")
    print(classification_report(Y,y_pred))
 
   

Isolation Forest
Accuracy Score :
0.9976159293837581
Classification Report :
