<a href="https://colab.research.google.com/github/VinayNegi7/CODSOFT/blob/main/Credit_card_Fraud_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TASK 5: Credit Card Fraud Detection


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

pd.set_option('display.max_columns', None)

In [5]:
# calling the dataset
df=pd.read_csv('creditcard.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,-0.5516,-0.617801,-0.99139,-0.311169,1.468177,-0.470401,0.207971,0.025791,0.403993,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,1.612727,1.065235,0.489095,-0.143772,0.635558,0.463917,-0.114805,-0.183361,-0.145783,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0.0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,0.624501,0.066084,0.717293,-0.165946,2.345865,-2.890083,1.109969,-0.121359,-2.261857,0.52498,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0.0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,-0.226487,0.178228,0.507757,-0.287924,-0.631418,-1.059647,-0.684093,1.965775,-1.232622,-0.208038,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0.0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,-0.822843,0.538196,1.345852,-1.11967,0.175121,-0.451449,-0.237033,-0.038195,0.803487,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0.0


Exploration of Dataset

In [6]:
# dimensions
df.shape

(23858, 31)

In [7]:
# datatype
df.dtypes

Time        int64
V1        float64
V2        float64
V3        float64
V4        float64
V5        float64
V6        float64
V7        float64
V8        float64
V9        float64
V10       float64
V11       float64
V12       float64
V13       float64
V14       float64
V15       float64
V16       float64
V17       float64
V18       float64
V19       float64
V20       float64
V21       float64
V22       float64
V23       float64
V24       float64
V25       float64
V26       float64
V27       float64
V28       float64
Amount    float64
Class     float64
dtype: object

In [8]:
# checking for the null values
df.isnull().sum()

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       1
V23       1
V24       1
V25       1
V26       1
V27       1
V28       1
Amount    1
Class     1
dtype: int64

In [9]:
# checking the balance of dataset
fraud_count= df['Class'].value_counts()
fraud_rate=100*fraud_count/df.shape[0]
fraud_data=pd.concat([fraud_count,fraud_rate],axis=1).reset_index()
fraud_data.columns=['Class','Count','Percentage']
fraud_data

Unnamed: 0,Class,Count,Percentage
0,0.0,23769,99.62696
1,1.0,88,0.368849


Data preparation

In [12]:
# handling imbalance
df_fraud=df[df['Class']==1]
df_not_fraud=df[df['Class']==0]
df_not_fraud_sample= df_not_fraud.sample(df_fraud.shape[0],replace=False, random_state=101)

df_balanced=pd.concat([df_not_fraud_sample, df_fraud],axis=0).sample(frac=1,replace=False,random_state=101).reset_index().drop('index',axis=1)
df_balanced

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,25231,-16.598665,10.541751,-19.818982,6.017295,-13.025901,-4.128779,-14.118865,11.161144,-4.099551,-9.222826,6.329365,-8.952191,-0.138364,-9.825054,0.057224,-7.541687,-14.259599,-5.035052,1.432268,1.534920,1.725853,-1.151606,-0.680052,0.108176,1.066878,-0.233720,1.707521,0.511423,99.99,1.0
1,21046,-16.917468,9.669900,-23.736443,11.824990,-9.830548,-2.514829,-17.290657,1.820408,-6.264903,-12.916636,9.567110,-13.717067,0.899541,-13.272965,-0.402260,-7.754094,-11.644603,-4.741303,0.584626,0.996745,-2.336111,0.972755,1.241866,-1.051086,0.038009,0.672317,2.108471,-1.421243,1.00,1.0
2,31706,0.204244,-1.253126,-0.121944,1.468147,-0.507941,0.105586,0.648465,-0.082910,-0.443316,-0.058224,1.629801,1.346255,0.743408,0.519884,0.470190,0.084146,-0.472089,-0.011566,-0.704211,0.878778,0.453987,0.281605,-0.532141,0.066460,0.415750,-0.322606,-0.056650,0.095027,493.00,0.0
3,32583,1.222600,0.404934,0.233423,2.165828,0.639472,1.134849,-0.086273,0.189317,-0.585709,0.665985,-0.564783,0.821944,1.106138,-0.201144,-1.330089,0.910588,-1.167243,0.261922,0.349206,-0.027878,-0.322501,-0.810554,-0.171045,-1.387711,0.632084,-0.075702,0.006596,0.001342,11.41,0.0
4,17187,1.088375,0.898474,0.394684,3.170258,0.175739,-0.221981,-0.022989,-0.010874,0.860044,-0.592473,1.072728,-2.547557,1.235950,-0.330306,-1.022354,0.335642,2.298998,-0.162096,-1.532707,-0.217358,-0.423554,-0.800852,0.077614,0.167608,0.350182,-0.118941,0.012948,0.054254,3.79,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,29157,-0.539906,-3.175015,0.187317,0.881224,-1.621140,1.199706,0.168192,0.299654,0.851223,-0.682476,1.258360,1.162199,-0.545390,-0.004424,-0.376179,-0.340582,0.492742,-0.809733,-0.301901,1.531583,0.375055,-0.555086,-0.601565,-0.188835,-0.363258,0.865315,-0.163519,0.142518,844.06,0.0
172,30916,1.311914,-0.779253,0.940827,-0.666421,-1.383582,-0.266491,-1.129296,0.105309,-0.314225,0.576576,-0.201392,-0.860452,-0.285974,-0.302598,1.431905,1.216656,0.366015,-1.227511,-0.132713,0.042340,0.408097,1.117399,-0.075068,0.109021,0.331788,-0.018794,0.048287,0.022980,22.00,0.0
173,31448,0.872250,-0.113100,1.506023,2.903818,-0.848068,0.534471,-0.471521,0.274335,0.204935,0.345489,-0.607340,0.421022,-0.233062,-0.428004,-0.650035,-0.031538,0.211135,-0.971771,-1.119502,-0.036417,-0.097387,-0.243796,0.053427,0.388809,0.166636,-0.090903,0.041775,0.049614,94.04,0.0
174,14303,1.115082,0.327387,0.874767,1.276130,-0.233102,-0.203531,-0.138394,-0.116372,1.304862,-0.496910,1.017587,-1.575076,2.791450,1.483179,0.569389,0.005714,0.400774,-0.494460,-0.946122,-0.077263,-0.284582,-0.557798,0.115390,0.006964,0.228658,-0.639115,0.028253,0.032358,34.47,0.0


In [14]:
# checking the balance of dataset
fraud_count= df_balanced["Class"].value_counts()
fraud_rate=100*fraud_count/df_balanced.shape[0]
fraud_data=pd.concat([fraud_count,fraud_rate],axis=1).reset_index()
fraud_data.columns=['Class','Count','Percentage']
fraud_data

Unnamed: 0,Class,Count,Percentage
0,1.0,88,50.0
1,0.0,88,50.0


In [15]:
# train test split
X_train,X_test,y_train,y_test=train_test_split(df_balanced.drop('Class',axis=1),df_balanced['Class'],test_size=0.2,random_state=101)

print(f'''X_train:{X_train.shape}
X_test:{X_test.shape}
y_train:{y_train.shape}
y_test:{y_test.shape}''')


X_train:(140, 30)
X_test:(36, 30)
y_train:(140,)
y_test:(36,)


Fitting a Random Forest Model

In [16]:
# logistic model pipe
randomForestModel= Pipeline([('scaler',StandardScaler()),('model',RandomForestClassifier())])
randomForestModel.fit(X_train,y_train)

In [17]:
# prediction
y_pred_logis=randomForestModel.predict(X_test)
y_pred_logis

array([1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1.,
       0., 1., 1., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0.,
       1., 1.])

In [19]:
# Classification report
cr= classification_report(y_test,y_pred_logis)
print(cr)

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97        16
         1.0       1.00      0.95      0.97        20

    accuracy                           0.97        36
   macro avg       0.97      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36



Result:
The accuracy of the model is coming out to be 97% which is high so the model is considered as accurate.