In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import tabulate

from sklearn.model_selection import train_test_split
from imblearn.under_sampling import ClusterCentroids , TomekLinks , NearMiss

from sklearn.metrics import accuracy_score , confusion_matrix , precision_score , recall_score , f1_score , classification_report , auc , roc_curve , ConfusionMatrixDisplay ,  roc_auc_score
from sklearn.utils import resample
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression , LogisticRegression
from sklearn.preprocessing import LabelEncoder , RobustScaler

from imblearn.over_sampling import SMOTE

# Use the "fivethirtyeight" matplotlib style sheet for the notebook's figures.
plt.style.use("fivethirtyeight")
import os

# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

/kaggle/input/creditcardfraud/creditcard.csv


# **Obtain the Data Set**

In [2]:
# Read the credit-card transactions CSV from the Kaggle input directory.
CREDIT_CARD_CSV = "/kaggle/input/creditcardfraud/creditcard.csv"
creditCard = pd.read_csv(CREDIT_CARD_CSV)

In [3]:
# Display the first five rows of the loaded frame.
creditCard.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [4]:
# Preview 25 randomly chosen rows. A fixed random_state makes the draw
# deterministic, so the displayed rows are the same on every Restart & Run All
# instead of changing with each execution.
creditCard.sample(25, random_state=42)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
9437,13912.0,-1.061635,0.720603,2.467822,0.218738,-0.259301,0.666118,-0.142715,0.244025,1.52031,...,-0.104325,0.275768,-0.088831,-0.000904,-0.431948,0.164599,-0.606802,-0.406076,12.99,0
2169,1680.0,0.989081,-0.743314,-0.083869,-0.874971,-0.564913,-0.236326,-0.124165,0.141001,1.373852,...,-0.084714,-0.384947,-0.132523,-0.338615,0.482653,-0.760964,0.032963,0.023507,123.97,0
127625,78403.0,-0.482292,0.030268,1.718387,-2.079117,-0.761937,-1.398351,0.431235,-0.224983,-1.195773,...,-0.036351,0.277782,0.10607,0.959964,-0.679321,0.666515,0.072072,0.171714,40.49,0
55804,47134.0,-0.186,0.295894,1.984355,1.213443,-0.405774,0.829518,-0.344815,0.371061,1.09272,...,0.140276,0.849584,-0.24871,-0.405096,-0.424321,-0.138541,0.280352,0.197009,8.37,0
272973,165354.0,2.267243,-1.063266,-2.529909,-1.709238,-0.002178,-1.051274,0.054562,-0.542761,-2.539967,...,0.211445,1.038845,-0.170385,0.793588,0.639592,0.242816,-0.078061,-0.08064,63.0,0
162643,115265.0,2.132595,-0.103536,-2.444977,0.071638,0.870858,-0.782812,0.610393,-0.379828,0.345054,...,0.046419,0.204692,-0.096544,0.099625,0.432358,0.837613,-0.13856,-0.084298,29.95,0
135352,81212.0,-0.501388,1.074578,1.839441,-0.00402,0.020225,-1.021183,1.01009,-0.421264,-0.399598,...,-0.229256,-0.39718,0.018059,0.726927,-0.219674,0.00722,-0.016783,-0.10447,8.99,0
175989,122581.0,1.880427,0.268848,-0.238001,3.578786,0.317784,1.04629,-0.45473,0.395231,-0.490091,...,-0.415604,-1.260886,0.486256,0.135884,-0.552641,-0.530499,-0.015506,-0.040056,0.0,0
30630,36017.0,-1.004997,0.202581,0.896972,-2.153149,0.194958,-0.445412,1.428928,0.068138,0.134816,...,-0.161155,-0.854814,0.088065,-0.415028,0.530691,-1.016231,-0.009756,0.051594,145.0,0
91080,63305.0,-0.960288,0.169133,1.364268,-0.395683,0.914455,-0.814974,0.218068,-0.114386,-0.050601,...,-0.229084,-0.351974,-0.108071,0.228084,-0.491926,-0.141592,-0.080677,-0.236683,5.16,0


# **Exploring the Data Set**

In [5]:
# Summary statistics for the numeric columns, transposed so that each
# feature occupies one row and each statistic one column.
creditCard.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Time,284807.0,94813.86,47488.145955,0.0,54201.5,84692.0,139320.5,172792.0
V1,284807.0,1.168375e-15,1.958696,-56.40751,-0.920373,0.018109,1.315642,2.45493
V2,284807.0,3.416908e-16,1.651309,-72.715728,-0.59855,0.065486,0.803724,22.057729
V3,284807.0,-1.379537e-15,1.516255,-48.325589,-0.890365,0.179846,1.027196,9.382558
V4,284807.0,2.074095e-15,1.415869,-5.683171,-0.84864,-0.019847,0.743341,16.875344
V5,284807.0,9.604066e-16,1.380247,-113.743307,-0.691597,-0.054336,0.611926,34.801666
V6,284807.0,1.487313e-15,1.332271,-26.160506,-0.768296,-0.274187,0.398565,73.301626
V7,284807.0,-5.556467e-16,1.237094,-43.557242,-0.554076,0.040103,0.570436,120.589494
V8,284807.0,1.213481e-16,1.194353,-73.216718,-0.20863,0.022358,0.327346,20.007208
V9,284807.0,-2.406331e-15,1.098632,-13.434066,-0.643098,-0.051429,0.597139,15.594995


In [6]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
creditCard.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [7]:
# Report the size of the frame, how many rows are exact duplicates,
# and the number of missing values per column.
row_count, column_count = creditCard.shape
print(f"The Data Set contain {row_count} Rows & {column_count} Columns")

duplicate_rows = creditCard.duplicated().sum()
print(f"The Data Set contain {duplicate_rows} record duplicated") # Flag here: duplicated rows need follow-up

nulls_per_feature = creditCard.isna().sum()
print(f"Number of Nulls in each features :\n{nulls_per_feature}")

The Data Set contain 284807 Rows & 31 Columns
The Data Set contain 1081 record duplicated
Number of Nulls in each features :
Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64
