In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE

In [8]:
data=pd.read_csv('/content/Creditcard_data.csv')
print(data.head())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [9]:
x=data.drop('Class',axis=1)
y=data['Class']

In [10]:
scaler=StandardScaler()
x_scaled=scaler.fit_transform(x)

In [12]:
from imblearn.over_sampling import RandomOverSampler, SMOTE
ros=RandomOverSampler(random_state=42)
x_ros,y_ros=ros.fit_resample(x_scaled,y)

In [13]:
smote=SMOTE(random_state=42)
x_smote,y_smote=smote.fit_resample(x_scaled,y)

In [14]:
rus=RandomUnderSampler(random_state=42)
x_rus,y_rus=rus.fit_resample(x_scaled,y)

In [15]:
smotee=SMOTE(sampling_strategy=0.5,random_state=42)
x_smotee,y_smotee=smotee.fit_resample(x_scaled,y)

In [24]:
def split_data(x,y):
   return train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)
splits={
    "Original":(x_scaled,y),
    "RandomOverSampler":(x_ros,y_ros),
    "SMOTE":(x_smote,y_smote),
    "Random UnderSampler":(x_rus,y_rus),
    "SMOTEENN":(x_smotee,y_smotee)
}


In [31]:
print(splits)

{'Original': (array([[-1.64803433, -0.91417964, -0.24726294, ...,  0.3664493 ,
        -0.01440974,  0.40944652],
       [-1.64803433,  1.05791448,  0.04177003, ..., -0.10752096,
         0.11421525, -0.3337123 ],
       [-1.64221099, -0.91305661, -1.32805577, ..., -0.26170665,
        -0.15353824,  1.56791039],
       ...,
       [ 1.72367735,  1.07515965, -0.00570364, ..., -0.1354703 ,
         0.11890945, -0.31707177],
       [ 1.72950069,  1.11601326, -0.24584082, ..., -0.06793994,
         0.04929592, -0.2848023 ],
       [ 1.73532402,  1.02846901, -0.07239758, ...,  0.03404218,
         0.11610679, -0.27756949]]), 0      0
1      1
2      0
3      0
4      0
      ..
767    0
768    0
769    0
770    0
771    0
Name: Class, Length: 772, dtype: int64), 'RandomOverSampler': (array([[-1.64803433, -0.91417964, -0.24726294, ...,  0.3664493 ,
        -0.01440974,  0.40944652],
       [-1.64803433,  1.05791448,  0.04177003, ..., -0.10752096,
         0.11421525, -0.3337123 ],
       [-1

In [29]:
models={
    "Random Forest":RandomForestClassifier(random_state=42),
    "Logistic Regression":LogisticRegression(random_state=42),

}
results=[]



In [32]:
for method, (x, y) in splits.items():
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
    for model_name, model in models.items():
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        accuracy = accuracy_score(y_test, y_pred)
        results.append((method, model_name, accuracy))

In [33]:
res=pd.DataFrame(results,columns=['Sampling Method','Model','Accuracy'])
print(res)

       Sampling Method                Model  Accuracy
0             Original        Random Forest  0.987097
1             Original  Logistic Regression  0.987097
2    RandomOverSampler        Random Forest  1.000000
3    RandomOverSampler  Logistic Regression  0.918301
4                SMOTE        Random Forest  0.990196
5                SMOTE  Logistic Regression  0.911765
6  Random UnderSampler        Random Forest  0.750000
7  Random UnderSampler  Logistic Regression  0.500000
8             SMOTEENN        Random Forest  0.995633
9             SMOTEENN  Logistic Regression  0.908297
