In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from imblearn.over_sampling import RandomOverSampler

In [3]:
df = pd.read_csv("D:/CreditCard_data.csv")
x = df.drop("Class", axis = 1)
y = df["Class"]

In [4]:
ros = RandomOverSampler(random_state = 42)
x_balanced, y_balanced = ros.fit_resample(x, y)
balanced_df = pd.DataFrame(x_balanced, columns = x.columns)
balanced_df["Class"] = y_balanced

In [5]:
def simple_random_sampling(df, frac = 0.6):
    return df.sample(frac = frac, random_state = 42)

def systematic_sampling(df, step = 2):
    return df.iloc[::step]

def stratified_sampling(df, frac = 0.6):
    return df.groupby("Class", group_keys = False).apply(lambda x: x.sample(frac = frac, random_state = 42))

def cluster_sampling(df):
    df = df.copy()
    df["Cluster"] = df.index % 5
    choosen_cluster = np.random.choice(df["Cluster"].unique())
    return df[df["Cluster"] == choosen_cluster].drop("Cluster", axis = 1)

def bootstrap_sampling(df, n_samples):
    return df.sample(n = n_samples, replace = True, random_state = 42)

In [6]:
samples = {
    "Simple Random": simple_random_sampling(balanced_df),
    "Systematic": systematic_sampling(balanced_df),
    "Stratified": stratified_sampling(balanced_df),
    "Cluster": cluster_sampling(balanced_df),
    "Bootstrap": bootstrap_sampling(balanced_df, len(balanced_df))
}

  return df.groupby("Class", group_keys = False).apply(lambda x: x.sample(frac = frac, random_state = 42))


In [7]:
models = {
    "Logistic Regression": LogisticRegression(max_iter = 1000),
    "Decision Tree": DecisionTreeClassifier(random_state = 42),
    "Random Forest": RandomForestClassifier(n_estimators = 100, random_state = 42),
    "Naive Bayes": GaussianNB(),
    "Support Vector Machine": SVC()
}

In [8]:
results = []
for sample_name, sample_df in samples.items():
    x = sample_df.drop("Class", axis = 1)
    y = sample_df["Class"]
    scaler = StandardScaler()
    x_sclaed = scaler.fit_transform(x)
    x_train, x_test, y_train, y_test = train_test_split(x_sclaed, y, test_size = 0.2, random_state = 42, stratify = y)
    for model_name, model in models.items():
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        accuracy = accuracy_score(y_test, y_pred) * 100
        results.append({
            "Sampling Technique": sample_name,
            "Model": model_name,
            "Accuracy": accuracy
        })

In [9]:
results_df = pd.DataFrame(results)
accuracy_table = (
    results_df
    .pivot(index = "Model", columns = "Sampling Technique", values = "Accuracy")
    .round(2)
)
print("\n Accuracy Table (Sampling x Model):")
print(accuracy_table)


 Accuracy Table (Sampling x Model):
Sampling Technique      Bootstrap  Cluster  Simple Random  Stratified  \
Model                                                                   
Decision Tree               99.67    96.72          99.46       98.37   
Logistic Regression         94.77    85.25          94.02       92.93   
Naive Bayes                 82.35    72.13          69.02       79.89   
Random Forest              100.00   100.00         100.00      100.00   
Support Vector Machine      98.04    98.36          97.28       98.91   

Sampling Technique      Systematic  
Model                               
Decision Tree                99.35  
Logistic Regression          90.85  
Naive Bayes                  86.93  
Random Forest               100.00  
Support Vector Machine       96.73  


In [10]:
best_sampling = accuracy_table.idxmax(axis = 1)
print("\n Best Sampling Technique per Model:")
print(best_sampling)


 Best Sampling Technique per Model:
Model
Decision Tree              Bootstrap
Logistic Regression        Bootstrap
Naive Bayes               Systematic
Random Forest              Bootstrap
Support Vector Machine    Stratified
dtype: object
