In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.utils import resample
import numpy as np

In [8]:
data = pd.read_csv('Creditcard_data.csv')

X = data.drop(columns=["Class"])
y = data["Class"]

In [9]:
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)

def random_sampling(X, y, size):
    return resample(X, y, n_samples=size, random_state=42)

def stratified_sampling(X, y, size):
    from sklearn.model_selection import StratifiedShuffleSplit
    sss = StratifiedShuffleSplit(n_splits=1, test_size=size / len(y), random_state=42)
    for train_idx, _ in sss.split(X, y):
        return X.iloc[train_idx], y.iloc[train_idx]

sample_sizes = [int(len(X_balanced) * 0.1 * i) for i in range(1, 6)]

samples = {
    "Sampling1": random_sampling(X_balanced, y_balanced, sample_sizes[0]),
    "Sampling2": random_sampling(X_balanced, y_balanced, sample_sizes[1]),
    "Sampling3": stratified_sampling(X_balanced, y_balanced, sample_sizes[2]),
    "Sampling4": random_sampling(X_balanced, y_balanced, sample_sizes[3]),
    "Sampling5": stratified_sampling(X_balanced, y_balanced, sample_sizes[4]),
}

models = {
    "M1": LogisticRegression(),
    "M2": RandomForestClassifier(),
    "M3": DecisionTreeClassifier(),
    "M4": GaussianNB(),
    "M5": SVC()
}

In [10]:
results = pd.DataFrame(columns=["Sampling", "Model", "Accuracy"])

for sample_name, (X_sample, y_sample) in samples.items():
    X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        results = pd.concat([results, pd.DataFrame([{"Sampling": sample_name, "Model": model_name, "Accuracy": accuracy}])], ignore_index=True)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  results = pd.concat([results, pd.DataFrame([{"Sampling": sample_name, "Model": model_name, "Accuracy": accuracy}])], ignore_index=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as 

In [11]:
print(results)

results.to_csv("results.csv", index=False)

     Sampling Model  Accuracy
0   Sampling1    M1  0.903226
1   Sampling1    M2  0.967742
2   Sampling1    M3  0.903226
3   Sampling1    M4  0.903226
4   Sampling1    M5  0.709677
5   Sampling2    M1  0.950820
6   Sampling2    M2  1.000000
7   Sampling2    M3  0.967213
8   Sampling2    M4  0.803279
9   Sampling2    M5  0.639344
10  Sampling3    M1  0.920561
11  Sampling3    M2  1.000000
12  Sampling3    M3  0.967290
13  Sampling3    M4  0.789720
14  Sampling3    M5  0.700935
15  Sampling4    M1  0.909836
16  Sampling4    M2  1.000000
17  Sampling4    M3  0.967213
18  Sampling4    M4  0.770492
19  Sampling4    M5  0.598361
20  Sampling5    M1  0.921569
21  Sampling5    M2  1.000000
22  Sampling5    M3  0.954248
23  Sampling5    M4  0.797386
24  Sampling5    M5  0.666667


In [12]:
df_results = pd.read_csv("results.csv")
df_results.head()

Unnamed: 0,Sampling,Model,Accuracy
0,Sampling1,M1,0.903226
1,Sampling1,M2,0.967742
2,Sampling1,M3,0.903226
3,Sampling1,M4,0.903226
4,Sampling1,M5,0.709677
