In [43]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, matthews_corrcoef
from imblearn.over_sampling import SMOTE
from sklearn.feature_selection import VarianceThreshold


In [44]:
df2 = pd.read_csv("Datasets.csv")
print(df2.shape)
print(df2.columns)

(10127, 17)
Index(['Unnamed: 0', 'step', 'type', 'branch', 'amount', 'nameOrig',
       'oldbalanceOrg', 'newbalanceOrig', 'nameDest', 'oldbalanceDest',
       'newbalanceDest', 'unusuallogin', 'isFlaggedFraud', 'Acct type',
       'Date of transaction', 'Time of day', 'isFraud'],
      dtype='object')


In [45]:
df2.rename(columns={df2.columns[-1]: "isFraud"}, inplace=True)

df2 = df2.replace([np.inf, -np.inf], np.nan)
df2 = df2.dropna()

In [46]:
X2 = df2.drop("isFraud", axis=1)
y2 = df2["isFraud"]

non_numeric_cols = X2.select_dtypes(include=['object']).columns
X2 = X2.drop(columns=non_numeric_cols)

X2 = X2.astype(float)
y2 = y2[X2.index]  

In [47]:
smote = SMOTE(random_state=42)
X2_resampled, y2_resampled = smote.fit_resample(X2, y2)
print("After SMOTE:", np.bincount(y2_resampled))

scaler = StandardScaler()
X2_scaled = scaler.fit_transform(X2_resampled)

After SMOTE: [10020 10020]




In [48]:

X_train, X_test, y_train, y_test = train_test_split(X2_scaled, y2_resampled, test_size=0.2, random_state=42)

In [None]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=2000, solver='saga'),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(n_estimators=100)
}

results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)

    results.append({
        'Model': name,
        'Accuracy': accuracy,
        'Precision': precision,
        'MCC': mcc
    })

results_df = pd.DataFrame(results)
print(results_df)


                 Model  Accuracy  Precision       MCC
0  Logistic Regression  0.817116   0.790871  0.636676
1        Decision Tree  0.994261   0.991964  0.988532
2        Random Forest  0.996507   0.993976  0.993026
