In [8]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split

# Load datasets (modify paths if necessary)
fraud_data = pd.read_csv('../data/fraud_data_cleaned.csv')
credit_card_data = pd.read_csv('../data/creditcard.csv')

# Step 1.1: Separate Features (X) and Target (y) for Credit Card Dataset
X_credit = credit_card_data.drop('Class', axis=1)
y_credit = credit_card_data['Class']

# Step 1.2: Separate Features (X) and Target (y) for Fraud Data
X_fraud = fraud_data.drop('class', axis=1)
y_fraud = fraud_data['class']

# Print shapes of both feature matrices and target vectors
print("Credit Card Data - Features Shape:", X_credit.shape, "Target Shape:", y_credit.shape)
print("Fraud Data - Features Shape:", X_fraud.shape, "Target Shape:", y_fraud.shape)

# Step 1.3: Train-Test Split for both datasets

# Credit Card Dataset (80% train, 20% test)
X_credit_train, X_credit_test, y_credit_train, y_credit_test = train_test_split(
    X_credit, y_credit, test_size=0.2, random_state=42)

# Fraud Data (80% train, 20% test)
X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test = train_test_split(
    X_fraud, y_fraud, test_size=0.2, random_state=42)

# Print shapes of train and test sets for both datasets
print("\nCredit Card Data:")
print("Train Set - Features:", X_credit_train.shape, "Target:", y_credit_train.shape)
print("Test Set - Features:", X_credit_test.shape, "Target:", y_credit_test.shape)

print("\nFraud Data:")
print("Train Set - Features:", X_fraud_train.shape, "Target:", y_fraud_train.shape)
print("Test Set - Features:", X_fraud_test.shape, "Target:", y_fraud_test.shape)


Credit Card Data - Features Shape: (284807, 30) Target Shape: (284807,)
Fraud Data - Features Shape: (151112, 10) Target Shape: (151112,)

Credit Card Data:
Train Set - Features: (227845, 30) Target: (227845,)
Test Set - Features: (56962, 30) Target: (56962,)

Fraud Data:
Train Set - Features: (120889, 10) Target: (120889,)
Test Set - Features: (30223, 10) Target: (30223,)


In [9]:
# Import necessary libraries
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings

warnings.filterwarnings('ignore')  # Suppress warnings for cleaner output

# Function to train and evaluate models
def train_and_evaluate(model, X_train, X_test, y_train, y_test, dataset_name, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\n{dataset_name} - {model_name} Performance:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 2.1: Initialize models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "MLP Classifier": MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=300, random_state=42)
}

# Step 2.2: Train and evaluate models on Credit Card Data
print("\n--- Credit Card Data ---")
for model_name, model in models.items():
    train_and_evaluate(model, X_credit_train, X_credit_test, y_credit_train, y_credit_test, 
                       "Credit Card Data", model_name)

# Step 2.3: Train and evaluate models on Fraud Data
print("\n--- Fraud Data ---")
for model_name, model in models.items():
    train_and_evaluate(model, X_fraud_train, X_fraud_test, y_fraud_train, y_fraud_test, 
                       "Fraud Data", model_name)



--- Credit Card Data ---

Credit Card Data - Logistic Regression Performance:
Accuracy: 0.9988939995084443
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.79      0.49      0.60        98

    accuracy                           1.00     56962
   macro avg       0.89      0.74      0.80     56962
weighted avg       1.00      1.00      1.00     56962


Credit Card Data - Decision Tree Performance:
Accuracy: 0.9990519995786665
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.70      0.80      0.74        98

    accuracy                           1.00     56962
   macro avg       0.85      0.90      0.87     56962
weighted avg       1.00      1.00      1.00     56962


Credit Card Data - Random Forest Performance:
Accuracy: 0.9995611109160493
Classification Report:
               

ValueError: could not convert string to float: '2015-01-05 06:37:15'