# Modeling Notebook

This notebook is dedicated to building and training machine learning models for fraud detection in e-commerce and bank transactions. We will focus on:
- Establishing a baseline model
- Implementing ensemble models
- Evaluating model performance with appropriate metrics
- Addressing class imbalance in the dataset

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from src.data.sampling import handle_class_imbalance
from src.models.trainer import train_model
from src.models.pipelines import create_pipeline

# Load the processed dataset
data = pd.read_csv('../data/processed/fraud_data.csv')

# Split the data into features and target
X = data.drop('target', axis=1)
y = data['target']

# Split the ORIGINAL data into training and testing sets *before* any
# resampling. Resampling first would let the test set be drawn from the
# rebalanced data (and, with oversampling, let copies of test rows leak into
# training), which makes the reported metrics overly optimistic.
# `stratify=y` keeps the class ratio of `y` the same in both partitions so the
# minority class is represented in the held-out set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Handle class imbalance on the training partition only; the test partition
# keeps the original class distribution so evaluation is realistic.
X_train, y_train = handle_class_imbalance(X_train, y_train)

# Create a baseline model
baseline_model = RandomForestClassifier(random_state=42)
baseline_model.fit(X_train, y_train)

# Evaluate the baseline model on the untouched test partition
y_pred_baseline = baseline_model.predict(X_test)
print("Baseline Model Classification Report:")
print(classification_report(y_test, y_pred_baseline))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_baseline))

# Implementing an ensemble model: a RandomForestClassifier wrapped by the
# project's create_pipeline helper (its added steps are defined in
# src.models.pipelines, not here).
ensemble_model = create_pipeline(RandomForestClassifier(n_estimators=100, random_state=42))
ensemble_model.fit(X_train, y_train)

# Evaluate the ensemble model on the same test partition as the baseline
y_pred_ensemble = ensemble_model.predict(X_test)
print("Ensemble Model Classification Report:")
print(classification_report(y_test, y_pred_ensemble))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_ensemble))

# Save the trained models for future use
import joblib
joblib.dump(baseline_model, '../models/baseline_model.pkl')
joblib.dump(ensemble_model, '../models/ensemble_model.pkl')
