In [6]:
import os
import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report

# Resolve the project root as the parent of the current working directory.
# This assumes the kernel was started from the notebooks/ folder, i.e. exactly
# ONE level below the project root (a single ".." is joined below).
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
DATA_PATH = os.path.join(BASE_DIR, "data", "cleaned_creditcard.csv")
MODEL_DIR = os.path.join(BASE_DIR, "models")
PREDICTIONS_PATH = os.path.join(BASE_DIR, "data", "predictions.csv")

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Base Directory: {BASE_DIR}")
print(f"Data Path: {DATA_PATH}")
print(f"Model Directory: {MODEL_DIR}")

# Fail fast with a clear message before any loading is attempted.
# isdir/isfile (rather than exists) also reject a path of the wrong kind,
# e.g. a plain file named "models" or a directory sitting at the CSV path.
if not os.path.isdir(MODEL_DIR):
    raise FileNotFoundError(f"Model directory not found: {MODEL_DIR}")
if not os.path.isfile(DATA_PATH):
    raise FileNotFoundError(f"Dataset not found: {DATA_PATH}")

# Deserialize the two saved models stored under MODEL_DIR.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
logistic_model, rf_model = (
    joblib.load(os.path.join(MODEL_DIR, model_filename))
    for model_filename in ("logistic_regression.pkl", "random_forest.pkl")
)

print("Models loaded successfully!")

# Read the cleaned dataset from disk.
df = pd.read_csv(DATA_PATH)

# "Class" is the label column; every remaining column is handed to the
# models as a feature.
y = df["Class"]
X = df.drop(columns="Class")

# Score every row of the dataset with each model.
y_pred_logistic = logistic_model.predict(X)
y_pred_rf = rf_model.predict(X)

# Print accuracy and the per-class classification report for each model,
# logistic regression first, then random forest.
# NOTE(review): y and the predictions cover every row loaded from DATA_PATH;
# if the models were fitted on any of these rows the scores are optimistic —
# confirm that a held-out evaluation exists elsewhere.
for report_header, predicted_labels in (
    ("\nLogistic Regression Model Performance:", y_pred_logistic),
    ("\nRandom Forest Model Performance:", y_pred_rf),
):
    print(report_header)
    print(f"Accuracy: {accuracy_score(y, predicted_labels):.4f}")
    print(classification_report(y, predicted_labels))

# Attach both prediction vectors to the dataset as two extra columns
# (logistic regression first, random forest second).
df = df.assign(
    **{
        "Logistic_Prediction": y_pred_logistic,
        "RF_Prediction": y_pred_rf,
    }
)

# Persist the dataset plus predictions; the row index is not written.
df.to_csv(PREDICTIONS_PATH, index=False)
print(f"Predictions saved successfully at {PREDICTIONS_PATH}")


Base Directory: /Users/hadywehbi/credit_card_fraud_detection
Data Path: /Users/hadywehbi/credit_card_fraud_detection/data/cleaned_creditcard.csv
Model Directory: /Users/hadywehbi/credit_card_fraud_detection/models
Models loaded successfully!





Logistic Regression Model Performance:
Accuracy: 0.9462
              precision    recall  f1-score   support

           0       0.92      0.98      0.95    284315
           1       0.97      0.92      0.94    284315

    accuracy                           0.95    568630
   macro avg       0.95      0.95      0.95    568630
weighted avg       0.95      0.95      0.95    568630


Random Forest Model Performance:
Accuracy: 0.9888
              precision    recall  f1-score   support

           0       0.98      1.00      0.99    284315
           1       1.00      0.98      0.99    284315

    accuracy                           0.99    568630
   macro avg       0.99      0.99      0.99    568630
weighted avg       0.99      0.99      0.99    568630

Predictions saved successfully at /Users/hadywehbi/credit_card_fraud_detection/data/predictions.csv
