In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- Working directory: run everything below relative to the project root ---

def resolve_project_root(start_dir, notebooks_dirname='notebooks'):
    """Return the project root for a kernel whose working directory is ``start_dir``.

    Parameters
    ----------
    start_dir : str
        Directory to resolve from (normally ``os.getcwd()``).
    notebooks_dirname : str, optional
        Name of the folder that holds the notebooks, one level below the
        project root. Defaults to ``'notebooks'``.

    Returns
    -------
    str
        Absolute path of the parent of ``start_dir`` when ``start_dir`` is the
        notebooks folder; otherwise the absolute form of ``start_dir`` itself.

    Notes
    -----
    Stepping up unconditionally is not safe in a notebook: every re-run of the
    cell would climb one more level and the relative 'data/...' and
    'reports/...' paths would stop resolving. Stepping up only from the
    notebooks folder makes the cell idempotent.
    """
    start_dir = os.path.abspath(start_dir)
    # normcase() makes the comparison case-insensitive on Windows only.
    current_name = os.path.basename(os.path.normcase(start_dir))
    if current_name == os.path.normcase(notebooks_dirname):
        return os.path.dirname(start_dir)
    return start_dir


# Directory the kernel is in when this cell runs ('.../notebooks' on the first
# run, the project root on any later re-run).
current_notebook_location = os.getcwd()

# Resolve the project root without climbing past it on re-runs.
project_root = resolve_project_root(current_notebook_location)

# Change the current working directory for the kernel
os.chdir(project_root)

print(f"Current working directory changed to: {os.getcwd()}")

# --- Output directories (relative to the project root set above) ---
reports_dir = 'reports'
figures_dir = os.path.join(reports_dir, 'figures')

# makedirs creates missing parents, so this one call also creates 'reports'.
os.makedirs(figures_dir, exist_ok=True)
print(f"Ensured '{reports_dir}' and '{figures_dir}' directories exist.")

# --- End of setup ---


# Read the two cleaned datasets. The paths are relative, so they resolve
# against whatever the kernel's current working directory is.
processed_dir = os.path.join('data', 'processed')
fraud_csv_path = os.path.join(processed_dir, 'Fraud_Data_cleaned.csv')
creditcard_csv_path = os.path.join(processed_dir, 'creditcard_cleaned.csv')

fraud_data = pd.read_csv(fraud_csv_path)
creditcard_data = pd.read_csv(creditcard_csv_path)

# Report the (rows, columns) of each frame as a quick sanity check.
print("\nData loaded successfully.")
print(f"Fraud Data shape: {fraud_data.shape}")
print(f"Credit Card Data shape: {creditcard_data.shape}")


def save_plot(plot_func, title, filename, **plot_kwargs):
    """Draw one seaborn plot on a fresh 10x6 figure and save it under ``figures_dir``.

    Parameters
    ----------
    plot_func : callable
        Axes-level seaborn function (e.g. ``sns.histplot``); it is called as
        ``plot_func(ax=ax, **plot_kwargs)``.
    title : str
        Title set on the axes.
    filename : str
        File name of the image, joined onto ``figures_dir``.
    **plot_kwargs
        Keyword arguments forwarded to ``plot_func`` unchanged.

    Notes
    -----
    The figure is created explicitly and closed in ``finally``, so a failing
    plot or save call does not leave an open figure behind in the kernel.
    "Saved: ..." is printed only after the file was written.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        plot_func(ax=ax, **plot_kwargs)
        ax.set_title(title)
        fig.savefig(os.path.join(figures_dir, filename))
    finally:
        plt.close(fig)
    print(f"Saved: {filename}")


# Univariate Analysis
print("\n--- Performing Univariate Analysis ---")

save_plot(
    sns.histplot,
    'Distribution of Purchase Value (Fraud Data)',
    'purchase_value_dist.png',
    data=fraud_data['purchase_value'],
    bins=50,
)

save_plot(
    sns.countplot,
    'Class Distribution (Fraud Data)',
    'class_distribution_fraud.png',
    x='class',
    data=fraud_data,
)

# The credit-card frame names its label column 'Class' (capital C).
save_plot(
    sns.countplot,
    'Class Distribution (Credit Card Data)',
    'class_distribution_creditcard.png',
    x='Class',
    data=creditcard_data,
)


# Bivariate Analysis
print("\n--- Performing Bivariate Analysis ---")

save_plot(
    sns.boxplot,
    'Purchase Value by Class (Fraud Data)',
    'purchase_value_by_class.png',
    x='class',
    y='purchase_value',
    data=fraud_data,
)

print("\nAnalysis complete. All figures saved.")

Current working directory changed to: c:\FraudDetection\fraud-detection-10academy
Ensured 'reports' and 'reports\figures' directories exist.

Data loaded successfully.
Fraud Data shape: (151112, 11)
Credit Card Data shape: (283726, 31)

--- Performing Univariate Analysis ---
Saved: purchase_value_dist.png
Saved: class_distribution_fraud.png
Saved: class_distribution_creditcard.png

--- Performing Bivariate Analysis ---
Saved: purchase_value_by_class.png

Analysis complete. All figures saved.


In [1]:
import os
# Diagnostic: show the directory the kernel is currently running in.
print("Current Working Directory:", os.getcwd())

Current Working Directory: c:\FraudDetection\fraud-detection-10academy\notebooks
