In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Create the folder that the saved figures are written into (no error if present)
os.makedirs(name='../plots', exist_ok=True)

# Read both CSV datasets into DataFrames
fraud_data = pd.read_csv(filepath_or_buffer='../data/Fraud_Data.csv')
creditcard_data = pd.read_csv(filepath_or_buffer='../data/creditcard.csv')

# Univariate Analysis
# Fraud Data: 30-bin histogram of the purchase_value column, saved as a PNG
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(fraud_data['purchase_value'], bins=30, ax=ax)
ax.set_title('Distribution of Purchase Value (Fraud Data)')
ax.set_xlabel('Purchase Value ($)')
ax.set_ylabel('Count')
fig.savefig('../plots/purchase_value_dist.png')
# Close the figure explicitly so it is not kept open after saving
plt.close(fig)

# Creditcard Data: 30-bin histogram of the Amount column, saved as a PNG
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(creditcard_data['Amount'], bins=30, ax=ax)
ax.set_title('Distribution of Transaction Amount (Creditcard Data)')
ax.set_xlabel('Amount ($)')
ax.set_ylabel('Count')
fig.savefig('../plots/amount_dist.png')
# Close the figure explicitly so it is not kept open after saving
plt.close(fig)

# Class distribution
# Fraud Data: bar chart of the number of rows per value of the 'class' column
fig, ax = plt.subplots(figsize=(10, 6))
fraud_class_counts = fraud_data['class'].value_counts()
fraud_class_counts.plot.bar(ax=ax)
ax.set_title('Class Distribution (Fraud Data)')
ax.set_xlabel('Class (0: Non-Fraud, 1: Fraud)')
ax.set_ylabel('Count')
fig.savefig('../plots/fraud_class_dist.png')
plt.close(fig)

# Creditcard Data: bar chart of the number of rows per value of the 'Class' column
fig, ax = plt.subplots(figsize=(10, 6))
creditcard_class_counts = creditcard_data['Class'].value_counts()
creditcard_class_counts.plot.bar(ax=ax)
ax.set_title('Class Distribution (Creditcard Data)')
ax.set_xlabel('Class (0: Non-Fraud, 1: Fraud)')
ax.set_ylabel('Count')
fig.savefig('../plots/creditcard_class_dist.png')
plt.close(fig)

# Bivariate Analysis: Purchase Value vs Class (Fraud Data)
# One box per 'class' value, showing the spread of purchase_value within it
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=fraud_data, x='class', y='purchase_value', ax=ax)
ax.set_title('Purchase Value by Class (Fraud Data)')
ax.set_xlabel('Class')
ax.set_ylabel('Purchase Value ($)')
fig.savefig('../plots/purchase_value_vs_class.png')
plt.close(fig)

# Correlation matrix (Creditcard Data)
# Pairwise column correlations drawn as an unannotated heatmap
fig, ax = plt.subplots(figsize=(12, 8))
creditcard_corr = creditcard_data.corr()
sns.heatmap(creditcard_corr, cmap='coolwarm', annot=False, ax=ax)
# Title goes on the heatmap axes, not on the colorbar axes seaborn adds
ax.set_title('Correlation Matrix (Creditcard Data)')
fig.savefig('../plots/creditcard_corr_matrix.png')
plt.close(fig)