In [None]:
# %% [markdown]
# # Credit Card Fraud - EDA & Preprocessing

# %%
# Install seaborn if not already installed
%pip install seaborn

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os

# Read the raw credit-card transactions, failing fast with a clear message
# when the CSV is not where the notebook expects it.
file_path = "../data/creditcard.csv"
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
else:
    raise FileNotFoundError(f"Dataset not found at {file_path}. Please check the file path.")
print(f"Data shape: {df.shape}")

# Bar chart of row counts per value of the 'Class' column, drawn on an
# explicitly created Axes instead of the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x='Class', data=df, ax=ax)
ax.set_title('Fraud vs Normal Transactions')
plt.show()

# Per-column count of null entries.
missing_counts = df.isnull().sum()
print("\nMissing values:")
print(missing_counts)

# %% [markdown]
# ## Preprocessing

# %%
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Columns that are standardized (zero mean, unit variance).
SCALED_COLUMNS = ['Time', 'Amount']

# Split features/target. `df` itself is left untouched so this cell can be
# re-run without scaling already-scaled values.
X = df.drop('Class', axis=1)
y = df['Class']

# Split train/test BEFORE fitting the scaler: fitting on the full dataset
# would leak test-set mean/variance into the training features.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Work on explicit copies so the column assignments below never write
# through to `X` / `df`.
X_train = X_train.copy()
X_test = X_test.copy()

# One scaler fitted on both columns of the training split only.
# StandardScaler standardizes each column independently, so this matches
# using a separate scaler per column, while keeping the fitted parameters
# for BOTH columns available for later transform/inverse_transform calls.
scaler = StandardScaler()
X_train[SCALED_COLUMNS] = scaler.fit_transform(X_train[SCALED_COLUMNS])
# The test split is transformed with the training statistics (no refit).
X_test[SCALED_COLUMNS] = scaler.transform(X_test[SCALED_COLUMNS])

# Sanity check: the split must account for every row exactly once.
assert len(X_train) + len(X_test) == len(X), "train/test split lost or duplicated rows"

print(f"\nTrain shape: {X_train.shape}, Test shape: {X_test.shape}")