# Credit Card Fraud Detection - EDA

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the transaction table from the CSV file that sits in the sibling
# "data" directory (the path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='../data/creditcard.csv')
# Preview the first five rows; kept as the final expression of the cell so
# that the notebook renders it.
data.head(n=5)


## Dataset Overview

In [None]:
# Structural summary of the frame (columns, dtypes, non-null counts, memory).
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
data.info()
# Per-column descriptive statistics as computed by DataFrame.describe().
print(data.describe())


## Class Distribution

In [None]:
# Split the transactions by label: rows with Class == 1 are treated as fraud
# and rows with Class == 0 as valid.
fraud = data[data['Class'] == 1]
valid = data[data['Class'] == 0]
print(f"Fraudulent transactions: {len(fraud)}")
print(f"Valid transactions: {len(valid)}")
# A *fraction* of outliers is the fraud count over ALL transactions.
# Dividing by len(valid) would report the fraud-to-valid ratio (odds), which
# does not match the label. True division already yields a float in
# Python 3, so no float() cast is needed.
print(f"Outlier fraction: {len(fraud) / len(data):.4f}")

# Bar chart of the number of rows per Class value.
sns.countplot(x="Class", data=data)
plt.title("Class Distribution")
plt.show()


## Transaction Amount Comparison

In [None]:
# Print describe() of the Amount column for each subset, fraud first.
for heading, subset in (("Fraud Amount Stats:", fraud),
                        ("Valid Amount Stats:", valid)):
    print(heading)
    print(subset['Amount'].describe())

# Overlay both Amount histograms (50 bins each, with a KDE curve) on a single
# figure. Valid is drawn first and fraud second, so fraud is painted on top.
plt.figure(figsize=(10, 5))
for subset, colour, tag in ((valid, 'green', 'Valid'),
                            (fraud, 'red', 'Fraud')):
    sns.histplot(subset['Amount'], bins=50, color=colour, label=tag, kde=True)
plt.legend()
plt.title("Transaction Amount Comparison")
plt.show()


## Correlation Matrix

In [None]:
# Pairwise correlation between the columns of the frame.
corrmat = data.corr()

# Draw the matrix as a heatmap on an explicitly created 12x9 axes; the colour
# scale is capped at 0.8 and cells are forced to be square.
fig, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, vmax=0.8, square=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Matrix")
plt.show()
