# 📊 End-to-End Exploratory Data Analysis (EDA)

This notebook provides a **modular workflow** for performing both **basic and advanced EDA**.

👉 Steps:
1. Load dataset
2. Perform Basic EDA
3. Perform Advanced EDA (visualizations, correlations, PCA)

Before running the notebook, set `CSV_PATH` in the Parameters cell below to the location of your dataset.

In [ ]:
# 📌 Parameters (edit these)
# User-editable settings for the notebook; set them before running the cells below.

# Path of the CSV file to analyse. The value shipped here is only a
# placeholder and must be replaced with a real path.
CSV_PATH = "path/to/your.csv"  # dataset path
# Name of the column holding the prediction target, or None when the dataset
# has no target column.
TARGET_COL = None                # target column for classification/regression (optional)
# Name of the column holding timestamps, or None when the dataset has no
# datetime column.
DATETIME_COL = None              # datetime column (optional)

In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the dataset named in the Parameters cell.
# When DATETIME_COL is set, ask pandas to parse that column as datetimes at
# load time; otherwise it would stay a plain string (object) column. With the
# default DATETIME_COL = None the call is identical to pd.read_csv(CSV_PATH).
read_kwargs = {}
if DATETIME_COL is not None:
    read_kwargs["parse_dates"] = [DATETIME_COL]

df = pd.read_csv(CSV_PATH, **read_kwargs)

# Preview the first rows as a quick sanity check of the load.
df.head()

## 🔹 1. Basic EDA

In [ ]:
# Dataset size as (rows, columns).
print(df.shape)
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
# Summary statistics for every column, numeric and non-numeric alike.
df.describe(include='all')

In [ ]:
# Count missing values per column, then show the ten worst columns,
# largest count first.
missing_per_column = df.isna().sum()
missing_per_column.sort_values(ascending=False).head(10)

In [ ]:
# Heatmap of the boolean missing-value mask: one cell per (row, column) entry,
# drawn without a colour bar.
fig, ax = plt.subplots(figsize=(6, 4))
missing_mask = df.isna()
sns.heatmap(missing_mask, cbar=False, ax=ax)
ax.set_title("Missing Data Map")
plt.show()

## 🔹 2. Advanced EDA & Visualizations

In [ ]:
# Split the column names by dtype. "Categorical" here simply means every
# column whose dtype is not numeric.
numeric_part = df.select_dtypes(include="number")
non_numeric_part = df.select_dtypes(exclude="number")

num_cols = numeric_part.columns.tolist()
cat_cols = non_numeric_part.columns.tolist()

print("Numeric:", num_cols)
print("Categorical:", cat_cols)

In [ ]:
# Histogram with a KDE overlay for each of the first three numeric columns,
# one figure per column; missing values are dropped before plotting.
for column_name in num_cols[:3]:
    fig, ax = plt.subplots(figsize=(6, 4))
    observed = df[column_name].dropna()
    sns.histplot(observed, kde=True, ax=ax)
    ax.set_title(f"Distribution of {column_name}")
    plt.show()

In [ ]:
# Pairwise correlation between the numeric columns, shown as an unannotated
# heatmap on a diverging colour map.
fig, ax = plt.subplots(figsize=(8, 6))
corr_matrix = df[num_cols].corr()
sns.heatmap(corr_matrix, cmap='coolwarm', annot=False, ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [ ]:
# Project the numeric columns onto their first two principal components.
#
# PCA cannot handle NaN or infinite values, so treat +/-inf as missing and keep
# only the rows that are complete across all numeric columns.
pca_input = df[num_cols].replace([np.inf, -np.inf], np.nan).dropna()
n_rows, n_features = pca_input.shape

# A 2-component PCA needs at least 2 features and 2 samples; report the
# shortfall instead of failing with an opaque scikit-learn error.
if n_rows < 2 or n_features < 2:
    print(
        "Skipping PCA: need at least 2 numeric columns and 2 complete rows, "
        f"found {n_features} column(s) and {n_rows} row(s)."
    )
else:
    # Standardise first so columns measured on large scales do not dominate
    # the components.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(pca_input)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)

    # Share of total variance captured by each plotted component.
    var_pc1, var_pc2 = pca.explained_variance_ratio_

    plt.figure(figsize=(6, 4))
    plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)
    plt.xlabel(f"PC1 ({var_pc1:.1%} of variance)")
    plt.ylabel(f"PC2 ({var_pc2:.1%} of variance)")
    plt.title("PCA (first 2 components)")
    plt.show()