# In [None]:

# Exploratory Data Analysis (EDA) Template using Pandas, Matplotlib, and Seaborn

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Global plot styling: seaborn "whitegrid" theme plus a figure DPI of 120.
# `sns.set_theme` is the preferred entry point; `sns.set` is only an alias
# for it that may be removed in a future seaborn release.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.dpi"] = 120

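# Load your dataset: uncomment the line below and replace the path with your file.
# Everything that follows expects a pandas DataFrame named `df` to be defined.
# df = pd.read_csv("your_file.csv")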

# Quick overview: dimensions, first rows, per-column dtype / non-null report,
# and summary statistics for every column (transposed: one row per column).
print("Shape:", df.shape)
display(df.head())
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in display() -- that would render a stray "None" after the report.
df.info()
display(df.describe(include='all').T)

# Missing values: per-column null count and percentage of rows, shown with the
# columns that have the most missing data first.
missing = df.isnull().sum()
missing_pct = 100 * missing / len(df)
# Name both columns explicitly; an unnamed Series would appear under the
# uninformative header `0` in the concatenated table.
missing_summary = pd.concat(
    [missing.rename("missing_count"), missing_pct.rename("missing_pct")],
    axis=1,
)
display(missing_summary.sort_values("missing_pct", ascending=False))

# Cell-level map of the null mask (rows x columns) to spot missingness patterns.
plt.figure(figsize=(8,4))
sns.heatmap(df.isnull(), cbar=False)
plt.title("Missing value map")
plt.show()

# Split the column names by dtype: numeric columns first, then columns whose
# dtype is object or category. Both results are plain Python lists.
num_cols, cat_cols = (
    df.select_dtypes(include=dtype_filter).columns.tolist()
    for dtype_filter in ([np.number], ['object', 'category'])
)

# Numeric univariate: one figure per numeric column, with a histogram (KDE
# overlay) on the left and a box plot on the right, both on non-null values.
for col in num_cols:
    non_null_values = df[col].dropna()
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))
    hist_ax, box_ax = axes

    sns.histplot(non_null_values, kde=True, ax=hist_ax)
    hist_ax.set_title(f"{col} distribution")

    sns.boxplot(x=non_null_values, ax=box_ax)
    box_ax.set_title(f"{col} boxplot")

    fig.tight_layout()
    plt.show()

# Categorical univariate: for each categorical column, plot the counts of its
# first 15 entries from value_counts() (counts on x, category labels on y).
for col in cat_cols:
    top_counts = df[col].value_counts().iloc[:15]
    fig, ax = plt.subplots(figsize=(6, 3))
    sns.barplot(x=top_counts.values, y=top_counts.index, ax=ax)
    ax.set_title(f"{col} top categories")
    plt.show()

# Correlation heatmap of the numeric columns (DataFrame.corr() defaults).
# At least two numeric columns are required: with none there is nothing to
# plot, and with a single column its only cell is hidden by the mask below.
if len(num_cols) >= 2:
    corr = df[num_cols].corr()
    # The matrix is symmetric with a constant diagonal, so hide the upper
    # triangle (diagonal included) and show each pair only once.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    plt.figure(figsize=(8,6))
    # Pin the colour scale to the full correlation range so the diverging
    # colormap is centred on zero and colours are comparable across datasets.
    sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        fmt=".2f",
        cmap='coolwarm',
        vmin=-1,
        vmax=1,
        square=True,
    )
    plt.title("Correlation matrix")
    plt.show()
else:
    print("Skipping correlation heatmap: fewer than two numeric columns.")

# Pairplot (sampled): pairwise scatter plots get slow on large frames, so use
# at most 500 rows; the fixed random_state keeps the sample reproducible.
sample = df.sample(500, random_state=1) if len(df) > 500 else df
# sns.pairplot raises when it is given no variables to plot, so skip it when
# the dataset has no numeric columns.
if num_cols:
    sns.pairplot(sample[num_cols])
    plt.show()
else:
    print("Skipping pairplot: no numeric columns.")

# Example: categorical vs numeric -- distribution of the first numeric column
# within each level of the first categorical column. Skipped when either kind
# of column is absent.
if cat_cols and num_cols:
    cat_col, num_col = cat_cols[0], num_cols[0]
    # Open a dedicated figure: without it the box plot is drawn on whatever
    # axes is current, which on backends where plt.show() does not close
    # figures is the last panel of the previous plot.
    plt.figure()
    sns.boxplot(data=df, x=cat_col, y=num_col)
    plt.xticks(rotation=45)
    plt.title(f"{num_col} by {cat_col}")
    plt.show()

print("EDA complete. Customize further as needed!")
