# Exploratory Data Analysis Template

**Purpose:** [Describe the goal of this analysis]

**Dataset:** [Dataset name and source]

**Date:** [YYYY-MM-DD]

## Setup

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set random seed for reproducibility
np.random.seed(42)

# Configure plotting
plt.style.use('default')
sns.set_palette('husl')
%matplotlib inline

# Paths
DATA_DIR = Path("../../data")
REPORTS_DIR = Path("../../reports")
REPORTS_DIR.mkdir(exist_ok=True)

ModuleNotFoundError: No module named 'seaborn'

## Load Data

In [None]:
# Read the processed dataset, report its dimensions, then preview the first rows
dataset_path = DATA_DIR / "processed" / "dataset.parquet"
df = pd.read_parquet(dataset_path)
print(f"Shape: {df.shape}")
df.head()

## Basic Information

In [None]:
# Dataset info: print pandas' concise summary of the frame
# (index range, column names, non-null counts, dtypes, memory usage)
df.info()

In [None]:
# Statistical summary: pandas' default descriptive statistics
# (by default count, mean, std, min, quartiles and max of the numeric columns)
df.describe()

In [None]:
# Null count per column; display only the columns that have gaps, worst first
missing = df.isna().sum()
missing.loc[missing > 0].sort_values(ascending=False)

## Univariate Analysis

In [None]:
# Distribution plots for numerical features: one row per column,
# histogram on the left and box plot on the right.
numerical_cols = df.select_dtypes(include=[np.number]).columns

if len(numerical_cols) == 0:
    # plt.subplots(nrows=0, ...) raises ValueError, so skip plotting entirely.
    print("No numerical columns to plot.")
else:
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # numerical column; with the default squeeze=True a 1x2 grid comes back as
    # a 1-D array and row/column indexing raises IndexError.
    fig, axes = plt.subplots(
        nrows=len(numerical_cols),
        ncols=2,
        figsize=(12, 4 * len(numerical_cols)),
        squeeze=False,
    )
    for (ax_hist, ax_box), col in zip(axes, numerical_cols):
        # Drop NaNs once; both plots use the same non-null values.
        values = df[col].dropna()

        # Histogram
        ax_hist.hist(values, bins=50, edgecolor='black')
        ax_hist.set_title(f'{col} - Histogram')
        ax_hist.set_xlabel(col)

        # Box plot
        ax_box.boxplot(values)
        ax_box.set_title(f'{col} - Boxplot')
        ax_box.set_ylabel(col)

    fig.tight_layout()
    plt.show()

## Bivariate Analysis

In [None]:
# Heatmap of pairwise correlations between the numerical columns,
# drawn on an explicitly created axes with a diverging palette centred on zero
corr_matrix = df[numerical_cols].corr()
corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, ax=corr_ax)
corr_ax.set_title('Correlation Matrix')
plt.show()

## Key Findings

1. [Finding 1]
2. [Finding 2]
3. [Finding 3]

## Next Steps

- [ ] [Action item 1]
- [ ] [Action item 2]
- [ ] [Action item 3]