In [1]:
# notebooks/unimodal_analysis.ipynb
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np

# Load cleaned data
df = pd.read_csv("../data/processed/cleaned_flood_data.csv")
os.makedirs("../figures/unimodal", exist_ok=True)

# Key variables from cleaned dataset
variables = [
    'PERSONS_AFFECTED',
    'DISPLACED_PERSONS',
    'SEVERITY_RATIO',
    'YEAR',
    'MONTH'
]

# 1. Distribution Plots (Histograms with KDE)
print("Generating distribution plots...")
for var in variables:
    plt.figure(figsize=(10, 6))
    
    # Log scale for affected/displaced persons due to right-skew
    log_scale = var in ['PERSONS_AFFECTED', 'DISPLACED_PERSONS']
    
    sns.histplot(
        data=df,
        x=var,
        kde=True,
        bins=30,
        log_scale=log_scale,
        color='skyblue'
    )
    
    plt.title(f"Distribution of {var.replace('_', ' ')}")
    plt.xlabel(var.replace('_', ' '))
    plt.ylabel('Count')
    
    if log_scale:
        plt.xlim(left=1)  # Avoid log(0)
    
    plt.tight_layout()
    plt.savefig(f"../figures/unimodal/dist_{var.lower()}.png", dpi=300)
    plt.close()

# 2. Boxplots for Numerical Variables
print("Generating boxplots...")
for var in ['PERSONS_AFFECTED', 'DISPLACED_PERSONS', 'SEVERITY_RATIO']:
    plt.figure(figsize=(10, 6))
    
    # Use log scale for better visualization of outliers
    sns.boxplot(
        data=df,
        y=np.log10(df[var] + 1),  # +1 to avoid log(0)
        color='lightgreen'
    )
    
    plt.title(f"Boxplot of {var.replace('_', ' ')} (Log Scale)")
    plt.ylabel(f"log10({var})")
    plt.tight_layout()
    plt.savefig(f"../figures/unimodal/boxplot_{var.lower()}.png", dpi=300)
    plt.close()

# 3. Temporal Analysis
print("Generating temporal plots...")
# Monthly distribution
plt.figure(figsize=(12, 6))
month_order = ['Jan','Feb','Mar','Apr','May','Jun',
               'Jul','Aug','Sep','Oct','Nov','Dec']
sns.countplot(
    data=df,
    x='MONTH',
    order=range(1,13),
    palette='viridis'
)
plt.title("Flood Occurrences by Month (2022)")
plt.xlabel('Month')
plt.ylabel('Number of Flood Events')
plt.xticks(ticks=range(12), labels=month_order)
plt.tight_layout()
plt.savefig("../figures/unimodal/monthly_distribution.png", dpi=300)
plt.close()

# 4. State-Level Analysis
print("Generating state-level analysis...")
# Top 15 states by affected persons
top_states = df.groupby('STATE')['PERSONS_AFFECTED'].sum().nlargest(15)
plt.figure(figsize=(12, 6))
sns.barplot(
    x=top_states.values,
    y=top_states.index,
    palette='rocket'
)
plt.title("Top 15 States by Total Persons Affected")
plt.xlabel('Total Persons Affected')
plt.ylabel('State')
plt.tight_layout()
plt.savefig("../figures/unimodal/top_states_affected.png", dpi=300)
plt.close()

print("✅ Unimodal analysis complete. Visualizations saved to ../figures/unimodal/")

Generating distribution plots...


  sqr = _ensure_numeric((avg - values) ** 2)
  sqr = _ensure_numeric((avg - values) ** 2)


Generating boxplots...
Generating temporal plots...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(


Generating state-level analysis...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(


✅ Unimodal analysis complete. Visualizations saved to ../figures/unimodal/
