# In [3]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Simulate bracketed income data and plot it as a KDE with a target-income marker
def _simulate_incomes(income_ranges, percentages, n=10000, seed=123):
    """Draw synthetic incomes, uniformly distributed within each bracket.

    Args:
        income_ranges: Sequence of ``(low, high)`` bracket bounds.
        percentages: Percent of the ``n`` simulated points assigned to each
            bracket, in the same order as ``income_ranges``.
        n: Nominal total number of points to simulate.
        seed: Seed for the private random generator.

    Returns:
        1-D NumPy array of simulated incomes (empty if no bracket receives
        any points).

    Raises:
        ValueError: If ``income_ranges`` and ``percentages`` differ in length.
    """
    if len(income_ranges) != len(percentages):
        raise ValueError(
            f"income_ranges has {len(income_ranges)} entries but "
            f"percentages has {len(percentages)}; they must match"
        )

    # A private legacy RandomState yields the same stream as
    # np.random.seed(seed) followed by np.random.uniform, but leaves the
    # caller's global NumPy random state untouched.
    rng = np.random.RandomState(seed)

    # round() instead of int(): float noise such as 56.99999999999999 must
    # not silently drop a sample from a bracket.
    chunks = [
        rng.uniform(low, high, int(round(n * share / 100)))
        for (low, high), share in zip(income_ranges, percentages)
    ]
    if not chunks:
        return np.empty(0)
    return np.concatenate(chunks)


def plot_income_distribution(income_ranges, percentages, target_monthly_income, location, save_path):
    """Plot a KDE of simulated incomes and save it as an image.

    Incomes are simulated from the bracket percentages, the annualised target
    income (``target_monthly_income * 12``) is marked with a dashed vertical
    line, and the share of simulated incomes at or below it is shown as a
    percentile label. The figure is written to ``save_path`` at 300 dpi and
    then closed.

    Args:
        income_ranges: Sequence of ``(low, high)`` income bracket bounds.
        percentages: Percent of simulated points per bracket, aligned with
            ``income_ranges``.
        target_monthly_income: Monthly income to highlight on the plot.
        location: Place name inserted into the plot title.
        save_path: Destination path passed to ``savefig``.

    Raises:
        ValueError: If the two sequences differ in length, or if the inputs
            produce no simulated data points.
    """
    # Validate and simulate before any figure is created so bad input cannot
    # leave an open figure behind.
    income_data = _simulate_incomes(income_ranges, percentages)
    if income_data.size == 0:
        raise ValueError("no income data to plot: ranges/percentages yield zero samples")

    # Calculate annual income for the monthly target income
    target_income = target_monthly_income * 12

    # Percentage of simulated incomes at or below the target
    percentile = np.mean(income_data <= target_income) * 100

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        # Plot the KDE
        sns.kdeplot(income_data, fill=True, color="blue", alpha=0.5, ax=ax)
        ax.axvline(target_income, color="red", linestyle="--", linewidth=2,
                   label=f"Monthly Income: ${target_monthly_income:,}\n(Annual: ${target_income:,.0f})")
        ax.set_title(f"Income Distribution in {location} (2023): Kernel Density Estimate", fontsize=16)
        ax.set_xlabel("Income (USD)", fontsize=14)
        ax.set_ylabel("Density", fontsize=14)
        ax.grid(True, linestyle="--", alpha=0.7)

        # Percentile label, anchored on the target line at a fixed y of 1e-5
        ax.text(
            target_income, 0.00001,
            f"{percentile:.1f}th Percentile",
            color="red", fontsize=12, ha="center", va="bottom",
            bbox=dict(boxstyle="round,pad=0.3", edgecolor="red", facecolor="white", alpha=0.8)
        )

        # Set x-axis limits to start from $0
        ax.set_xlim(0, income_data.max() * 1.1)

        # Footnote, offset below the lower-left corner of the axes
        ax.annotate(
            "Values are in 2023 inflation-adjusted dollars.",
            xy=(0, 0), xycoords='axes fraction',
            fontsize=10, ha='left', va='top', color='gray',
            xytext=(10, -30), textcoords='offset points'
        )

        ax.legend(fontsize=12, loc="upper right")
        fig.tight_layout()

        # Save the figure to the specified path
        fig.savefig(save_path, dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed,
        # so it cannot leak or overlap with the next plot.
        plt.close(fig)

# Income brackets (low, high) used for New York City, Pasadena, and Ithaca.
# All three locations use the same bounds; each gets its own list copy.
_INCOME_BRACKETS = (
    (0, 10000), (10000, 14999), (15000, 24999), (25000, 34999),
    (35000, 49999), (50000, 74999), (75000, 99999), (100000, 149999),
    (150000, 199999), (200000, 300000),
)

# New York City: percent of simulated points per bracket, and the monthly income to mark
nyc_income_ranges = list(_INCOME_BRACKETS)
nyc_percentages = [6.2, 5.8, 8.0, 11.1, 9.6, 19.2, 13.6, 15.4, 4.7, 6.5]
nyc_target_monthly_income = 3822

# Pasadena
# NOTE(review): these percentages sum to 99.3 and differ from the NYC list
# only in the second bracket (5.0 vs 5.8) - confirm against the data source.
pasadena_income_ranges = list(_INCOME_BRACKETS)
pasadena_percentages = [6.2, 5.0, 8.0, 11.1, 9.6, 19.2, 13.6, 15.4, 4.7, 6.5]
pasadena_target_monthly_income = 2534

# Ithaca
ithaca_income_ranges = list(_INCOME_BRACKETS)
ithaca_percentages = [18.4, 5.0, 8.7, 9.3, 14.7, 11.0, 8.0, 10.4, 5.9, 8.6]
ithaca_target_monthly_income = 2951

# Output directory for the figures (created if missing).
figures_dir = "../figures"  # Adjust path as needed for your directory structure
os.makedirs(figures_dir, exist_ok=True)

# One row per figure: (ranges, percentages, monthly target, title location, file name).
# Rows are processed top to bottom, so the plots are produced in this order.
_plot_jobs = (
    (nyc_income_ranges, nyc_percentages, nyc_target_monthly_income,
     "New York City, NY", "income_3.png"),
    (pasadena_income_ranges, pasadena_percentages, pasadena_target_monthly_income,
     "Pasadena, TX", "income_1.png"),
    (ithaca_income_ranges, ithaca_percentages, ithaca_target_monthly_income,
     "Ithaca, NY", "income_2.png"),
)

for _ranges, _shares, _monthly, _place, _filename in _plot_jobs:
    plot_income_distribution(
        _ranges, _shares, _monthly, _place, os.path.join(figures_dir, _filename)
    )
