In [5]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from src.direct_effect_simulation import incarceration_rate, calculate_disparity_measures
import numpy as np
import pandas as pd

# Parameter grid for the sweep
thetas = np.linspace(0, 1, 5)        # disparity parameter: 5 evenly spaced values in [0, 1]
pi_values = np.linspace(.1, .9, 3)   # group proportion: 3 evenly spaced values in [0.1, 0.9]
avg_rate = 200                       # average incarceration rate per 100,000

# Accumulates one record per (pi, theta, group) combination
data = []

for pi in pi_values:
    for theta in thetas:
        # Rates for the two groups at this (theta, pi) point
        rate_disadv = incarceration_rate(avg_rate, 'disadvantaged', theta, pi)
        rate_adv = incarceration_rate(avg_rate, 'advantaged', theta, pi)

        # pi weights the disadvantaged rate, (1 - pi) the advantaged rate
        pop_avg = pi * rate_disadv + (1 - pi) * rate_adv

        # Disparity measures compare the two group rates; the same values are
        # attached to every record emitted for this (theta, pi) point
        disparities = calculate_disparity_measures(rate_disadv, rate_adv)

        rates_by_group = {
            'Disadvantaged': rate_disadv,
            'Advantaged': rate_adv,
            'Population Average': pop_avg,
        }
        data.extend(
            {"theta": theta, "Group": group, "Rate": rate, "pi": pi, **disparities}
            for group, rate in rates_by_group.items()
        )

# Build the frame once from the collected records and preview it
results_df = pd.DataFrame(data)
results_df.head()

Unnamed: 0,theta,Group,Rate,pi,disparity_ratio,disparity_difference,odds_ratio,odds_disadvantaged,odds_advantaged
0,0.0,Disadvantaged,200.0,0.1,1.0,0.0,1.0,0.002004,0.002004
1,0.0,Advantaged,200.0,0.1,1.0,0.0,1.0,0.002004,0.002004
2,0.0,Population Average,200.0,0.1,1.0,0.0,1.0,0.002004,0.002004
3,0.25,Disadvantaged,650.0,0.1,4.333333,500.0,4.355142,0.006543,0.001502
4,0.25,Advantaged,150.0,0.1,4.333333,500.0,4.355142,0.006543,0.001502


In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_simulation_results(df: pd.DataFrame, pi_values: list) -> "go.Figure":
    """Build a grid figure of group rates and disparity metrics.

    One subplot row is created per entry of ``pi_values``. The left cell is a
    grouped bar chart of the "Disadvantaged" and "Advantaged" rates against
    theta, with the "Population Average" rate overlaid as a dashed grey line.
    The right cell is a table of disparity metrics with one row per theta.

    Args:
        df: Simulation results. Must contain the columns ``theta``, ``Group``,
            ``Rate``, ``pi``, ``disparity_difference``, ``disparity_ratio``,
            ``odds_disadvantaged`` and ``odds_ratio``. ``Group`` is expected
            to take the values "Disadvantaged", "Advantaged" and
            "Population Average".
        pi_values: The ``pi`` values to plot, one subplot row each, in order.
            Any iterable of numbers is accepted (e.g. a list or NumPy array).

    Returns:
        The assembled plotly figure. It is returned rather than shown so the
        caller decides how to display or save it.
    """
    # Materialise once so generators work and the length is known.
    pi_values = list(pi_values)
    n_rows = len(pi_values)

    fig = make_subplots(
        rows=n_rows,
        cols=2,
        # Titles are consumed row by row, so each pi label is repeated for
        # the bar cell and the table cell of its row.
        subplot_titles=[f"π = {pi}" for pi in pi_values for _ in range(2)],
        specs=[[{"type": "bar"}, {"type": "table"}] for _ in range(n_rows)],
        horizontal_spacing=0.05,
        vertical_spacing=0.1
    )

    group_colors = (("Disadvantaged", "red"), ("Advantaged", "blue"))

    for row_idx, pi in enumerate(pi_values, start=1):
        # Tolerant float comparison: pi values typically come from linspace
        # or arithmetic and may not be bit-identical to the caller's values.
        pi_rows = df[np.isclose(df["pi"].to_numpy(dtype=float), pi)]

        # Legend entries are emitted by the first row only; legendgroup keeps
        # the traces of the remaining rows toggling together with them.
        show_in_legend = row_idx == 1

        # Left column: grouped bars for the two groups
        for group, color in group_colors:
            group_rows = pi_rows[pi_rows["Group"] == group]
            fig.add_trace(
                go.Bar(
                    x=group_rows["theta"],
                    y=group_rows["Rate"],
                    name=group,
                    legendgroup=group,
                    showlegend=show_in_legend,
                    marker_color=color
                ),
                row=row_idx, col=1
            )

        # Left column: population average overlaid as a dashed line
        pop_avg_rows = pi_rows[pi_rows["Group"] == "Population Average"]
        fig.add_trace(
            go.Scatter(
                x=pop_avg_rows["theta"],
                y=pop_avg_rows["Rate"],
                mode="lines",
                line=dict(color="grey", width=1.5, dash="dash"),
                name="Population Average",
                legendgroup="Population Average",
                showlegend=show_in_legend
            ),
            row=row_idx, col=1
        )

        # Label the axes so each bar chart can be read on its own.
        fig.update_xaxes(title_text="θ", row=row_idx, col=1)
        fig.update_yaxes(title_text="Incarceration rate (per 100,000)", row=row_idx, col=1)

        # Right column: disparity metrics, one table row per theta.
        # The theta values are taken from df itself rather than from a
        # notebook-level variable, so the function has no hidden state.
        # The metrics are read from the "Disadvantaged" records; the first
        # record per theta is used if there are duplicates.
        metrics = (
            pi_rows[pi_rows["Group"] == "Disadvantaged"]
            .drop_duplicates(subset="theta")
            .sort_values("theta")
        )
        # go.Table expects column-major cell values: one list per column.
        table_columns = [
            [f"{value:.2f}" for value in metrics["theta"]],
            [f"{value:.1f}" for value in metrics["disparity_difference"]],
            [f"{value:.2f}" for value in metrics["disparity_ratio"]],
            [f"{value:.5f}" for value in metrics["odds_disadvantaged"]],
            [f"{value:.2f}" for value in metrics["odds_ratio"]],
        ]

        fig.add_trace(
            go.Table(
                header=dict(
                    values=["θ", "Disparity Diff", "Disparity Ratio", "Odds (Disadv)", "Odds Ratio"],
                    font=dict(size=10),
                    align="left"
                ),
                cells=dict(
                    values=table_columns,
                    font=dict(size=10),
                    align="left"
                )
            ),
            row=row_idx, col=2
        )

    # Figure-wide layout: height scales with the number of rows, and the
    # legend sits horizontally above the plots.
    fig.update_layout(
        height=300 * n_rows,
        width=1000,
        title="Group Effect on Incarceration Rates with Disparity Metrics",
        barmode="group",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="center",
            x=0.5
        )
    )

    return fig

In [12]:
# Render the rate/disparity grid for the simulated results; as the cell's last
# expression, the returned figure is displayed by the notebook.
plot_simulation_results(df=results_df, pi_values=pi_values)