<a href="https://colab.research.google.com/github/cray0101/DevvitApps/blob/main/Unified_Methodology_h1b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

# =============================================================================
# H1B VISA ALLOCATION SIMULATOR: UNIFIED LOTTERY MODEL (v3)
#
# This notebook implements and visualizes a single-lottery system.
#
# v3 Update: Features a single, master "beta" slider that controls both the
# system-wide simulation and the per-applicant view for a unified experience.
# =============================================================================

# --- SETUP: Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, VBox, HBox
from IPython.display import display, clear_output

# Set plot style: apply seaborn's "whitegrid" style once, up front, so every
# figure drawn by the plotting functions below shares the same look.
sns.set_style("whitegrid")

# --- DATA PREPARATION ---

def load_initial_country_data():
    """Return FY2019 petition counts per country as a two-column DataFrame.

    The twenty largest source countries are listed explicitly. All remaining
    petitions are folded into a single trailing 'Other' row so that the
    'Applicants' column sums to the overall petition total.

    Returns:
        DataFrame with columns 'Country' and 'Applicants', one row per
        country (plus 'Other'), in the order listed below.
    """
    overall_petitions = 424223
    petitions_by_country = {
        'India': 301616,
        'China': 51317,
        'Canada': 4165,
        'South Korea': 3653,
        'Philippines': 3362,
        'Mexico': 3109,
        'Taiwan': 2707,
        'Pakistan': 2147,
        'Brazil': 1888,
        'United Kingdom': 1845,
        'Nigeria': 1686,
        'Nepal': 1675,
        'Japan': 1553,
        'France': 1228,
        'Iran': 1133,
        'Venezuela': 1073,
        'Russia': 945,
        'Colombia': 920,
        'Germany': 910,
        'Australia': 880,
    }
    # Whatever is not attributed to a named country becomes the 'Other' bucket.
    listed_petitions = sum(petitions_by_country.values())
    petitions_by_country['Other'] = overall_petitions - listed_petitions
    return pd.DataFrame({
        'Country': list(petitions_by_country.keys()),
        'Applicants': list(petitions_by_country.values()),
    })

def generate_applicant_profiles(country_df, scholarship_rate=0.05, seed=None):
    """Generate one simulated applicant row per petition in ``country_df``.

    Args:
        country_df: DataFrame with a 'Country' column and an integer
            'Applicants' column giving how many rows to create per country.
        scholarship_rate: Probability (0..1) that an applicant is flagged
            with ``HasScholarship=True``.
        seed: Optional seed for reproducible profiles. When given, draws come
            from a dedicated ``np.random.default_rng(seed)``. When ``None``
            (the default) the module-level ``np.random`` functions are used,
            so a prior ``np.random.seed(...)`` call still controls the output.

    Returns:
        DataFrame with columns 'Country', 'WageLevel' (drawn from 1-4 with
        probabilities 0.35/0.30/0.20/0.15) and 'HasScholarship', indexed
        0..n-1. An empty ``country_df`` yields an empty frame with those
        columns instead of failing inside ``pd.concat``.
    """
    print("Generating detailed applicant profiles... (This may take a moment)")
    wage_dist = {1: 0.35, 2: 0.30, 3: 0.20, 4: 0.15}
    wage_levels = list(wage_dist.keys())
    wage_probs = list(wage_dist.values())
    scholarship_probs = [1 - scholarship_rate, scholarship_rate]

    # Both the legacy module and a Generator expose the same
    # ``choice(a, size=..., p=...)`` call used below.
    rng = np.random if seed is None else np.random.default_rng(seed)

    all_applicants = []
    # Iterate the two columns directly; iterrows() would build a Series per row.
    for country, n_applicants in zip(country_df['Country'], country_df['Applicants']):
        n_applicants = int(n_applicants)
        all_applicants.append(pd.DataFrame({
            'Country': country,  # scalar, broadcast to the length of the arrays
            'WageLevel': rng.choice(wage_levels, size=n_applicants, p=wage_probs),
            'HasScholarship': rng.choice([False, True], size=n_applicants, p=scholarship_probs),
        }))
    print("Applicant profiles generated.")

    if not all_applicants:
        # pd.concat([]) raises ValueError; hand back an empty, correctly shaped frame.
        return pd.DataFrame(columns=['Country', 'WageLevel', 'HasScholarship'])
    return pd.concat(all_applicants, ignore_index=True)

# --- CORE SIMULATION LOGIC ---

def calculate_final_tickets(df, beta, wage_weights, scholarship_multiplier):
    """Compute the lottery ticket count for every applicant in ``df``.

    Each applicant's tickets are the product of three multipliers:

    * DiversityMultiplier: ``(total applicants / applicants from the same
      country) ** beta``.
    * WageMultiplier: ``wage_weights[WageLevel]`` (NaN when the level has no
      entry in ``wage_weights``).
    * ScholarshipMultiplier: ``scholarship_multiplier`` when
      ``HasScholarship`` is truthy, otherwise 1.0.

    Args:
        df: Applicant frame with 'Country', 'WageLevel' and 'HasScholarship'
            columns. It is not modified.
        beta: Exponent applied to the country scarcity ratio.
        wage_weights: Mapping from wage level to ticket weight.
        scholarship_multiplier: Factor applied to scholarship holders.

    Returns:
        A new DataFrame, indexed 0..n-1, holding the input columns plus
        'DiversityMultiplier', 'WageMultiplier', 'ScholarshipMultiplier' and
        'FinalTickets'. Columns of those names already present in ``df`` are
        overwritten, so the function can be re-applied to its own output.
    """
    # A fresh 0..n-1 index on a new frame, so the caller's frame is untouched.
    tickets_df = df.reset_index(drop=True)
    total_applicants = len(tickets_df)

    # One multiplier per country, then a vectorised lookup per applicant.
    # This runs on every slider move, so it avoids a full-frame merge.
    country_counts = tickets_df['Country'].value_counts()
    multiplier_by_country = (total_applicants / country_counts) ** beta
    # astype(float) keeps the column numeric even if 'Country' is categorical.
    tickets_df['DiversityMultiplier'] = tickets_df['Country'].map(multiplier_by_country).astype(float)

    tickets_df['WageMultiplier'] = tickets_df['WageLevel'].map(wage_weights)

    # Vectorised replacement for a per-row Python lambda.
    tickets_df['ScholarshipMultiplier'] = np.where(
        tickets_df['HasScholarship'], scholarship_multiplier, 1.0
    )

    tickets_df['FinalTickets'] = (
        tickets_df['DiversityMultiplier']
        * tickets_df['WageMultiplier']
        * tickets_df['ScholarshipMultiplier']
    )
    return tickets_df

def run_unified_lottery_simulation(beta, wage_weights, scholarship_multiplier, applicants_df,
                                   total_visas=85000, random_state=42):
    """Run the unified lottery: weight every applicant, then draw the winners.

    Args:
        beta: Diversity exponent forwarded to ``calculate_final_tickets``.
        wage_weights: Mapping from wage level to ticket weight.
        scholarship_multiplier: Ticket factor for scholarship holders.
        applicants_df: Applicant frame accepted by ``calculate_final_tickets``.
        total_visas: Number of visas to allocate.
        random_state: Seed passed to ``DataFrame.sample``. The default of 42
            keeps the draw identical between runs with the same settings.

    Returns:
        DataFrame of winning applicants including their ticket columns. It
        has at most ``total_visas`` rows. Because the draw is without
        replacement, it is capped at the number of applicants holding a
        positive ticket count, and is empty when nobody holds any.
    """
    sim_df_with_tickets = calculate_final_tickets(applicants_df, beta, wage_weights, scholarship_multiplier)

    # Weighted sampling without replacement cannot return more rows than there
    # are positive weights. Asking for more raises ValueError, which is easy
    # to trigger from the UI by typing 0 into a wage-weight box.
    eligible = int((sim_df_with_tickets['FinalTickets'] > 0).sum())
    n_winners = min(total_visas, eligible)
    if n_winners == 0:
        # sample() rejects an all-zero weight vector even for n=0.
        return sim_df_with_tickets.iloc[0:0]

    winners_df = sim_df_with_tickets.sample(n=n_winners, weights='FinalTickets', random_state=random_state)
    return winners_df

def plot_system_results(winners_df, applicants_df):
    """Draw the three system-wide result panels for one simulation run.

    Panels, top to bottom:
        1. Number of visas won per country.
        2. Applicant share vs. winner share for the ten countries with the
           most applicants.
        3. Number of winners per wage level (1-4).

    Args:
        winners_df: Frame of lottery winners with 'Country' and 'WageLevel'.
        applicants_df: Frame of all applicants with a 'Country' column.
    """
    fig, axes = plt.subplots(3, 1, figsize=(12, 20))

    # --- Panel 1: absolute winner counts -------------------------------------
    # Name the columns explicitly rather than relying on the default names
    # produced by value_counts().reset_index(), which vary by pandas version.
    winner_counts = (
        winners_df['Country'].value_counts()
        .rename_axis('Country')
        .reset_index(name='count')
    )
    sns.barplot(data=winner_counts, x='count', y='Country', ax=axes[0], palette="viridis")
    axes[0].set_title('1. Final Visa Distribution by Country', fontsize=16, loc='left')

    # --- Panel 2: applicant share vs. winner share ---------------------------
    app_dist = (
        applicants_df['Country'].value_counts(normalize=True)
        .rename_axis('Country')
        .reset_index(name='proportion_Applicant')
    )
    win_dist = (
        winners_df['Country'].value_counts(normalize=True)
        .rename_axis('Country')
        .reset_index(name='proportion_Winner')
    )
    # Left merge + fill: a country with applicants but no winners must show a
    # 0% winner bar, not vanish from the chart as an inner merge would cause.
    comp_df = app_dist.merge(win_dist, on='Country', how='left').fillna({'proportion_Winner': 0.0})
    comp_df = comp_df.melt(
        id_vars='Country',
        value_vars=['proportion_Applicant', 'proportion_Winner'],
        var_name='Type',
        value_name='Percentage',
    )
    # app_dist is already sorted by applicant count, so its head is the top 10.
    top_countries = app_dist['Country'].head(10)
    sns.barplot(
        data=comp_df[comp_df['Country'].isin(top_countries)],
        x='Percentage', y='Country', hue='Type', ax=axes[1], palette="rocket",
    )
    axes[1].xaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    axes[1].set_title('2. Applicant vs. Winner Percentage (Top 10 Countries)', fontsize=16, loc='left')

    # --- Panel 3: winners by wage level --------------------------------------
    sns.countplot(data=winners_df, x='WageLevel', order=[1, 2, 3, 4], palette="crest", ax=axes[2])
    axes[2].set_title('3. Distribution of Visa Winners by Wage Level', fontsize=16, loc='left')

    plt.tight_layout(pad=3.0)
    plt.show()

def plot_applicant_chances(country, wage_level, has_scholarship, beta, applicants_df, total_visas=85000,
                           wage_weights=None, scholarship_multiplier=2.0):
    """Print and plot one applicant's estimated chance of winning a visa.

    The whole applicant pool is re-weighted for the given ``beta`` so that the
    applicant's tickets can be compared with the global ticket total.

    Args:
        country: Applicant's country; must occur in ``applicants_df['Country']``.
        wage_level: Applicant's wage level. Levels missing from
            ``wage_weights`` count as weight 1.
        has_scholarship: Whether the scholarship multiplier applies.
        beta: Diversity exponent.
        applicants_df: Full applicant pool.
        total_visas: Number of visas being allocated.
        wage_weights: Mapping from wage level to ticket weight. Defaults to
            ``{1: 1, 2: 2, 3: 4, 4: 8}``.
        scholarship_multiplier: Ticket factor for scholarship holders.

    Raises:
        IndexError: If ``country`` has no rows in ``applicants_df``.

    Note:
        The estimate is ``total_visas * my_tickets / total_tickets``, i.e. the
        expected number of wins if tickets were drawn with replacement. It is
        only a first-order approximation of the without-replacement lottery
        and can exceed 1 for heavily weighted profiles, so it is clamped to
        the valid probability range [0, 1] before being shown.
    """
    if wage_weights is None:
        wage_weights = {1: 1, 2: 2, 3: 4, 4: 8}

    sim_df_with_tickets = calculate_final_tickets(applicants_df, beta, wage_weights, scholarship_multiplier)
    total_global_tickets = sim_df_with_tickets['FinalTickets'].sum()
    diversity_multiplier = sim_df_with_tickets[sim_df_with_tickets['Country'] == country]['DiversityMultiplier'].iloc[0]
    wage_multiplier = wage_weights.get(wage_level, 1)
    sch_multiplier = scholarship_multiplier if has_scholarship else 1.0
    my_tickets = diversity_multiplier * wage_multiplier * sch_multiplier

    expected_wins = (my_tickets / total_global_tickets) * total_visas if total_global_tickets > 0 else 0
    # A probability cannot exceed 100%; the unclamped value reaches several
    # hundred percent for small countries at high wage levels.
    chance = min(1.0, max(0.0, expected_wins))

    print("--- Your Applicant Profile ---")
    print(f"Country: {country} (Diversity Multiplier at beta={beta:.2f}: {diversity_multiplier:.2f}x)")
    print(f"Your Total Tickets: {my_tickets:.2f}")
    print("---------------------------------")
    print(f"Your Estimated Chance of Success: {chance:.2%}")

    fig, ax = plt.subplots(figsize=(8, 1))
    # Zoom in on small chances, but never extend the axis beyond 100%.
    ax.set_xlim(0, min(1.0, max(0.01, chance * 5)))
    sns.barplot(x=[chance], y=["Chance"], ax=ax, color='#3a9937')
    ax.xaxis.set_major_formatter(plt.FuncFormatter('{:.1%}'.format))
    plt.show()

# =============================================================================
# --- MAIN EXECUTION AND INTERACTIVE DASHBOARD ---
# =============================================================================

# Load and prepare data ONCE
country_data = load_initial_country_data()
# Seed NumPy's global RNG before the stochastic profile generation so that a
# fresh "Restart & Run All" produces the same applicant pool, and therefore
# the same charts for a given set of slider values.
np.random.seed(42)
full_applicants_df = generate_applicant_profiles(country_data)

# --- Define SHARED and INDIVIDUAL Widgets ---
style = {'description_width': 'initial'}
# This is the single, master beta slider; it drives both Part 1 and Part 2.
beta_slider = widgets.FloatSlider(value=0.5, min=0.0, max=1.5, step=0.05, description='Beta (Diversity Dial):', style=style, layout=Layout(width='90%'))

# --- UI for Part 1: System-Wide Simulation ---
scholarship_multiplier_slider = widgets.FloatSlider(value=2.0, min=1.0, max=10.0, step=0.5, description='Scholarship Multiplier:', style=style)
wage_weight_inputs = HBox([
    widgets.IntText(value=1, description='L1'), widgets.IntText(value=2, description='L2'),
    widgets.IntText(value=4, description='L3'), widgets.IntText(value=8, description='L4')
])
# NOTE: code further down looks these controls up by position
# (children[1] = scholarship slider, children[3] = wage-weight row),
# so keep the order of this list stable.
system_ui_box = VBox([
    widgets.HTML("<h3>Part 1: System-Wide Simulation</h3><p>Shows the overall effect of the selected Beta value.</p>"),
    scholarship_multiplier_slider,
    widgets.HTML("<h4>Prevailing Wage Ticket Weights:</h4>"),
    wage_weight_inputs
])

def run_and_plot_system_dashboard(beta, scholarship_multiplier, l1, l2, l3, l4):
    """Part 1 callback: rerun the lottery with the given settings and plot it.

    ``l1``..``l4`` are the ticket weights for wage levels 1 to 4. The
    simulation always runs against the module-level ``full_applicants_df``.
    """
    tickets_per_level = dict(zip((1, 2, 3, 4), (l1, l2, l3, l4)))
    plot_system_results(
        run_unified_lottery_simulation(beta, tickets_per_level, scholarship_multiplier, full_applicants_df),
        full_applicants_df,
    )

# Bind the Part 1 controls to the system-wide callback. The scholarship slider
# is the second child of system_ui_box; the four wage-weight boxes live in the
# HBox that is its fourth child.
_l1_input, _l2_input, _l3_input, _l4_input = system_ui_box.children[3].children
system_out = widgets.interactive_output(
    run_and_plot_system_dashboard,
    dict(
        beta=beta_slider,
        scholarship_multiplier=system_ui_box.children[1],
        l1=_l1_input,
        l2=_l2_input,
        l3=_l3_input,
        l4=_l4_input,
    ),
)

# --- UI for Part 2: Individual Applicant Chance Estimator ---
country_dropdown = widgets.Dropdown(options=country_data['Country'].unique(), value='Canada', description='Country:', style=style)
wage_level_slider = widgets.SelectionSlider(
    options=[('Level 1', 1), ('Level 2', 2), ('Level 3', 3), ('Level 4', 4)],
    value=2, description='Wage Level:', style=style,
)
scholarship_checkbox = widgets.Checkbox(value=False, description='Sponsored a US Citizen STEM Scholarship?')
estimator_ui_box = VBox([
    widgets.HTML("<h3>Part 2: Individual Applicant Chance Estimator</h3><p>Shows how the selected Beta value affects a specific applicant's chances.</p>"),
    country_dropdown,
    wage_level_slider,
    scholarship_checkbox,
])


def _render_applicant_chances(country, wage_level, has_scholarship, beta):
    """Part 2 callback: show one applicant's chances against the full pool."""
    return plot_applicant_chances(country, wage_level, has_scholarship, beta, full_applicants_df)


estimator_out = widgets.interactive_output(
    _render_applicant_chances,
    dict(
        country=country_dropdown,
        wage_level=wage_level_slider,
        has_scholarship=scholarship_checkbox,
        beta=beta_slider,
    ),
)

# --- Display Everything ---
print("\n✅ Simulator is ready! Adjust the master Beta slider or other controls to explore the Unified Lottery model.")
# The shared beta slider goes on top; each section below pairs its own
# controls with the output area they drive.
system_section = VBox([system_ui_box, system_out])
estimator_section = VBox([estimator_ui_box, estimator_out])
display(beta_slider, system_section, estimator_section)

Generating detailed applicant profiles... (This may take a moment)
Applicant profiles generated.

✅ Simulator is ready! Adjust the master Beta slider or other controls to explore the Unified Lottery model.


FloatSlider(value=0.5, description='Beta (Diversity Dial):', layout=Layout(width='90%'), max=1.5, step=0.05, s…

VBox(children=(VBox(children=(HTML(value='<h3>Part 1: System-Wide Simulation</h3><p>Shows the overall effect o…

VBox(children=(VBox(children=(HTML(value="<h3>Part 2: Individual Applicant Chance Estimator</h3><p>Shows how t…