<a href="https://colab.research.google.com/github/cray0101/DevvitApps/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

# =============================================================================
# H1B VISA ALLOCATION SIMULATOR: MERIT-FIRST MODEL
#
# This notebook implements and visualizes the "Merit-First Allocation" system.
#
# 1. System-Wide Simulation: Shows the overall visa distribution based
#    on a country's total "merit mass" (sum of applicant tickets),
#    dampened by an adjustable alpha.
#
# 2. Individual Chance Estimator: Calculates the approximate chance of
#    success for a specific applicant profile based on their competition.
# =============================================================================

# --- SETUP: Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, VBox, HBox
from IPython.display import display, clear_output

# Set plot style
sns.set_style("whitegrid")

# --- DATA PREPARATION (COMMON TO BOTH TOOLS) ---

def load_initial_country_data():
    """Loads petition data from FY2019 for top countries."""
    data = {
        'India': 301616, 'China': 51317, 'Canada': 4165, 'South Korea': 3653,
        'Philippines': 3362, 'Mexico': 3109, 'Taiwan': 2707, 'Pakistan': 2147,
        'Brazil': 1888, 'United Kingdom': 1845, 'Nigeria': 1686, 'Nepal': 1675,
        'Japan': 1553, 'France': 1228, 'Iran': 1133, 'Venezuela': 1073,
        'Russia': 945, 'Colombia': 920, 'Germany': 910, 'Australia': 880
    }
    total_petitions = 424223
    data['Other'] = total_petitions - sum(data.values())
    return pd.DataFrame(list(data.items()), columns=['Country', 'Applicants'])

def generate_applicant_profiles(country_df, scholarship_rate=0.05):
    """Generates a detailed applicant DataFrame with simulated wage levels and scholarships."""
    print("Generating detailed applicant profiles... (This may take a moment)")
    wage_dist = {1: 0.35, 2: 0.30, 3: 0.20, 4: 0.15}
    all_applicants = [
        pd.DataFrame({
            'Country': row['Country'],
            'WageLevel': np.random.choice(list(wage_dist.keys()), size=row['Applicants'], p=list(wage_dist.values())),
            'HasScholarship': np.random.choice([False, True], size=row['Applicants'], p=[1 - scholarship_rate, scholarship_rate])
        }) for _, row in country_df.iterrows()
    ]
    print("Applicant profiles generated.")
    return pd.concat(all_applicants, ignore_index=True)

# --- PART 1: SYSTEM-WIDE SIMULATION ---

def run_merit_first_simulation(alpha, applicants_df, total_visas=85000):
    """Runs the merit-first simulation based on alpha."""
    # Using fixed weights for this simulation as requested
    wage_weights = {1: 1, 2: 2, 3: 4, 4: 8}
    scholarship_multiplier = 2.0

    sim_df = applicants_df.copy()
    sim_df['Tickets'] = sim_df['WageLevel'].map(wage_weights)
    sim_df.loc[sim_df['HasScholarship'], 'Tickets'] *= scholarship_multiplier

    # 1. Calculate Country "Merit Mass"
    country_merit = sim_df.groupby('Country').agg(
        Applicants=('Country', 'size'),
        MeritMass=('Tickets', 'sum')
    ).reset_index()

    # 2. Dampened Allocation using Merit Mass
    country_merit['WeightedShare'] = country_merit['MeritMass']**alpha
    total_weighted_share = country_merit['WeightedShare'].sum()
    country_merit['VisaQuota'] = (country_merit['WeightedShare'] / total_weighted_share) * total_visas
    country_merit['VisaQuota'] = country_merit['VisaQuota'].round().astype(int)

    # Adjust for rounding errors
    diff = total_visas - country_merit['VisaQuota'].sum()
    if diff != 0:
        largest_quota_idx = country_merit['VisaQuota'].idxmax()
        country_merit.loc[largest_quota_idx, 'VisaQuota'] += diff

    # 3. Internal Lotteries
    winners = sim_df.groupby('Country').apply(
        lambda x: x.sample(n=min(len(x), country_merit.set_index('Country').loc[x.name, 'VisaQuota']), weights='Tickets', random_state=42)
    ).reset_index(drop=True)

    return winners, country_merit

def plot_system_results(winners_df, country_stats_df, applicants_df):
    """Plots the outcomes of the system-wide simulation."""
    fig, axes = plt.subplots(3, 1, figsize=(12, 20))

    # Plot 1: Final Visa Distribution by Country
    winner_counts = winners_df['Country'].value_counts().reset_index()
    sns.barplot(data=winner_counts, x='count', y='Country', ax=axes[0], palette="viridis")
    axes[0].set_title('1. Final Visa Distribution by Country', fontsize=16, loc='left')
    axes[0].set_xlabel('Visas Awarded')

    # Plot 2: Applicant % vs. Winner %
    app_dist = applicants_df['Country'].value_counts(normalize=True).reset_index()
    win_dist = winners_df['Country'].value_counts(normalize=True).reset_index()
    comp_df = pd.merge(app_dist, win_dist, on='Country', suffixes=('_Applicant', '_Winner'))
    comp_df = comp_df.melt(id_vars='Country', value_vars=['proportion_Applicant', 'proportion_Winner'], var_name='Type', value_name='Percentage')
    top_countries = country_stats_df.sort_values(by='Applicants', ascending=False)['Country'].head(10)
    sns.barplot(data=comp_df[comp_df['Country'].isin(top_countries)], x='Percentage', y='Country', hue='Type', ax=axes[1], palette="rocket")
    axes[1].xaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    axes[1].set_title('2. Applicant vs. Winner Percentage (Top 10 Countries)', fontsize=16, loc='left')
    axes[1].set_xlabel('Percentage of Total')

    # Plot 3: Distribution of Winners by Wage Level
    sns.countplot(data=winners_df, x='WageLevel', order=[1, 2, 3, 4], palette="crest", ax=axes[2])
    axes[2].set_title('3. Distribution of Visa Winners by Wage Level', fontsize=16, loc='left')
    axes[2].set_xlabel('Wage Level')
    axes[2].set_ylabel('Number of Winners')

    plt.tight_layout(pad=3.0)
    plt.show()

# --- PART 2: INDIVIDUAL APPLICANT CHANCE ESTIMATOR ---

def setup_estimator_data(applicants_df, alpha=0.75):
    """Pre-computes simulation results for the estimator to use."""
    print("\nPre-computing data for the Applicant Chance Estimator (using default alpha=0.75)...")
    _, country_stats = run_merit_first_simulation(alpha, applicants_df)

    # Calculate total tickets per country from the base applicant data
    total_tickets_per_country = applicants_df.groupby('Country')['Tickets'].sum().reset_index()
    country_stats = pd.merge(country_stats, total_tickets_per_country, on='Country', suffixes=('', '_Total'))

    print("Estimator ready.")
    return country_stats.set_index('Country')

def plot_applicant_chances(country, wage_level, has_scholarship, estimator_data):
    """Calculates and displays an individual's chances."""
    wage_weights = {1: 1, 2: 2, 3: 4, 4: 8}
    scholarship_multiplier = 2.0

    # Calculate applicant's tickets
    my_tickets = wage_weights[wage_level]
    if has_scholarship:
        my_tickets *= scholarship_multiplier

    # Get pre-computed stats for the country
    country_info = estimator_data.loc[country]
    visa_quota = country_info['VisaQuota']
    total_applicants = country_info['Applicants']
    total_tickets = country_info['Tickets']

    # Estimate probability
    # This is a heuristic: (Your share of tickets) * (Number of "prizes")
    chance = (my_tickets / total_tickets) * visa_quota if total_tickets > 0 else 0

    # Display results
    print(f"--- Your Applicant Profile ---")
    print(f"Country: {country}")
    print(f"Your Tickets: {my_tickets}")
    print("---------------------------------")
    print(f"--- Competition in {country} (at alpha=0.75) ---")
    print(f"Visa Quota for Country: {visa_quota:,.0f}")
    print(f"Total Applicants in Country: {total_applicants:,.0f}")
    print("---------------------------------")
    print(f"Your Estimated Chance of Success: {chance:.2%}")

    # Visualization
    fig, ax = plt.subplots(figsize=(8, 1))
    ax.set_xlim(0, max(0.2, chance * 2)) # Dynamic axis limit
    sns.barplot(x=[chance], y=["Chance"], ax=ax, color='#3a9937')
    ax.xaxis.set_major_formatter(plt.FuncFormatter('{:.1%}'.format))
    ax.set_title("Probability Visualization")
    plt.show()

# =============================================================================
# --- MAIN EXECUTION AND INTERACTIVE DASHBOARD ---
# =============================================================================

# Load and prepare data ONCE
country_data = load_initial_country_data()
# The weights are fixed in this simulation, so we pre-calculate tickets
full_applicants_df = generate_applicant_profiles(country_data)
full_applicants_df['Tickets'] = full_applicants_df['WageLevel'].map({1: 1, 2: 2, 3: 4, 4: 8})
full_applicants_df.loc[full_applicants_df['HasScholarship'], 'Tickets'] *= 2.0

# --- UI for Part 1: System-Wide Simulation ---
system_ui_box = VBox([
    widgets.HTML("<h2>Part 1: System-Wide Simulation</h2><p>Adjust <b>alpha</b> to see how it affects the overall distribution of visas across countries under the 'Merit-First' model.</p>"),
    widgets.FloatSlider(value=0.75, min=0.01, max=1.0, step=0.01, description='Alpha (Dampening Factor):', style={'description_width': 'initial'}, readout_format='.2f')
])

# ** THIS IS THE CORRECTED SECTION **
# Define a wrapper function that calls the simulation and then the plotting function
# with all the correct arguments.
def run_and_plot_system(alpha):
    winners, country_stats = run_merit_first_simulation(alpha, full_applicants_df)
    plot_system_results(winners, country_stats, full_applicants_df)

# Link the alpha slider widget to our new wrapper function
system_out = widgets.interactive_output(
    run_and_plot_system,
    {'alpha': system_ui_box.children[1]}
)

# --- UI for Part 2: Individual Chance Estimator ---
# Pre-compute the data needed for the estimator
estimator_data_precomputed = setup_estimator_data(full_applicants_df)

estimator_ui_box = VBox([
    widgets.HTML("<h2>Part 2: Individual Applicant Chance Estimator</h2><p>Select a profile to estimate an applicant's chance of success. This calculation is based on a pre-run simulation with <b>alpha set to 0.75</b>.</p>"),
    widgets.Dropdown(options=country_data['Country'].unique(), value='Canada', description='Country:', style={'description_width': 'initial'}),
    widgets.SelectionSlider(options=[('Level 1', 1), ('Level 2', 2), ('Level 3', 3), ('Level 4', 4)], value=2, description='Wage Level:', style={'description_width': 'initial'}),
    widgets.Checkbox(value=False, description='Sponsored a US Citizen STEM Scholarship?')
])

estimator_out = widgets.interactive_output(
    lambda country, wage_level, has_scholarship: plot_applicant_chances(country, wage_level, has_scholarship, estimator_data_precomputed),
    {'country': estimator_ui_box.children[1], 'wage_level': estimator_ui_box.children[2], 'has_scholarship': estimator_ui_box.children[3]}
)

# Display everything
print("\n✅ Simulator is ready! Adjust the controls below to explore the model.")
display(system_ui_box, system_out)
display(estimator_ui_box, estimator_out)

Generating detailed applicant profiles... (This may take a moment)
Applicant profiles generated.

Pre-computing data for the Applicant Chance Estimator (using default alpha=0.75)...
Estimator ready.


  winners = sim_df.groupby('Country').apply(



✅ Simulator is ready! Adjust the controls below to explore the model.


VBox(children=(HTML(value="<h2>Part 1: System-Wide Simulation</h2><p>Adjust <b>alpha</b> to see how it affects…

Output()

VBox(children=(HTML(value="<h2>Part 2: Individual Applicant Chance Estimator</h2><p>Select a profile to estima…

Output()