<a href="https://colab.research.google.com/github/cray0101/DevvitApps/blob/main/Unified_Methodology_h1b.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:

# =============================================================================
# H1B VISA ALLOCATION SIMULATOR: MERIT-PRIMARY ADDITIVE MODEL (v1.1a)
#
# This notebook implements a single-lottery system where an applicant's
# final score is an ADDITIVE combination of their Merit Score and a
# Diversity Bonus, ensuring merit is always the primary factor.
#
# v1.1a Fixes: Corrected a NameError typo in the estimator view.
# =============================================================================

# --- SETUP: Import Libraries ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, Layout, VBox, HBox
from IPython.display import display, clear_output

# Set plot style
sns.set_style("whitegrid")

# --- DATA PREPARATION ---

def load_initial_country_data():
    """Return FY2019 petition counts per country as a two-column DataFrame.

    Twenty countries are listed explicitly; the remainder of the overall
    petition total is folded into a final 'Other' row, so the 'Applicants'
    column always sums to that total.

    Returns:
        DataFrame with columns 'Country' and 'Applicants', one row per
        country in the order listed below, followed by 'Other'.
    """
    overall_total = 424223
    petitions_by_country = {
        'India': 301616,
        'China': 51317,
        'Canada': 4165,
        'South Korea': 3653,
        'Philippines': 3362,
        'Mexico': 3109,
        'Taiwan': 2707,
        'Pakistan': 2147,
        'Brazil': 1888,
        'United Kingdom': 1845,
        'Nigeria': 1686,
        'Nepal': 1675,
        'Japan': 1553,
        'France': 1228,
        'Iran': 1133,
        'Venezuela': 1073,
        'Russia': 945,
        'Colombia': 920,
        'Germany': 910,
        'Australia': 880,
    }

    # Whatever is not attributed to a named country becomes the 'Other' bucket.
    unlisted = overall_total - sum(petitions_by_country.values())

    country_names = list(petitions_by_country) + ['Other']
    applicant_counts = list(petitions_by_country.values()) + [unlisted]
    return pd.DataFrame({'Country': country_names, 'Applicants': applicant_counts})

def generate_applicant_profiles(country_df, scholarship_rate=0.05, random_state=None):
    """Expand per-country applicant counts into one simulated row per applicant.

    Each applicant gets a wage level drawn from a fixed distribution over
    levels 1-4 and a scholarship flag that is True with probability
    ``scholarship_rate``.

    Args:
        country_df: DataFrame with a 'Country' column and an integer
            'Applicants' column (number of rows to generate per country).
        scholarship_rate: Probability that an applicant has a scholarship.
        random_state: Optional seed. When given, draws come from a private
            ``np.random.RandomState(random_state)`` so the output is
            reproducible; when None, the global ``np.random`` state is used.

    Returns:
        DataFrame with columns 'Country', 'WageLevel', 'HasScholarship' and a
        fresh 0..n-1 index. Empty (but with those columns) when
        ``country_df`` has no rows.
    """
    print("Generating detailed applicant profiles... (This may take a moment)")
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    wage_dist = {1: 0.35, 2: 0.30, 3: 0.20, 4: 0.15}
    wage_levels = list(wage_dist)
    wage_probs = list(wage_dist.values())
    scholarship_probs = [1 - scholarship_rate, scholarship_rate]

    # Iterate the two columns directly; iterrows() would build a Series per row.
    # Within each country the wage draw precedes the scholarship draw.
    all_applicants = [
        pd.DataFrame({
            'Country': country,
            'WageLevel': rng.choice(wage_levels, size=int(n_applicants), p=wage_probs),
            'HasScholarship': rng.choice([False, True], size=int(n_applicants), p=scholarship_probs),
        })
        for country, n_applicants in zip(country_df['Country'], country_df['Applicants'])
    ]
    print("Applicant profiles generated.")

    # pd.concat raises on an empty list, so handle "no countries" explicitly.
    if not all_applicants:
        return pd.DataFrame(columns=['Country', 'WageLevel', 'HasScholarship'])
    return pd.concat(all_applicants, ignore_index=True)

# --- CORE SIMULATION LOGIC ---

def calculate_final_score_additive(df, beta, wage_weights, scholarship_multiplier):
    """Score every applicant as merit plus an additive country diversity bonus.

    Args:
        df: Applicant table with 'Country', 'WageLevel' and boolean
            'HasScholarship' columns. It is not modified.
        beta: Scale factor applied to the diversity bonus.
        wage_weights: Mapping of wage level -> base merit score.
        scholarship_multiplier: Factor applied to the merit score of
            applicants whose 'HasScholarship' is True.

    Returns:
        A copy of ``df`` with the same index and row order and three added
        columns: 'MeritScore', 'DiversityBonus' and 'FinalScore' (floored at
        0.01).
    """
    scored = df.copy()

    # 1. Core merit score.
    # Cast to float up front: the weights are often ints while the multiplier
    # may be fractional (e.g. 2.5), and writing floats into an int column
    # in place is an incompatible-dtype assignment in pandas.
    scored['MeritScore'] = scored['WageLevel'].map(wage_weights).astype(float)
    scored.loc[scored['HasScholarship'], 'MeritScore'] *= scholarship_multiplier

    # 2. Additive diversity bonus.
    # log1p compresses the range so small countries do not get extreme bonuses.
    total_applicants = len(scored)
    applicants_per_country = scored['Country'].value_counts()
    bonus_by_country = beta * np.log1p(total_applicants / applicants_per_country)
    # map() looks each row's country up in the per-country table; unlike a
    # merge it keeps the caller's index and does not depend on index naming.
    scored['DiversityBonus'] = scored['Country'].map(bonus_by_country)

    # 3. Final score, floored so it stays strictly positive (e.g. zero
    # weights with beta=0, or a negative beta).
    scored['FinalScore'] = (scored['MeritScore'] + scored['DiversityBonus']).clip(lower=0.01)

    return scored

def run_additive_simulation(beta, wage_weights, scholarship_multiplier, applicants_df, total_visas=85000):
    """Score the applicant pool and draw the visa winners in one weighted lottery.

    Args:
        beta: Scale factor of the diversity bonus.
        wage_weights: Mapping of wage level -> base merit score.
        scholarship_multiplier: Merit multiplier for scholarship holders.
        applicants_df: Applicant table accepted by
            ``calculate_final_score_additive``.
        total_visas: Number of visas to allocate.

    Returns:
        The scored rows of the winners, drawn without replacement with
        probability weights taken from 'FinalScore'. At most
        ``len(applicants_df)`` rows: if the pool is smaller than
        ``total_visas``, every applicant wins.
    """
    scored_pool = calculate_final_score_additive(applicants_df, beta, wage_weights, scholarship_multiplier)

    # Sampling without replacement cannot return more rows than exist, so
    # cap the draw at the pool size instead of letting sample() raise.
    n_winners = min(total_visas, len(scored_pool))
    if n_winners == 0:
        # Nothing to draw (empty pool or zero visas); a weighted sample of an
        # empty frame would fail on the all-zero weight sum.
        return scored_pool.iloc[0:0]

    # 4. Run Single Global Lottery (fixed seed keeps the draw reproducible).
    return scored_pool.sample(n=n_winners, weights='FinalScore', random_state=42)

def plot_system_results(winners_df, applicants_df):
    """Draw the three summary charts for a completed lottery run.

    Panels, top to bottom:
      1. number of visas won per country,
      2. applicant share vs. winner share for the ten countries with the
         most applicants,
      3. number of winners per wage level.

    Relies on ``value_counts().reset_index()`` producing columns named
    'count' / 'proportion'.

    Args:
        winners_df: Rows selected by the lottery; needs 'Country' and
            'WageLevel' columns.
        applicants_df: The full applicant pool; needs a 'Country' column.
    """
    _, (ax_visas, ax_shares, ax_wages) = plt.subplots(3, 1, figsize=(12, 20))

    # Panel 1: absolute visa counts by country.
    visas_by_country = winners_df['Country'].value_counts().reset_index()
    sns.barplot(data=visas_by_country, x='count', y='Country', ax=ax_visas, palette="viridis")
    ax_visas.set_title('1. Final Visa Distribution by Country', fontsize=16, loc='left')

    # Panel 2: share of applicants vs. share of winners, long format for hue.
    largest_pools = applicants_df['Country'].value_counts().head(10).index
    applicant_share = applicants_df['Country'].value_counts(normalize=True).reset_index()
    winner_share = winners_df['Country'].value_counts(normalize=True).reset_index()
    shares = applicant_share.merge(winner_share, on='Country', suffixes=('_Applicant', '_Winner'))
    shares_long = shares.melt(
        id_vars='Country',
        value_vars=['proportion_Applicant', 'proportion_Winner'],
        var_name='Type',
        value_name='Percentage',
    )
    in_top_ten = shares_long['Country'].isin(largest_pools)
    sns.barplot(data=shares_long[in_top_ten], x='Percentage', y='Country', hue='Type', ax=ax_shares, palette="rocket")
    ax_shares.xaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax_shares.set_title('2. Applicant vs. Winner Percentage (Top 10 Countries)', fontsize=16, loc='left')

    # Panel 3: winners per wage level, always showing all four levels.
    sns.countplot(data=winners_df, x='WageLevel', order=[1, 2, 3, 4], palette="crest", ax=ax_wages)
    ax_wages.set_title('3. Distribution of Visa Winners by Wage Level', fontsize=16, loc='left')

    plt.tight_layout(pad=3.0)
    plt.show()

# --- INDIVIDUAL APPLICANT CHANCE ESTIMATOR ---

def plot_additive_applicant_chances(country, wage_level, has_scholarship, beta, applicants_df, total_visas=85000):
    """Print and plot one applicant's estimated chance under the additive model.

    The estimate is the applicant's share of the pool's total final score
    multiplied by the number of visas, capped at 100%. It is a first-order
    approximation of the lottery's selection probability, not an exact value.

    Args:
        country: Applicant's country; must occur in ``applicants_df``.
        wage_level: Wage level 1-4 (unknown levels fall back to weight 1).
        has_scholarship: Whether the scholarship multiplier applies.
        beta: Scale factor of the diversity bonus.
        applicants_df: Full applicant pool used for the score total and for
            the country's diversity bonus.
        total_visas: Number of visas being allocated.

    Raises:
        IndexError: If ``country`` has no rows in ``applicants_df``.
    """
    wage_weights = {1: 1, 2: 2, 3: 4, 4: 8}  # Use fixed weights for simplicity in estimator
    scholarship_multiplier = 2.0

    # Score the entire pool to get the denominator (sum of all final scores).
    scored_pool = calculate_final_score_additive(applicants_df, beta, wage_weights, scholarship_multiplier)
    total_global_score = scored_pool['FinalScore'].sum()

    # Score this specific applicant the same way the pool is scored.
    my_merit_score = wage_weights.get(wage_level, 1) * (scholarship_multiplier if has_scholarship else 1)
    my_diversity_bonus = scored_pool.loc[scored_pool['Country'] == country, 'DiversityBonus'].iloc[0]
    # Apply the same 0.01 floor the pool scores receive so both sides of the
    # ratio are computed consistently.
    my_final_score = max(0.01, my_merit_score + my_diversity_bonus)

    # Share of total score times visas; a high-scoring profile can push this
    # past 1, which is not a valid probability, so cap it at 100%.
    if total_global_score > 0:
        chance = min(1.0, (my_final_score / total_global_score) * total_visas)
    else:
        chance = 0

    print("--- Your Applicant Profile ---")
    print(f"Country: {country} | Wage Level: {wage_level} | Scholarship: {'Yes' if has_scholarship else 'No'}")
    print(f"Merit Score: {my_merit_score:.2f}")
    print(f"Diversity Bonus (at beta={beta:.2f}): +{my_diversity_bonus:.2f}")
    print(f"Your Final Score: {my_final_score:.2f}")
    print("---------------------------------")
    print(f"Your Estimated Chance of Success: {chance:.2%}")

    fig, ax = plt.subplots(figsize=(8, 1))
    # Leave headroom to the right of the bar, but never extend past 100%.
    ax.set_xlim(0, min(1.0, max(0.01, chance * 5)))
    sns.barplot(x=[chance], y=["Chance"], ax=ax, color='#3a9937')
    ax.xaxis.set_major_formatter(plt.FuncFormatter('{:.1%}'.format))
    plt.show()

# =============================================================================
# --- MAIN EXECUTION AND INTERACTIVE DASHBOARD ---
# =============================================================================

# Load and prepare data ONCE; every callback below reuses full_applicants_df.
country_data = load_initial_country_data()
full_applicants_df = generate_applicant_profiles(country_data)

# --- Define SHARED and INDIVIDUAL Widgets ---
style = {'description_width': 'initial'}
# This is the single, master beta slider; it drives both dashboard sections.
# continuous_update=False: the callbacks re-score (and, for Part 1, re-sample)
# the whole applicant pool, so fire them on release rather than on every
# intermediate value while the handle is being dragged.
beta_slider = widgets.FloatSlider(
    value=1.0, min=0.0, max=5.0, step=0.1,
    description='Beta (Diversity Dial):',
    continuous_update=False,
    style=style, layout=Layout(width='90%'),
)

# --- UI for Part 1: System-Wide Simulation ---
# NOTE: the wiring code looks these controls up by position:
#   children[1]          -> scholarship multiplier slider
#   children[3].children -> the four wage-level weight inputs (L1..L4)
# Keep the order below in sync with that lookup.
system_ui_box = VBox([
    widgets.HTML("<h3>Part 1: System-Wide Simulation</h3><p>Shows the overall effect of the selected Beta value.</p>"),
    widgets.FloatSlider(
        value=2.0, min=1.0, max=10.0, step=0.5,
        description='Scholarship Multiplier:',
        continuous_update=False,
        style=style,
    ),
    widgets.HTML("<h4>Prevailing Wage Ticket Weights:</h4>"),
    HBox([widgets.IntText(value=1, description='L1'), widgets.IntText(value=2, description='L2'),
          widgets.IntText(value=4, description='L3'), widgets.IntText(value=8, description='L4')])
])

def run_and_plot_system_dashboard(beta, scholarship_multiplier, l1, l2, l3, l4):
    """Widget callback for Part 1: run the lottery and plot its outcome.

    Args:
        beta: Scale factor of the diversity bonus.
        scholarship_multiplier: Merit multiplier for scholarship holders.
        l1, l2, l3, l4: Merit weights for wage levels 1 through 4.
    """
    level_weights = dict(zip((1, 2, 3, 4), (l1, l2, l3, l4)))
    plot_system_results(
        run_additive_simulation(beta, level_weights, scholarship_multiplier, full_applicants_df),
        full_applicants_df,
    )

# Bind the Part 1 callback to its controls. The controls are looked up by
# position inside system_ui_box: child 1 is the scholarship multiplier and
# child 3 is the row holding the four wage-level weight inputs.
_wage_weight_inputs = system_ui_box.children[3].children
system_out = widgets.interactive_output(
    run_and_plot_system_dashboard,
    dict(
        beta=beta_slider,
        scholarship_multiplier=system_ui_box.children[1],
        l1=_wage_weight_inputs[0],
        l2=_wage_weight_inputs[1],
        l3=_wage_weight_inputs[2],
        l4=_wage_weight_inputs[3],
    ),
)

# --- UI for Part 2: Individual Applicant Chance Estimator ---
# Built as named pieces first, then stacked; the stacking order (header,
# country, wage level, scholarship) is what the wiring code indexes into.
_estimator_header = widgets.HTML(
    "<h3>Part 2: Individual Applicant Chance Estimator</h3><p>Shows how the selected Beta value affects a specific applicant's chances.</p>"
)
_country_picker = widgets.Dropdown(
    options=country_data['Country'].unique(),
    value='Canada',
    description='Country:',
    style=style,
)
_wage_level_picker = widgets.SelectionSlider(
    options=[(f'Level {level}', level) for level in range(1, 5)],
    value=2,
    description='Wage Level:',
    style=style,
)
_scholarship_toggle = widgets.Checkbox(
    value=False,
    description='Sponsored a US Citizen STEM Scholarship?',
)
estimator_ui_box = VBox([_estimator_header, _country_picker, _wage_level_picker, _scholarship_toggle])

def update_estimator_view(country, wage_level, has_scholarship, beta):
    """Widget callback for Part 2: refresh one applicant's chance estimate.

    Thin adapter that supplies the shared applicant pool to
    ``plot_additive_applicant_chances``. That function does the live
    calculation on the full pool; no lottery draw happens on this path.
    """
    plot_additive_applicant_chances(
        country=country,
        wage_level=wage_level,
        has_scholarship=has_scholarship,
        beta=beta,
        applicants_df=full_applicants_df,
    )

# Bind the Part 2 callback to its controls (positions inside estimator_ui_box:
# 1 = country, 2 = wage level, 3 = scholarship) plus the shared beta slider.
_estimator_controls = {
    'country': estimator_ui_box.children[1],
    'wage_level': estimator_ui_box.children[2],
    'has_scholarship': estimator_ui_box.children[3],
    'beta': beta_slider,
}
estimator_out = widgets.interactive_output(update_estimator_view, _estimator_controls)

# --- Display Everything ---
print("\n✅ Simulator is ready! Adjust the master Beta slider or other controls to explore the Merit-Primary Additive model.")
# Master slider on top, then each section's controls stacked above its output.
_system_section = VBox([system_ui_box, system_out])
_estimator_section = VBox([estimator_ui_box, estimator_out])
display(beta_slider, _system_section, _estimator_section)

Generating detailed applicant profiles... (This may take a moment)
Applicant profiles generated.

✅ Simulator is ready! Adjust the master Beta slider or other controls to explore the Merit-Primary Additive model.


FloatSlider(value=1.0, description='Beta (Diversity Dial):', layout=Layout(width='90%'), max=5.0, style=Slider…

VBox(children=(VBox(children=(HTML(value='<h3>Part 1: System-Wide Simulation</h3><p>Shows the overall effect o…

VBox(children=(VBox(children=(HTML(value="<h3>Part 2: Individual Applicant Chance Estimator</h3><p>Shows how t…