# In [None]:
# Install required packages (if not already installed)
# !pip install pulp gurobipy pandas numpy

# Import libraries
import gurobipy as gp
from gurobipy import GRB
import pandas as pd
import numpy as np


# Per-size facility parameters, keyed by size code (S = Small, M = Medium, L = Large).
# Key order matters: the list of sizes used by the model is derived from CAPACITY_K.
COST_K = dict(S=65000, M=95000, L=115000)   # build cost per facility
CAPACITY_K = dict(S=50, M=100, L=200)       # slots provided per facility

# Overall spending limit: $100 million
BUDGET = 100_000_000

# Cut-offs used to classify a zip code as high-demand
INCOME_THRESHOLD = 60_000.0        # low-income cut-off
EMPLOYMENT_THRESHOLD = 0.60        # high-employment cut-off

# Share of the 10-14 age bracket that is counted:
# only ages 10, 11 and 12 are relevant, i.e. 3 of the 5 years
CHILD_POP_FACTOR = 3 / 5


def clean_zip_code(df, column_name):
    """Clean and standardize zip codes to 5-digit strings.

    Rows with a missing zip code are removed, then each remaining value is
    converted to text, stripped of surrounding whitespace, cut at the first
    '.' (so a float such as 10001.0 becomes '10001') and left-padded with
    zeros to five characters.

    Args:
        df: DataFrame holding the zip-code column. It is modified in place.
        column_name: Name of the column to clean.

    Returns:
        The same DataFrame object, for call-chaining convenience.
    """
    # Drop missing values BEFORE the string conversion: once astype(str) has
    # run, a NaN is the ordinary text 'nan' (padded to '00nan') and dropna
    # can no longer detect it.
    df.dropna(subset=[column_name], inplace=True)
    df[column_name] = (df[column_name]
                       .astype(str)
                       .str.strip()
                       .str.split('.')
                       .str[0]
                       .str.zfill(5))
    return df

def calculate_rz(row):
    """
    Return Rz, the minimum capacity zip code z needs to not be a desert.

    `row` must provide 'Ez' (employment rate), 'Iz' (income) and 'Pz'
    (child population). A zip code is high-demand when its employment rate
    is at or above EMPLOYMENT_THRESHOLD or its income is at or below
    INCOME_THRESHOLD.

    - High-demand:  Rz = int(0.5 * Pz) + 1
    - Otherwise:    Rz = int(1/3 * Pz) + 1

    i.e. the smallest integer strictly greater than the applicable share
    of the child population.
    """
    # Employment is checked first; income is only read when that test fails.
    if row['Ez'] >= EMPLOYMENT_THRESHOLD or row['Iz'] <= INCOME_THRESHOLD:
        share = 0.5
    else:
        share = 1/3
    population_share = share * row['Pz']
    # int() truncates, so +1 yields the first integer above the share
    return int(population_share) + 1


print("Starting data processing...")

# Load the five input CSV files and assemble:
#   df_demand - one row per zip code with Az, Pz, Iz, Ez, Rz and is_desert
#   df_loc    - candidate facility sites with a generated LocationID
# Any failure here is fatal: the script stops with a non-zero exit status.
try:
    # --- Load Existing Capacity (Az) ---
    # Several facilities may share a zip code, so capacity is summed per zip.
    df_capacity = pd.read_csv('child_care_regulated.csv')
    df_capacity = clean_zip_code(df_capacity, 'zip_code')
    df_az = df_capacity.groupby('zip_code')['total_capacity'].sum().reset_index()
    df_az.rename(columns={'total_capacity': 'Az'}, inplace=True)

    # --- Load Child Population (Pz) ---
    # Pz = '-5' bracket + '5-9' bracket + CHILD_POP_FACTOR share of '10-14',
    # rounded to a whole number of children.
    df_pop = pd.read_csv('population.csv')
    df_pop.rename(columns={'zipcode': 'zip_code'}, inplace=True)
    df_pop = clean_zip_code(df_pop, 'zip_code')
    df_pop['Pz'] = df_pop['-5'] + df_pop['5-9'] + (CHILD_POP_FACTOR * df_pop['10-14'])
    df_pop['Pz'] = df_pop['Pz'].round().astype(int)
    df_pz = df_pop[['zip_code', 'Pz']]

    # --- Load Income (Iz) ---
    df_income = pd.read_csv('avg_individual_income.csv')
    df_income.rename(columns={'ZIP code': 'zip_code', 'average income': 'Iz'}, inplace=True)
    df_income = clean_zip_code(df_income, 'zip_code')

    # --- Load Employment Rate (Ez) ---
    df_emp = pd.read_csv('employment_rate.csv')
    df_emp.rename(columns={'zipcode': 'zip_code', 'employment rate': 'Ez'}, inplace=True)
    df_emp = clean_zip_code(df_emp, 'zip_code')

    # --- Merge All Demand Parameters ---
    # NOTE(review): the inner join keeps only zip codes present in BOTH the
    # capacity and the population table, so a zip code with children but no
    # regulated facility at all (Az = 0) is dropped from the model entirely.
    # Confirm this is intended; otherwise join on the population table and
    # fill the missing Az with 0.
    df_demand = pd.merge(df_az, df_pz, on='zip_code', how='inner')
    # Left joins: a zip code without income/employment data keeps NaN there.
    df_demand = pd.merge(df_demand, df_income[['zip_code', 'Iz']], on='zip_code', how='left')
    df_demand = pd.merge(df_demand, df_emp[['zip_code', 'Ez']], on='zip_code', how='left')

    # Calculate Rz and identify current deserts (existing capacity below Rz)
    df_demand['Rz'] = df_demand.apply(calculate_rz, axis=1)
    df_demand['is_desert'] = df_demand['Az'] < df_demand['Rz']

    # --- Load Potential Locations & Define δjz (service mapping) ---
    df_loc = pd.read_csv('potential_locations.csv')
    df_loc.rename(columns={'zipcode': 'zip_code'}, inplace=True)
    df_loc = clean_zip_code(df_loc, 'zip_code')
    # IDs are derived from the row index (L1, L2, ...); they stay unique even
    # if rows were removed during cleaning, but may then have gaps.
    df_loc['LocationID'] = 'L' + (df_loc.index + 1).astype(str)

except FileNotFoundError as e:
    print(f"\nCRITICAL ERROR: One of the required CSV files was not found: {e.filename}")
    print("Please ensure all five CSV files are in the same folder.")
    # SystemExit(1) instead of exit(): exit() is injected by the `site`
    # module (absent under `python -S`) and reports success (status 0).
    raise SystemExit(1)
except Exception as e:
    # Top-level boundary: report the failure (with its type, since e.g. a
    # KeyError message is just the bare column name) and stop.
    print(f"\nCRITICAL DATA PROCESSING ERROR: {type(e).__name__}: {e}")
    raise SystemExit(1)


# Index sets of the optimization model
Z = df_demand['zip_code'].tolist()  # Set of zip codes
J = df_loc['LocationID'].tolist()   # Set of potential facility locations
K = list(CAPACITY_K.keys())         # Facility sizes: ['S', 'M', 'L']

# Per-zip parameters
AZ = df_demand.set_index('zip_code')['Az'].to_dict()  # Existing capacity
RZ = df_demand.set_index('zip_code')['Rz'].to_dict()  # Required minimum capacity

# Delta_jz: δjz = 1 if location j serves zip code z (same zip).
# Only pairs whose zip code is part of Z are stored; locations in other zip
# codes get no entry. Membership is tested against a set built once, so the
# cost is linear in the number of locations rather than |J| * |Z|.
_known_zips = set(Z)
DELTA_JZ = {
    (j, z): 1
    for j, z in zip(df_loc['LocationID'], df_loc['zip_code'])
    if z in _known_zips
}

print("Data Loaded and Processed.")
print(f"Total Zip Codes: {len(Z)}")
print(f"Total Potential Locations: {len(J)}")
print(f"Initial Deserts Identified: {df_demand['is_desert'].sum()} out of {len(Z)}")


# Build, solve and report the facility-location model:
# choose at most one facility size per candidate location so that every zip
# code reaches its required capacity Rz at minimum total cost within BUDGET.
try:
    model = gp.Model("ChildCareCapacityExpansion")

    # Candidate locations grouped by the zip code they serve, built once from
    # DELTA_JZ so constraints and verification do not rescan all of J per zip.
    locations_by_zip = {}
    for (j, z) in DELTA_JZ:
        locations_by_zip.setdefault(z, []).append(j)

    # LocationID -> zip code, used when reporting the facilities built
    zip_of_location = dict(zip(df_loc['LocationID'], df_loc['zip_code']))

    # --- DECISION VARIABLES ---
    # X[j,k]: Binary variable: 1 if facility of size k is built at location j
    X = model.addVars(J, K, vtype=GRB.BINARY, name="X")

    # --- OBJECTIVE FUNCTION: Minimize Total Cost ---
    # The expression is kept in a variable so the budget constraint below can
    # reuse it. Reading it back with model.getObjective() is NOT equivalent:
    # Gurobi applies setObjective lazily, so before model.update() the call
    # returns the previous (empty) objective and the budget constraint would
    # degenerate to 0 <= BUDGET.
    total_cost = gp.quicksum(COST_K[k] * X[j, k] for j in J for k in K)
    model.setObjective(total_cost, GRB.MINIMIZE)

    # --- CONSTRAINTS ---

    # C1: Desert Elimination Constraint (Fairness)
    # For each zip code z: Az + Σ(δjz * Kk * Xjk) >= Rz
    for z in Z:
        new_capacity = gp.quicksum(
            CAPACITY_K[k] * X[j, k]
            for j in locations_by_zip.get(z, ())
            for k in K
        )
        model.addConstr(AZ[z] + new_capacity >= RZ[z], name=f"Fairness_{z}")

    # C2: Budget Constraint
    model.addConstr(total_cost <= BUDGET, name="Budget")

    # C3: Facility Uniqueness Constraint
    # At most one facility per location j
    for j in J:
        model.addConstr(
            gp.quicksum(X[j, k] for k in K) <= 1,
            name=f"UniqueLocation_{j}"
        )

    model.optimize()

    if model.status == GRB.OPTIMAL:
        print("\n--- OPTIMAL SOLUTION FOUND ---")

        # 1. Print Total Cost
        optimal_cost = model.objVal
        print(f"Minimum Total Cost: ${optimal_cost:,.2f} "
              f"(Budget Used: {optimal_cost / BUDGET:.1%})")

        # 2. Print Facilities Built
        print("\nFacilities to Build (Location, Size, Cost):")
        # Binary values come back as floats, hence the > 0.5 test.
        facilities_built = [
            {
                'LocationID': j,
                'Size': k,
                'Cost': COST_K[k],
                'Capacity': CAPACITY_K[k],
                'ZipCode': zip_of_location[j],
            }
            for j in J
            for k in K
            if X[j, k].X > 0.5
        ]

        df_solution = pd.DataFrame(facilities_built)
        if not df_solution.empty:
            print(df_solution[['LocationID', 'ZipCode', 'Size', 'Cost', 'Capacity']])
            print(f"\nTotal facilities to build: {len(df_solution)}")
        else:
            print("No new facilities need to be built.")

        # 3. Verification: Check if all initial deserts are eliminated
        print("\nVerification: Final Capacity Status of Initial Deserts:")
        total_initial_deserts = df_demand['is_desert'].sum()
        total_deserts_remaining = 0

        for z in df_demand[df_demand['is_desert']]['zip_code']:
            new_cap = sum(
                CAPACITY_K[k] * X[j, k].X
                for j in locations_by_zip.get(z, ())
                for k in K
            )
            final_cap = AZ[z] + new_cap
            required_cap = RZ[z]
            still_desert = final_cap < required_cap
            status = "FAIL (DESERT REMAINS)" if still_desert else "SUCCESS (Eliminated)"
            if still_desert:
                total_deserts_remaining += 1
            print(f" - Zip {z}: Final Cap={final_cap:.0f} | Required Cap={required_cap} | {status}")

        if total_deserts_remaining == 0 and total_initial_deserts > 0:
            print("\n✅ VERIFICATION SUCCESS: ALL initial child care deserts have been eliminated.")
        elif total_deserts_remaining > 0:
            print(f"\n❌ CRITICAL FAILURE: {total_deserts_remaining} deserts remain after optimization.")

    elif model.status == GRB.INFEASIBLE:
        print("\n--- MODEL IS INFEASIBLE ---")
        print("It is impossible to eliminate all child care deserts within the given budget.")
        # Optional: Run IIS to debug infeasibility
        # model.computeIIS()
        # model.write("infeasible.ilp")

    else:
        print(f"\n--- OPTIMIZATION STOPPED WITH STATUS {model.status} ---")

except gp.GurobiError as e:
    print(f'\nGurobi Error code {e.errno}: {e}')
    print("Please verify your Gurobi license is properly installed and active.")
except Exception as e:
    print(f"\nAn unexpected error occurred during model construction or solving: {e}")