In [11]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
from gurobipy import Model, GRB

df_pop = pd.read_csv('/Users/jerry/Desktop/ChildCareDeserts_Data/population.csv')
df_income = pd.read_csv('/Users/jerry/Desktop/ChildCareDeserts_Data/avg_individual_income.csv')
df_emp = pd.read_csv('/Users/jerry/Desktop/ChildCareDeserts_Data/employment_rate.csv')
df_cc = pd.read_csv('/Users/jerry/Desktop/ChildCareDeserts_Data/child_care_regulated.csv')
df_pot = pd.read_csv('/Users/jerry/Desktop/ChildCareDeserts_Data/potential_locations.csv')

df_income = df_income.rename(columns={'ZIP code': 'zipcode'})
df_cc = df_cc.rename(columns={'zip_code': 'zipcode'})

for df in [df_pop, df_income, df_emp, df_cc, df_pot]:
    df['zipcode'] = df['zipcode'].astype(str).str[:5]

df = df_pop.merge(df_income, on='zipcode').merge(df_emp, on='zipcode')

set_income = set(df_income['zipcode'].unique())
set_emp = set(df_emp['zipcode'].unique())
set_cc = set(df_cc['zipcode'].unique())
set_pot = set(df_pot['zipcode'].unique())
set_pop = set(df_pop['zipcode'].unique())

print("Unique zipcode：")
print(f"income: {len(set_income)}")
print(f"employment: {len(set_emp)}")
print(f"child care: {len(set_cc)}")
print(f"potential locations: {len(set_pot)}")
print(f"population: {len(set_pop)}")


def overlap_ratio(set_a, set_b):
    """返回交集大小，以及占 a 和 b 的比例"""
    inter = set_a & set_b
    return len(inter), len(inter)/len(set_a), len(inter)/len(set_b)

# 所有表名和集合
sets = {
    'income': set_income,
    'employment': set_emp,
    'child_care': set_cc,
    'potential': set_pot,
    'population': set_pop
}

print("\n两两 zipcode 重合情况：")
print(f"{'Pair':<30} {'Overlap Count':<15} {'In A (%)':<10} {'In B (%)':<10}")
print("-" * 60)

keys = list(sets.keys())
for i in range(len(keys)):
    for j in range(i+1, len(keys)):
        a_name, b_name = keys[i], keys[j]
        a_set, b_set = sets[a_name], sets[b_name]
        count, ratio_a, ratio_b = overlap_ratio(a_set, b_set)
        print(f"{a_name:<12} ∩ {b_name:<12} {count:<15} {ratio_a:.2%}{'':<10} {ratio_b:.2%}")


Unique zipcode：
income: 1534
employment: 1375
child care: 1188
potential locations: 2154
population: 1646

两两 zipcode 重合情况：
Pair                           Overlap Count   In A (%)   In B (%)  
------------------------------------------------------------
income       ∩ employment   1375            89.63%           100.00%
income       ∩ child_care   1131            73.73%           95.20%
income       ∩ potential    1534            100.00%           71.22%
income       ∩ population   1375            89.63%           83.54%
employment   ∩ child_care   1023            74.40%           86.11%
employment   ∩ potential    1375            100.00%           63.83%
employment   ∩ population   1375            100.00%           83.54%
child_care   ∩ potential    1184            99.66%           54.97%
child_care   ∩ population   1066            89.73%           64.76%
potential    ∩ population   1646            76.42%           100.00%


In [12]:
import pandas as pd
import numpy as np
from gurobipy import Model, GRB, quicksum

#clean up column names 
df_income = df_income.rename(columns={'ZIP code': 'zipcode'})
df_cc = df_cc.rename(columns={'zip_code': 'zipcode'})
for df in [df_pop, df_income, df_emp, df_cc, df_pot]:
    df['zipcode'] = df['zipcode'].astype(str).str[:5]

# rename population columns
df_pop = df_pop.rename(columns={'-5':'under5', '5-9':'5_9', '10-14':'10_14'})
df_pop

Unnamed: 0,zipcode,Total,under5,5_9,10_14,15-19,20-24,25-29,30-34,35-39,40-44,45-49,50-54,55-59,60-64,65-69,70-74,75-79,80-84,85+
0,6390,53,0,1,5,0,6,0,9,18,0,12,2,0,0,0,0,0,0,0
1,10001,27004,744,784,942,1035,2296,3806,3588,2524,1702,1903,1704,1225,1323,933,815,616,488,576
2,10002,76518,2142,3046,3198,2652,4528,6988,6278,5157,4962,4822,4410,6106,4548,4815,4748,2531,2793,2794
3,10003,53877,1440,1034,953,7013,6344,7100,6427,3221,2907,1988,2698,2350,2274,2793,1854,1646,779,1056
4,10004,4579,433,182,161,108,109,601,724,490,241,313,549,279,199,173,2,15,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1641,14774,290,4,54,18,24,12,5,2,31,8,15,8,27,68,6,6,2,0,0
1642,14785,130,0,0,0,0,0,33,32,0,0,0,0,36,29,0,0,0,0,0
1643,14788,104,0,0,1,3,1,11,0,0,0,41,4,18,2,0,0,2,0,21
1644,14805,765,31,16,29,24,42,12,31,37,28,31,29,143,105,36,141,12,8,10


In [14]:
# calc populatio
df_pop['pop_0_5']  = df_pop['under5']
df_pop['pop_5_12'] = df_pop['5_9'] + 0.6 * df_pop['10_14']
df_pop['pop_0_12'] = df_pop['under5'] + df_pop['5_9'] + 0.6 * df_pop['10_14']
df_pop = df_pop[(df_pop['pop_0_5']>0) & (df_pop['pop_0_12']>0)].copy()


In [15]:
# merge income + emp
df_income_emp = df_income.merge(df_emp, on="zipcode", how="left")
df = df_pop.merge(df_income_emp, on="zipcode", how="left")
df['avg_individual_income'] = df['average income'].fillna(np.inf)
df['employment rate'] = df['employment rate'].fillna(0.0)


In [16]:
# tag high demand areas
df['is_high_demand'] = ((df['employment rate'] >= 0.6) | (df['avg_individual_income'] <= 60000)).astype(int)
df['threshold'] = np.where(df['is_high_demand']==1, 0.5, 1/3)

# current facilities
reg_agg = df_cc.groupby("zipcode", as_index=False).agg(
    exist_0_5=("children_capacity", "sum"),
    exist_5_12=("school_age_capacity", "sum"),
    exist_total=("total_capacity", "sum")
)
df = df.merge(reg_agg, on="zipcode", how="left").fillna(0)
df['exist_total_use'] = np.where(df['exist_total']>0, df['exist_total'],
                                 df['exist_0_5']+df['exist_5_12'])

# potential sites
pot_sites = df_pot.groupby("zipcode", as_index=False).size().rename(columns={"size":"num_sites"})
df = df.merge(pot_sites, on="zipcode", how="left").fillna({"num_sites":0})
df

Unnamed: 0,zipcode,Total,under5,5_9,10_14,15-19,20-24,25-29,30-34,35-39,...,average income,employment rate,avg_individual_income,is_high_demand,threshold,exist_0_5,exist_5_12,exist_total,exist_total_use,num_sites
0,10001,27004,744,784,942,1035,2296,3806,3588,2524,...,102878.033603,0.595097,1.028780e+05,0,0.333333,24.0,585.0,609.0,609.0,100
1,10002,76518,2142,3046,3198,2652,4528,6988,6278,5157,...,59604.041165,0.520662,5.960404e+04,1,0.500000,203.0,4508.0,4729.0,4729.0,100
2,10003,53877,1440,1034,953,7013,6344,7100,6427,3221,...,114273.049645,0.497244,1.142730e+05,0,0.333333,0.0,1995.0,1995.0,1995.0,100
3,10004,4579,433,182,161,108,109,601,724,490,...,132004.310345,0.506661,1.320043e+05,0,0.333333,0.0,263.0,263.0,263.0,100
4,10005,8801,484,204,229,53,989,2604,1144,945,...,121437.713311,0.665833,1.214377e+05,1,0.500000,0.0,39.0,39.0,39.0,100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1402,14770,2888,137,197,223,175,190,242,178,80,...,55523.255814,0.446676,5.552326e+04,1,0.500000,0.0,55.0,70.0,70.0,100
1403,14772,3993,256,253,224,301,163,180,198,256,...,57164.634146,0.410719,5.716463e+04,1,0.500000,12.0,64.0,108.0,108.0,100
1404,14774,290,4,54,18,24,12,5,2,31,...,0.000000,0.000000,inf,0,0.333333,0.0,62.0,62.0,62.0,100
1405,14805,765,31,16,29,24,42,12,31,37,...,59375.000000,0.679739,5.937500e+04,1,0.500000,6.0,2.0,8.0,8.0,100


In [None]:
# params
ZIPS = df["zipcode"].tolist()
Z = range(len(ZIPS))

pop0_5  = df["pop_0_5"].to_numpy()
pop5_12 = df["pop_5_12"].to_numpy()
pop0_12 = df["pop_0_12"].to_numpy()
thresh  = df["threshold"].to_numpy()
e05     = df["exist_0_5"].to_numpy()
e512    = df["exist_5_12"].to_numpy()
eTot    = df["exist_total_use"].to_numpy()
site_cap = df["num_sites"].to_numpy()

SIZES = ["S","M","L"]
cap_total = {"S":100, "M":200, "L":400}
cap_0_5   = {"S":50, "M":100, "L":200}
cap_5_12  = {k: cap_total[k]-cap_0_5[k] for k in SIZES}
cost      = {"S":65000, "M":95000, "L":115000}
BUDGET = 9000_000_000


In [None]:
# build model
m = Model("childcare_plan")
# decision vars
x = {(z,s): m.addVar(vtype=GRB.INTEGER, lb=0, ub=int(site_cap[z]),
                     name=f"x[{ZIPS[z]},{s}]") for z in Z for s in SIZES}
U = m.addVar(lb=0.0, name="U")  # upper ratio
L = m.addVar(lb=0.0, name="L")  # lower ratio

# capacity per zip
slots05  = {z: e05[z]  + quicksum(cap_0_5[s]*x[(z,s)] for s in SIZES) for z in Z}
slots512 = {z: e512[z] + quicksum(cap_5_12[s]*x[(z,s)] for s in SIZES) for z in Z}
slotsTot = {z: eTot[z] + quicksum(cap_total[s]*x[(z,s)] for s in SIZES) for z in Z}

# constraints
m.addConstr(quicksum(cost[s]*x[(z,s)] for z in Z for s in SIZES) <= BUDGET, "Budget")
for z in Z:
    m.addConstr(slots05[z] >= (2/3)*pop0_5[z], name=f"NYS_0_5_{ZIPS[z]}")
    m.addConstr(slotsTot[z] >= thresh[z]*pop0_12[z], name=f"NoDesert_{ZIPS[z]}")
    m.addConstr(slotsTot[z] <= U * pop0_12[z], name=f"FairMax_{ZIPS[z]}")
    m.addConstr(slotsTot[z] >= L * pop0_12[z], name=f"FairMin_{ZIPS[z]}")

# soft fairness: allow U-L but penalize in objective
lambda_penalty = 0.01
obj = quicksum(
    ((2/3)*(slots05[z]/pop0_5[z]) if pop0_5[z]>0 else 0) +
    ((1/3)*(slots512[z]/pop5_12[z]) if pop5_12[z]>0 else 0)
    for z in Z
)
m.setObjective(obj - lambda_penalty*(U - L), GRB.MAXIMIZE)



In [None]:
# solve
m.optimize()

# check infeasible zips
if m.Status in (GRB.INFEASIBLE, GRB.INF_OR_UNBD):
    print("\nModel infeasible, checking which ZIPs cause trouble...")
    m.computeIIS()
    bad_zip = []
    for c in m.getConstrs():
        if c.IISConstr:
            for tag in ("NYS_0_5_", "NoDesert_", "FairMax_", "FairMin_"):
                if tag in c.ConstrName:
                    bad_zip.append(c.ConstrName.split(tag)[-1])
    bad_zip = sorted(set(bad_zip))
    print(f" Problem ZIPs: {bad_zip}")
else:
    print(f"\n Model solved OK, Obj = {m.objVal:.3f}")

Gurobi Optimizer version 12.0.3 build v12.0.3rc0 (mac64[arm] - Darwin 24.6.0 24G90)

CPU model: Apple M4
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Non-default parameters:
ScaleFlag  1
DualReductions  0
InfUnbdInfo  1
NumericFocus  3

Optimize a model with 5629 rows, 4223 columns and 23919 nonzeros
Model fingerprint: 0x334a14fa
Variable types: 2 continuous, 4221 integer (0 binary)
Coefficient statistics:
  Matrix range     [2e+00, 1e+05]
  Objective range  [4e-03, 1e+02]
  Bounds range     [1e+02, 1e+02]
  RHS range        [1e-01, 9e+09]
Found heuristic solution: objective 53493.204519
Presolve removed 1903 rows and 0 columns
Presolve time: 0.01s
Presolved: 3726 rows, 4223 columns, 17706 nonzeros
Variable types: 2 continuous, 4221 integer (0 binary)

Root relaxation: objective 8.415705e+05, 1038 iterations, 0.02 seconds (0.03 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Inc