In [1]:
import pyomo.environ as pyo
import pandas as pd
from pathlib import Path
import preprocess_data as ppd
from itertools import zip_longest

# from Camm18 paper to verify obtained results later
known_x_sols = [
    [8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87],
    [4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87],
    [8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87],
]

## 1. Solve Camm18's network location problem for 2011 data {-}

### Data import {-}

In [2]:
data_path = Path("../data/")
output_path = Path("../output/")
df, adjacent_matrix = ppd.get_df_adj(data_path, 2021)

### Constraint equation definitions {-}

In [3]:
def param_adjacent(m, i, j):
    return int(j in adjacent_matrix[i])

def param_pop_2011(m, i):
    return df["population_2010"][i - 1]

def param_pop_2020(m, i):
    return df["population_2020"][i - 1]

def con_a(m, i):
    return sum((m.a[i, j] * m.x[j]) for j in m.J) >= m.y[i]

def con_x(m):
    return sum(m.x[j] for j in m.J) <= m.k

def obj_sum(m):
    return pyo.summation(m.p, m.y)

### Model setup {-}

In [4]:
def instantiate_model(name, year=2011, k=15):
    name = pyo.AbstractModel()
    
    # value of n (number of counties)
    name.n = 88

    # limit on number of pirincipal places of buisnesses opened (init to 5)
    name.k = k

    # range of i and j (iterating over counties)
    name.I = pyo.RangeSet(1, name.n)
    name.J = pyo.RangeSet(1, name.n)

    if year==2011:
        name.p = pyo.Param(name.I, initialize=param_pop_2011)
    elif year==2020:
        name.p = pyo.Param(name.I, initialize=param_pop_2020)  # population of county i

    name.a = pyo.Param(
        name.I, name.J, domain=pyo.Binary, initialize=param_adjacent
    )  # 1 if county i and j are adjacent

    name.x = pyo.Var(
        name.J, domain=pyo.Binary
    )  # 1 if principal place of business is opened in county j
    name.y = pyo.Var(name.I, domain=pyo.Binary)  # 1 if county i is covered

    name.obj = pyo.Objective(rule=obj_sum, sense=pyo.maximize)

    name.a_constraint = pyo.Constraint(name.I, rule=con_a)
    
    name.x_constraint = pyo.Constraint(rule=con_x)
    
    return name

### First solution {-}

In [5]:
model = instantiate_model("model", 2011)
# opt = pyo.SolverFactory("ipopt", executable="/home/adb/anaconda3/bin/ipopt")
opt = pyo.SolverFactory("glpk", executable="/home/adb/anaconda3/bin/glpsol")
# opt = pyo.SolverFactory("glpk", executable="/usr/local/Caskroom/miniconda/base/bin/glpsol")

In [6]:
instance1 = model.create_instance()
results1 = opt.solve(instance1)

In [7]:
def get_solutions(instance):
    sol_dict = instance.x.get_values()
    sol = [k for k, v in sol_dict.items() if v == 1]
    return sol

def get_solution_set_inverse(solution):
    return set([i for i in range(1, model.n + 1)]) - set(solution)

In [8]:
sol1 = get_solutions(instance1)
sol1_compare = sol1
inverse_sol1 = get_solution_set_inverse(sol1)
sol1_camm = ppd.county_ids_to_camm_ids(df, sol1)
pd.Series(sol1).to_csv(
    output_path / "solution_main_model_sol1.csv", index=False, header=["county_id"]
)

In [9]:
print(f"Obtained Solution 1 with Camm18 indexing method: \n{sol1_camm}")
print(f"Solution 1 from Camm18 paper: \n{known_x_sols[0]}")

Obtained Solution 1: 
[3, 5, 7, 8, 9, 26, 28, 40, 45, 49, 51, 69, 72, 75, 76]
Obtained Solution 1 with Camm18 indexing method: 
[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]
Solution 1 from Camm18 paper: 
[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]


### Re-solve for 2nd solution {-}

In [10]:
def con_x_sol2(m):
    return sum(m.x[i] for i in sol1) - sum(m.x[j] for j in inverse_sol1) <= len(sol1) - 1


model.sol2_constraint = pyo.Constraint(rule=con_x_sol2)

instance2 = model.create_instance()
results2 = opt.solve(instance2)

In [11]:
sol2 = get_solutions(instance2)
inverse_sol2 = get_solution_set_inverse(sol2)
sol2_camm = ppd.county_ids_to_camm_ids(df, sol2)
pd.Series(sol2).to_csv(
    output_path / "solution_main_model_sol2.csv", index=False, header=["county_id"]
)

In [12]:
print(f"Obtained Solution 2: \n{sol2_camm}")
print(f"Solution 2 from Camm18 paper: \n{known_x_sols[2]}")

Obtained Solution 2: 
[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]
Solution 2 from Camm18 paper: 
[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]


### Re-solve for 3rd solution {-}

In [13]:
def con_x_sol3(m):
    return sum(m.x[i] for i in sol2) - sum(m.x[j] for j in inverse_sol2) <= len(sol2) - 1


model.sol3_constraint = pyo.Constraint(rule=con_x_sol3)

instance3 = model.create_instance()
results3 = opt.solve(instance3)

In [14]:
sol3 = get_solutions(instance3)
inverse_sol3 = get_solution_set_inverse(sol3)
sol3_camm = ppd.county_ids_to_camm_ids(df, sol3)
pd.Series(sol3).to_csv(
    output_path / "solution_main_model_sol3.csv", index=False, header=["county_id"]
)

In [15]:
print(f"Obtained Solution 3\n{sol3_camm}")
print(f"Solution 3 from Camm18 paper: \n{known_x_sols[1]}")

Obtained Solution 3
[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]
Solution 3 from Camm18 paper: 
[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]


### Verify obtained solutions with ones from the Camm18 paper {-}

In [16]:
print(f"Solution 1 \nResult:{sol1_camm} \nCamm18:{known_x_sols[0]} \n ----")
print(f"Solution 2 \nResult:{sol2_camm} \nCamm18:{known_x_sols[1]} \n ----")
print(f"Solution 3 \nResult:{sol3_camm} \nCamm18:{known_x_sols[2]} \n ----")

Solution 1 
Result:[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----
Solution 2 
Result:[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----
Solution 3 
Result:[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----


## 2. Solve the same problem for 2020 data {-}

In [17]:
model_2020 = instantiate_model("model2020", 2020)

instance1 = model_2020.create_instance()
results1 = opt.solve(instance1)

sol1 = get_solutions(instance1)
inverse_sol1 = get_solution_set_inverse(sol1)
sol1_camm = ppd.county_ids_to_camm_ids(df, sol1)

model_2020.sol2_constraint = pyo.Constraint(rule=con_x_sol2)

instance2 = model_2020.create_instance()
results2 = opt.solve(instance2)

sol2 = get_solutions(instance2)
inverse_sol2 = get_solution_set_inverse(sol2)
sol2_camm = ppd.county_ids_to_camm_ids(df, sol2)

model_2020.sol3_constraint = pyo.Constraint(rule=con_x_sol3)

instance3 = model_2020.create_instance()
results3 = opt.solve(instance3)

sol3 = get_solutions(instance3)
inverse_sol3 = get_solution_set_inverse(sol3)
sol3_camm = ppd.county_ids_to_camm_ids(df, sol3)

print(f"Solution 1 \nResult:{sol1_camm} \nCamm18:{known_x_sols[1]} \n ----")
print(f"Solution 2 \nResult:{sol2_camm} \nCamm18:{known_x_sols[0]} \n ----")
print(f"Solution 3 \nResult:{sol3_camm} \nCamm18:{known_x_sols[2]} \n ----")

Solution 1 
Result:[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[4, 8, 14, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----
Solution 2 
Result:[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----
Solution 3 
Result:[8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
Camm18:[8, 14, 17, 18, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87] 
 ----


We observe that the $x_i$ solution sets are indentical for both the 2020 and 2011 population datasets.

## 3. Verify Camm18's 3 year-implementation method {-}

We use the first solution set from the 2011 data to verify the 3 year-implementation method from the paper. We will instantiate a new model using our previously created template but with `k=5` which will constrain the number of solutions $x_i$ to 5 for each run.

In [18]:
solution_set_initial = sol1_compare
solution_set_initial_camm = ppd.county_ids_to_camm_ids(df, solution_set_initial)

## Year 1
model1 = instantiate_model("model_verify", 2011, k=5)
instance1 = model1.create_instance()

# We fix the all solutions not in initial solution set to 0
for i in get_solution_set_inverse(solution_set_initial):
    instance1.x[i].fix(0)

results1 = opt.solve(instance1)
sol1 = get_solutions(instance1)
inverse_sol1 = get_solution_set_inverse(sol1)
sol1_camm = ppd.county_ids_to_camm_ids(df, sol1)

## Year 2
model2 = instantiate_model("model_verify", 2011, k=10)
instance2 = model2.create_instance()

## We fix x_i to 1 for the solution set from year 1
for i in get_solution_set_inverse(solution_set_initial):
    instance1.x[i].fix(0)
for i in sol1:
    instance2.x[i].fix(1)

results2 = opt.solve(instance2)
sol2 = get_solutions(instance2)
inverse_sol2 = get_solution_set_inverse(sol2)
sol2_camm = ppd.county_ids_to_camm_ids(df, sol2)

## Year 3
model3 = instantiate_model("model_verify", 2011, k=15)
instance3 = model3.create_instance()

## We fix x_i to 1 for the solution set from year 2
for i in get_solution_set_inverse(solution_set_initial):
    instance1.x[i].fix(0)
for i in sol1:
    instance2.x[i].fix(1)
for i in sol2:
    instance3.x[i].fix(1)

results3 = opt.solve(instance3)
sol3 = get_solutions(instance3)
inverse_sol3 = get_solution_set_inverse(sol3)
sol3_camm = ppd.county_ids_to_camm_ids(df, sol3)

extraneous = set(solution_set_initial_camm) - set(sol3_camm)
missing = set(sol3_camm) - set(solution_set_initial_camm)

In [19]:
print(f"Initial solution set: \n        {solution_set_initial_camm}")
print(f"Our calculated 3 year glide path:")
print(f"Year 1: {sol1_camm}")
print(f"Year 2: {sol2_camm}")
print(f"Year 3: {sol3_camm}")

Initial solution set: 
        [8, 14, 17, 19, 29, 33, 35, 38, 51, 55, 59, 68, 75, 78, 87]
Our calculated 3 year glide path:
Year 1: [8, 55, 59, 75, 78]
Year 2: [8, 15, 21, 25, 28, 51, 55, 59, 75, 78]
Year 3: [8, 15, 18, 21, 25, 28, 41, 42, 51, 55, 59, 68, 75, 78, 87]


Our calculated first year solution is completely contained in the initial solution set from part 2. However the solutions for the second and third years diverge as follows:

In [20]:
print(f"Counties missing from our year 3 solution that are in the initial solution set: \n{missing}")
print(f"Counties included in our year 3 solution that are not the initial solution set: \n{extraneous}")

Counties missing from our year 3 solution that are in the initial solution set: 
{41, 42, 15, 18, 21, 25, 28}
Counties included in our year 3 solution that are not the initial solution set: 
{33, 35, 38, 14, 17, 19, 29}
