In [1]:
# Import libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import haversine as hs
import gurobipy as gp
from gurobipy import GRB

# Helper modules
import helper_population_allocation as pa
import helper_distance_calculation as dc

# Avoid printing set copy warnings
import warnings
warnings.filterwarnings("ignore")

c:\Users\mihir\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\mihir\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
c:\Users\mihir\anaconda3\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


In [5]:
# Load the pre-processed building footprints (shapefile) into a GeoDataFrame
buildings_df = gpd.read_file('../processed_data/spatial_data/relevant_buildings.shp')

# Build the identifier "<1-based row number>-<CLASS>" for every building.
# The index is reset first so the row number is the position in the file.
buildings_df = buildings_df.reset_index(drop=True)
buildings_df['building_id'] = [
    str(row_number) + '-' + str(building_class)
    for row_number, building_class in zip(buildings_df.index + 1, buildings_df['CLASS'])
]

# Work on a fixed random subset of 2000 buildings for now (seeded, so reproducible)
buildings_df = buildings_df.sample(n=2000, random_state=1)  # Remove later


# Population parameter (Pj), computed by the project helper from the sampled buildings
res_population = pa.get_population(geopandas_dataframe=buildings_df)

# Existing access parameter (Aj): Residential -> Grocery access from the project helper
res_groc_access = dc.calculate_access(
    geopandas_dataframe=buildings_df,
    building_type_1='Residential',
    building_type_2='Grocery',
    identifier_column='class_reco',
    geo_column='geometry',
    output_format='dataframe',
)



modified script


In [6]:
# Residential-commercial access parameter (Bij), computed by the project helper
# for Residential -> commercial building pairs.
# NOTE: very slow -- run this once and save the resulting dataset.
res_comm_access = dc.calculate_access(
    geopandas_dataframe=buildings_df,
    building_type_1='Residential',
    building_type_2='commercial',
    identifier_column='class_reco',
    geo_column='geometry',
    output_format='dataframe',
)


modified script


In [7]:
# Sort data, create arrays for gurobi optimization so that ordering is maintained

# Res population: sort by id, keep the residential rows, and derive BOTH the population array
# and the id array from the same filtered frame so entry i of each refers to the same building.
res_population = res_population.sort_values('building_id')
res_population_residential = res_population[res_population['class_reco'].str.contains('Residential')]
res_population_array = np.array(res_population_residential['population']) # ith entry corresponds to population at ith residential building
res_buildings = np.array(res_population_residential['building_id']) # sorted residential building ids

# Res access: one row per residential building holding the maximum of 'access'.
# The column is selected explicitly; the previous `.max('access')` passed the string as the
# positional `numeric_only` argument instead of naming a column. sort_index() keeps the rows
# ordered by building id (the group key becomes the index).
res_groc_access = res_groc_access.groupby('building_idResidential')[['access']].max().sort_index()
res_access_array = np.array(res_groc_access['access']) # ith entry corresponds to existing access at ith residential building

# The access array and the population array are combined position-by-position later on,
# so fail loudly here if they do not describe exactly the same buildings in the same order.
if not np.array_equal(res_groc_access.index.to_numpy(), res_buildings):
    raise ValueError('Residential building ids in res_groc_access do not match res_population')

# Res comm access (kept in long format here, sorted by residential id then commercial id)
res_comm_access = res_comm_access.sort_values(by=['building_idResidential', 'building_idcommercial'])

# Sorted array of all commercial building ids, taken from the data itself rather than from the
# rows of one hard-coded residential id (which only exists for one particular sample).
comm_buildings = np.array(res_comm_access['building_idcommercial'].drop_duplicates().sort_values())

In [54]:
# Create a (residential x commercial) distance matrix because optimization takes too long otherwise.
# pivot() on its own orders rows/columns by its own label sort; the explicit reindex() pins the
# rows to res_buildings and the columns to comm_buildings, so entry [i, j] is the distance between
# the ith residential building and the jth commercial building. This replaces a Python loop that
# re-filtered the whole dataframe once per residential building.
res_comm_distance_matrix = (
    res_comm_access
    .pivot(index='building_idResidential', columns='building_idcommercial', values='distance')
    .reindex(index=res_buildings, columns=comm_buildings)
    .to_numpy(dtype=float)
)

# reindex() fills pairs that are absent from res_comm_access with NaN; stop here instead of
# handing NaN coefficients to the optimizer.
if np.isnan(res_comm_distance_matrix).any():
    raise ValueError('Missing or NaN distance for at least one (residential, commercial) pair in res_comm_access')

res_comm_distance_matrix

array([[1.85729108, 0.6263021 , 0.65820969, ..., 7.09482637, 0.17846824,
        0.08355205],
       [1.91416127, 0.67268775, 0.70031637, ..., 7.15618594, 0.2409595 ,
        0.14514537],
       [1.96117625, 0.71756449, 0.743699  , ..., 7.2023988 , 0.28409485,
        0.18729035],
       ...,
       [2.229564  , 0.9779635 , 0.99815464, ..., 7.46600528, 0.54249197,
        0.44663433],
       [2.21136671, 0.9552918 , 0.97393582, ..., 7.45370979, 0.53126633,
        0.43468327],
       [2.14001173, 0.89552137, 0.91902741, ..., 7.37288884, 0.44934988,
        0.353758  ]])

In [61]:
# TODO: figure out distance thresholding later.
# Idea: restrict the (residential, commercial) pairs considered by the model to those whose
# distance is below a threshold, instead of creating an assignment variable for every pair.

# np.where(res_comm_distance_matrix <= 1)

# res_comm_distance_matrix <= 1


# NOTE(review): the sketch below uses names (facility, cluster, num_candidates, num_clusters,
# dist, facility_locs, centroids, threshold) that are not defined in this notebook; it needs to
# be adapted to res_comm_distance_matrix before it can be enabled.
# pairings = {(c, r): res_comm_distance_matrix
#             for facility in range(num_candidates)
#             for cluster in range(num_clusters) 
#             if  dist(facility_locs[facility], centroids[cluster]) < threshold}
# print("Number of viable pairings: {0}".format(len(pairings.keys())))

(array([   0,    0,    0, ..., 1880, 1880, 1880], dtype=int64),
 array([  1,   2,   3, ..., 111, 113, 114], dtype=int64))

In [97]:
# Dimensions of the distance matrix: (rows, columns)
np.shape(res_comm_distance_matrix)

(1881, 115)

In [99]:
# Number of entries in the residential population array
res_population_array.shape[0]

1881

In [95]:
### Testing modeling stuff: facility-location model over the commercial buildings

# Effective demand per residential building = number of unsatisfied customers.
# Demand is zero for buildings that already have access to a grocery store.
# np.where builds a NEW array; the previous `res_demand = res_population_array` was only an
# alias, so zeroing it in place also overwrote the population array itself.
res_demand = np.where(res_access_array == 1, 0, res_population_array)

# Parameters
num_commercial_buildings = res_comm_distance_matrix.shape[1]
num_residential_buildings = len(res_demand)
num_stores = 3  # maximum number of commercial buildings that may be selected

# The objective indexes the matrix by residential position, so its rows must match the demand array
if res_comm_distance_matrix.shape[0] != num_residential_buildings:
    raise ValueError('res_comm_distance_matrix rows do not match the number of residential buildings')

# Objective coefficients: demand of building i times its distance to commercial building j
weighted_distance = res_demand[:, None] * res_comm_distance_matrix

# Implement model
m = gp.Model('Facility location')

# Decision variables
select = m.addVars(range(num_commercial_buildings), vtype=GRB.BINARY, name='select') # 1 if commercial building j is selected
assign = m.addVars(range(num_residential_buildings), range(num_commercial_buildings), vtype=GRB.BINARY, name='assign') # 1 if residential building i is assigned to commercial building j

# Objective function - min total distance from residential buildings to their assigned grocery store, multiplied by demand.
# quicksum builds the linear expression in one pass instead of repeatedly re-allocating it with Python's sum().
m.setObjective(
    gp.quicksum(
        float(weighted_distance[i, j]) * assign[i, j]
        for i in range(num_residential_buildings)
        for j in range(num_commercial_buildings)
    ),
    GRB.MINIMIZE,
)

# Constraints
# Facility limit: at most num_stores commercial buildings are selected
m.addConstr(select.sum() <= num_stores, name='store_limit')

# Each residential building is assigned to exactly one store
m.addConstrs(
    (assign.sum(i, '*') == 1 for i in range(num_residential_buildings)),
    name='assign_once',
)

# Locations can only be assigned demand if they are selected.
# addConstrs appends the (i, j) index to the name, so constraint names are no longer duplicated.
m.addConstrs(
    (
        assign[i, j] <= select[j]
        for i in range(num_residential_buildings)
        for j in range(num_commercial_buildings)
    ),
    name='open2assign',
)

# Optimize
m.optimize()


Gurobi Optimizer version 9.5.2 build v9.5.2rc0 (win64)
Thread count: 4 physical cores, 8 logical processors, using up to 8 threads
Optimize a model with 216317 rows, 216430 columns and 432860 nonzeros
Model fingerprint: 0x15928328
Variable types: 0 continuous, 216430 integer (216430 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [8e-03, 1e+02]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 3e+00]
Found heuristic solution: objective 0.0000000

Explored 0 nodes (0 simplex iterations) in 0.05 seconds (0.02 work units)
Thread count was 1 (of 8 available processors)

Solution count 1: 0 

Optimal solution found (tolerance 1.00e-04)
Best objective 0.000000000000e+00, best bound 0.000000000000e+00, gap 0.0000%


In [90]:
# Objective value of the current solution held by the model
m.getAttr(GRB.Attr.ObjVal)

0.45941713065146883

In [91]:
# Number of selected commercial buildings: add up the solution values of all `select` variables
counter = sum(select[site].X for site in range(len(select)))

counter

1.0