In [1]:
# Import libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import haversine as hs
import gurobipy as gp
from gurobipy import GRB

# Helper modules
import helper_population_allocation as pa
import helper_distance_calculation as dc

# Avoid printing set copy warnings.
# NOTE(review): filterwarnings("ignore") without a category silences every
# warning issued through the warnings module for the rest of the session,
# not only pandas' copy warnings, so deprecation/future warnings are hidden too.
import warnings
warnings.filterwarnings("ignore")


In [2]:
%%time

# Load the main buildings dataset and give it a clean 0..n-1 index so the
# IDs built below are consecutive. reset_index returns a new frame, which
# avoids mutating the loaded frame in place.
buildings_df = gpd.read_file('../processed_data/relevant_buildings.shp').reset_index(drop=True)

# Create ID variable: "<1-based row number>-<CLASS>", e.g. "1-C".
# A zip-based comprehension yields exactly the same strings as a row-wise
# DataFrame.apply(axis=1) without building a Series object for every row.
buildings_df['building_id'] = [
    f"{row_number}-{building_class}"
    for row_number, building_class in zip(buildings_df.index + 1, buildings_df['CLASS'])
]

# buildings_df = buildings_df.sample(n=2000, random_state=1)  # Remove later


Wall time: 5.83 s


In [3]:
# Residential buildings: filter on class_reco, order by building_id, then let
# the helper attach coordinates. Sorting fixes the ordering that the two
# arrays below (and anything indexed by residential building) rely on.
res_buildings = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('Residential')]
    .sort_values('building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same residential building.
# The 'coordinates' column is expected to be provided by dc.get_geocoordinate.
res_buildings_coordinates_array = np.array(res_buildings['coordinates'])
res_buildings_array = np.array(res_buildings['building_id'])

In [4]:
# Commercial buildings: filter on class_reco, order by building_id, then let
# the helper attach coordinates.
# Note: the suffix of building_id comes from the CLASS column, not class_reco,
# so IDs ending in "-R" or "-G" can legitimately appear in this selection.
comm_buildings = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('commercial')]
    .sort_values('building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same commercial building.
# The 'coordinates' column is expected to be provided by dc.get_geocoordinate.
comm_buildings_coordinates_array = np.array(comm_buildings['coordinates'])
comm_buildings_array = np.array(comm_buildings['building_id'])


In [33]:
# Inspect the ordered commercial IDs. building_id is a string, so the sort is
# lexicographic ('100014-C' comes before '9999-C'), not numeric.
comm_buildings_array

array(['100014-C', '100019-R', '100024-C', ..., '99973-R', '9999-C',
       '99991-C'], dtype=object)

In [5]:
# Grocery stores: filter on class_reco, order by building_id, then let the
# helper attach coordinates.
grocery_stores = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('Grocery')]
    .sort_values('building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same grocery store.
# The 'coordinates' column is expected to be provided by dc.get_geocoordinate.
grocery_stores_coordinates_array = np.array(grocery_stores['coordinates'])
grocery_stores_array = np.array(grocery_stores['building_id'])


In [6]:
# Load the precomputed residential-vs-commercial matrices from disk.
# Later cells treat each row as a residential building and each column as a
# commercial building.
# NOTE(review): presumably these were saved with the same building_id ordering
# used for res_buildings_array / comm_buildings_array -- confirm against the
# notebook that produced the .npy files.
res_comm_access_matrix = np.load('../processed_data/res_comm_access_matrix.npy')
res_comm_distance_matrix = np.load('../processed_data/res_comm_distance_matrix.npy')

In [7]:
%%time

# Parameter Aj (existing grocery access per residential building).
# dc.calculate_access returns a distance matrix and an access matrix for
# every (residential building, grocery store) pair.
(res_groc_distance_matrix,
 res_groc_access_matrix) = dc.calculate_access(
    res_buildings_coordinates_array,
    grocery_stores_coordinates_array,
)

# Row-wise maximum: entry i is non-zero when residential building i has access
# to at least one grocery store (assumes access entries are 0/1 flags).
res_access_array = np.amax(res_groc_access_matrix, axis=1)


Wall time: 18.7 s


In [8]:
%%time

# Create parameter matrices (Res Population - Pj)
# After sorting, the ith value is the population of the ith building in
# building_id order.
# NOTE(review): this assumes pa.get_population returns one 'population' value
# per residential building_id -- confirm against the helper.
#
# Missing populations are filled with the mean (METHOD NEEDS UPDATE).
# The fill is done with assign() instead of
# res_population['population'].fillna(..., inplace=True): the in-place form is
# chained assignment on a column selection, which pandas does not guarantee
# will write through to the parent frame (and never does under Copy-on-Write),
# so NaNs could silently survive into the array.
res_population = (
    pa.get_population(buildings_df)
    .drop_duplicates('building_id')  # keep a single row per building
    .assign(population=lambda df: df['population'].fillna(df['population'].mean()))
    .sort_values('building_id')  # Just to be safe
)
res_population_array = np.array(res_population['population'])
res_population_array.shape



Wall time: 13.7 s


(109324,)

In [9]:
# Display the full buildings table (pandas truncates the middle rows).
buildings_df

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id
0,C,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01230 40.38309, -80.01255 40.383...",1-C
1,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01382 40.38638, -80.01380 40.386...",2-R
2,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01309 40.38253, -80.01307 40.382...",3-R
3,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01399 40.38554, -80.01399 40.385...",4-R
4,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.00736 40.38204, -80.00729 40.381...",5-R
...,...,...,...,...,...,...,...
116273,C,commercial,Point Breeze,420039811001,981100,"POLYGON ((-79.90934 40.44247, -79.90928 40.442...",116274-C
116274,R,2-Unit Residential,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.91447 40.43197, -79.91443 40.431...",116275-R
116275,C,commercial,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.90925 40.42686, -79.90910 40.426...",116276-C
116276,R,1-Unit Residential,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.91190 40.43303, -79.91190 40.433...",116277-R


In [10]:
# List the distinct neighbourhood names present in the 'hood' column.
set(buildings_df['hood'])

{'Allegheny West',
 'Allentown',
 'Arlington',
 'Banksville',
 'Bedford Dwellings',
 'Beechview',
 'Beltzhoover',
 'Bloomfield',
 'Bluff',
 'Brighton Heights',
 'Brookline',
 'California-Kirkbride',
 'Carrick',
 'Central Business District',
 'Central Lawrenceville',
 'Central Northside',
 'Central Oakland',
 'Chateau',
 'Crafton Heights',
 'Crawford-Roberts',
 'Duquesne Heights',
 'East Allegheny',
 'East Hills',
 'East Liberty',
 'Elliott',
 'Fineview',
 'Friendship',
 'Garfield',
 'Greenfield',
 'Hazelwood',
 'Highland Park',
 'Homewood North',
 'Homewood South',
 'Homewood West',
 'Knoxville',
 'Larimer',
 'Lincoln Place',
 'Lincoln-Lemington-Belmar',
 'Lower Lawrenceville',
 'Manchester',
 'Marshall-Shadeland',
 'Middle Hill',
 'Morningside',
 'Mount Oliver Borough',
 'Mount Washington',
 'New Homestead',
 'North Oakland',
 'North Shore',
 'Northview Heights',
 'Oakwood',
 'Overbrook',
 'Perry North',
 'Perry South',
 'Point Breeze',
 'Point Breeze North',
 'Polish Hill',
 'Regent 

In [11]:
# Check how geoid10 is stored; it is a str, so filters on it must compare
# against a string rather than an integer.
type(buildings_df["geoid10"].iloc[0])

str

In [103]:
# Look at the buildings in the Chateau neighbourhood.
buildings_df[buildings_df["hood"] == 'Chateau']

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id
86049,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.02913 40.45316, -80.02909 40.453...",86050-C
86050,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03458 40.45813, -80.03471 40.458...",86051-C
86051,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.02795 40.44853, -80.02812 40.448...",86052-C
86052,G,commercial,Chateau,420039806001,980600,"POLYGON ((-80.02145 40.45010, -80.02157 40.450...",86053-G
86053,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03520 40.46067, -80.03508 40.460...",86054-C
...,...,...,...,...,...,...,...
86108,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03171 40.45491, -80.03141 40.454...",86109-C
86109,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03467 40.46147, -80.03465 40.461...",86110-C
86110,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03459 40.45945, -80.03391 40.459...",86111-C
86111,C,commercial,Chateau,420039806001,980600,"POLYGON ((-80.03070 40.45635, -80.03067 40.456...",86112-C


In [104]:
# Commercial buildings whose geoid10 matches the value shown for the Chateau
# rows; geoid10 is stored as a string, so compare against a string literal.
comm_buildings_geoid = comm_buildings.loc[comm_buildings["geoid10"] == "420039806001"]

In [105]:
# Inspect the commercial buildings selected for that geoid10.
comm_buildings_geoid

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id,centroids,coordinates
86049,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1273355.915 6141530.364, -1273357.0...",86050-C,POINT (-80.02913 40.45312),"(-80.02912620260797, 40.45312462376988)"
86050,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1272929.930 6142240.935, -1272932.9...",86051-C,POINT (-80.03466 40.45817),"(-80.03466374601334, 40.45816879322774)"
86051,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1273811.257 6141198.619, -1273802.0...",86052-C,POINT (-80.02799 40.44861),"(-80.0279941270327, 40.44860940736549)"
86052,G,commercial,Chateau,420039806001,980600,"POLYGON ((-1273551.766 6140727.000, -1273515.1...",86053-G,POINT (-80.02125 40.45055),"(-80.02124757347613, 40.450549048691094)"
86053,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1272680.362 6142420.054, -1272702.4...",86054-C,POINT (-80.03518 40.46054),"(-80.03517622401495, 40.46054123477853)"
...,...,...,...,...,...,...,...,...,...
86108,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1273215.635 6141836.807, -1273205.2...",86109-C,POINT (-80.03150 40.45474),"(-80.0314955826945, 40.45474308283924)"
86109,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1272590.806 6142415.301, -1272596.7...",86110-C,POINT (-80.03492 40.46109),"(-80.03491871468576, 40.461093483987625)"
86110,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1272795.328 6142307.635, -1272771.9...",86111-C,POINT (-80.03414 40.45917),"(-80.03414188236128, 40.4591664177445)"
86111,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1273053.735 6141822.949, -1273052.5...",86112-C,POINT (-80.03104 40.45662),"(-80.03103745170897, 40.45661566804498)"


In [110]:
# Pick one commercial building from the selection; the fixed random_state
# makes the same row come back on every run.
random_build = comm_buildings_geoid.sample(n=1, random_state=37)

In [111]:
# Show the sampled building.
random_build 

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id,centroids,coordinates
86078,C,commercial,Chateau,420039806001,980600,"POLYGON ((-1273285.323 6141698.690, -1273256.3...",86079-C,POINT (-80.03023 40.45435),"(-80.03023300890958, 40.45435129722855)"


In [112]:
# Find the sampled building's position in comm_buildings_array. The ID is read
# from random_build instead of being retyped from the output above, so this
# lookup stays correct if the sample (seed or geoid) is changed.
np.where(comm_buildings_array == random_build['building_id'].iloc[0])

(array([6039], dtype=int64),)

In [72]:
# Gurobi takes forever to optimize since it relies on for loops. So we are going to do the optimization manually.

# The three inputs below are all indexed by residential building along their
# first axis. Check that they line up before deleting rows by position, so a
# mismatch fails here with a clear message instead of dropping the wrong rows.
assert (
    res_comm_access_matrix.shape[0]
    == res_access_array.shape[0]
    == res_population_array.shape[0]
), "res_comm_access_matrix, res_access_array and res_population_array are not aligned"

###########################
# STEP 1: Take res_comm_access_matrix and remove the rows (each row represents
# a residential building) that already have existing access
###########################
existing_access_indices = res_access_array.nonzero()[0]  # indices of residential buildings that currently have access
res_comm_access_matrix_subset = np.delete(res_comm_access_matrix, existing_access_indices, axis=0)

###########################
# STEP 2: Drop the same rows from res_population_array so the ordering matches
###########################
res_population_array_sub = np.delete(res_population_array, existing_access_indices, axis=0)

###########################
# STEP 3: Matrix-multiply res_population_array_sub by res_comm_access_matrix_subset
###########################

# How this works (n = residential buildings without access, m = commercial buildings):
#
# 1. res_population_array_sub is reshaped into a (1 x n) row vector.
# 2. res_comm_access_matrix_subset is (n x m).
# 3. Their product is a (1 x m) array.
# 4. Element k of the result is
#        P0 * access(res 0, comm k) + P1 * access(res 1, comm k) + ...
#    i.e. the total population, among buildings that currently lack access,
#    that lies within the access region of commercial building k. It is the
#    population that would newly gain access if that building were chosen.
#
# reshape(1, -1) is used instead of (-1, n): the latter cannot infer the
# leading dimension when n == 0 (every building already has access) and raises.
res_population_array_sub = res_population_array_sub.reshape(1, -1)
new_access_array = np.matmul(res_population_array_sub, res_comm_access_matrix_subset)

In [121]:
# Inspect the result: one row, one entry per commercial building.
new_access_array

array([[   0.  , 1589.16,    0.  , ..., 1601.88,    0.  ,    0.  ]])

In [113]:
# Newly served population for the sampled building: 6039 is the position of
# '86079-C' in comm_buildings_array, as returned by the np.where lookup above.
new_access_array[0,6039]

0.0