In [1]:
# Import libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import haversine as hs
import gurobipy as gp
from gurobipy import GRB

# Helper modules
import helper_population_allocation as pa
import helper_distance_calculation as dc

# Avoid printing set copy warnings
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Optional one-time setup, only if pyproj is not already installed: %pip install pyproj

In [2]:
%%time

# Get the main buildings dataset 
buildings_df = gpd.read_file('../processed_data/relevant_buildings.shp')

# Create ID variable
buildings_df.reset_index(drop=True, inplace=True)
buildings_df['building_id'] = buildings_df.index + 1
buildings_df['building_id'] = buildings_df.apply(lambda row: str(row['building_id']) + '-' + str(row['CLASS']) , axis=1)

# buildings_df = buildings_df.sample(n=2000, random_state=1)  # Remove later


Wall time: 9.26 s


In [3]:
# Residential buildings: filter on class_reco, order by building_id, then let
# the helper attach the coordinates. Both arrays below follow this ordering.
res_buildings = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('Residential')].sort_values(by='building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same residential building
res_buildings_array, res_buildings_coordinates_array = (
    np.array(res_buildings[column]) for column in ('building_id', 'coordinates')
)

In [4]:
# Commercial buildings: filter on class_reco, order by building_id, then let
# the helper attach the coordinates. Both arrays below follow this ordering.
comm_buildings = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('commercial')].sort_values(by='building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same commercial building
comm_buildings_array, comm_buildings_coordinates_array = (
    np.array(comm_buildings[column]) for column in ('building_id', 'coordinates')
)


In [5]:
# Inspect the commercial building IDs; building_id is a string, so the sort
# order is lexicographic (e.g. '100014-C' comes before '9999-C').
comm_buildings_array

array(['100014-C', '100019-R', '100024-C', ..., '99973-R', '9999-C',
       '99991-C'], dtype=object)

In [6]:
# Grocery stores: filter on class_reco, order by building_id, then let
# the helper attach the coordinates. Both arrays below follow this ordering.
grocery_stores = dc.get_geocoordinate(
    buildings_df.loc[buildings_df['class_reco'].str.contains('Grocery')].sort_values(by='building_id'),
    'geometry',
)

# Parallel arrays: position i in each refers to the same grocery store
grocery_stores_array, grocery_stores_coordinates_array = (
    np.array(grocery_stores[column]) for column in ('building_id', 'coordinates')
)


In [46]:
# Importing Distance Matrix (residential x commercial pairwise distances)
res_comm_distance_matrix = np.load('../processed_data/res_comm_distance_matrix.npy')

# Binary access matrix: 1 where the pair is within the access radius, else 0.
# Thresholding directly (rather than overwriting values in two passes) avoids
# mislabelling a distance that happens to equal exactly 1.0 as "accessible".
# NOTE(review): assumes distances are in miles, per the "half_mile" naming — confirm.
ACCESS_RADIUS = 0.5
res_comm_access_matrix_half_mile = (
    res_comm_distance_matrix <= ACCESS_RADIUS
).astype(res_comm_distance_matrix.dtype)

In [47]:
%%time

# Create parameter matrices (Res groc access array - Aj)
# ith value indicates whether the ith residential building has existing access
res_groc_distance_matrix, res_groc_access_matrix = dc.calculate_access(res_buildings_coordinates_array, grocery_stores_coordinates_array)
res_access_array = np.amax(res_groc_access_matrix, 1)

res_groc_access_matrix_half_mile = res_groc_distance_matrix.copy()
res_groc_access_matrix_half_mile[res_groc_access_matrix_half_mile <= 0.5] = 1
res_groc_access_matrix_half_mile[res_groc_access_matrix_half_mile != 1] = 0

res_access_array_half_mile = np.amax(res_groc_access_matrix_half_mile, 1)


Wall time: 31.1 s


In [34]:
# Length of the half-mile access vector (one entry per row of the res x grocery matrix)
res_access_array_half_mile.shape

(109324,)

In [35]:
%%time

# Create parameter matrices (Res Population - Pj)
# ith value indicates the population in the ith column
res_population = pa.get_population(buildings_df) 
res_population = res_population.drop_duplicates('building_id') # drop duplicates

# Filling null values with mean (METHOD NEEDS UPDATE)
res_population['population'].fillna(value = res_population['population'].mean(), inplace=True)
res_population = res_population.sort_values('building_id') # Just to be safe
res_population_array = np.array(res_population['population'])
res_population_array.shape



Wall time: 19.4 s


(109324,)

In [36]:
# Display the buildings table, including the building_id column created earlier
buildings_df

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id
0,C,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01230 40.38309, -80.01255 40.383...",1-C
1,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01382 40.38638, -80.01380 40.386...",2-R
2,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01309 40.38253, -80.01307 40.382...",3-R
3,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.01399 40.38554, -80.01399 40.385...",4-R
4,R,1-Unit Residential,Brookline,420031918003,191800,"POLYGON ((-80.00736 40.38204, -80.00729 40.381...",5-R
...,...,...,...,...,...,...,...
116273,C,commercial,Point Breeze,420039811001,981100,"POLYGON ((-79.90934 40.44247, -79.90928 40.442...",116274-C
116274,R,2-Unit Residential,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.91447 40.43197, -79.91443 40.431...",116275-R
116275,C,commercial,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.90925 40.42686, -79.90910 40.426...",116276-C
116276,R,1-Unit Residential,Squirrel Hill South,420039803001,980300,"POLYGON ((-79.91190 40.43303, -79.91190 40.433...",116277-R


In [37]:
# Distinct values of the 'hood' column
set(buildings_df['hood'])

{'Allegheny West',
 'Allentown',
 'Arlington',
 'Banksville',
 'Bedford Dwellings',
 'Beechview',
 'Beltzhoover',
 'Bloomfield',
 'Bluff',
 'Brighton Heights',
 'Brookline',
 'California-Kirkbride',
 'Carrick',
 'Central Business District',
 'Central Lawrenceville',
 'Central Northside',
 'Central Oakland',
 'Chateau',
 'Crafton Heights',
 'Crawford-Roberts',
 'Duquesne Heights',
 'East Allegheny',
 'East Hills',
 'East Liberty',
 'Elliott',
 'Fineview',
 'Friendship',
 'Garfield',
 'Greenfield',
 'Hazelwood',
 'Highland Park',
 'Homewood North',
 'Homewood South',
 'Homewood West',
 'Knoxville',
 'Larimer',
 'Lincoln Place',
 'Lincoln-Lemington-Belmar',
 'Lower Lawrenceville',
 'Manchester',
 'Marshall-Shadeland',
 'Middle Hill',
 'Morningside',
 'Mount Oliver Borough',
 'Mount Washington',
 'New Homestead',
 'North Oakland',
 'North Shore',
 'Northview Heights',
 'Oakwood',
 'Overbrook',
 'Perry North',
 'Perry South',
 'Point Breeze',
 'Point Breeze North',
 'Polish Hill',
 'Regent 

In [38]:
# Check the Python type of geoid10 values so later filters compare against the same type
type(buildings_df["geoid10"].iloc[0])

str

In [52]:
# Browse the buildings whose 'hood' is Central Oakland
buildings_df[buildings_df["hood"] == 'Central Oakland']

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id
106625,R,4+ Unit Residential,Central Oakland,420030406001,040600,"POLYGON ((-79.95290 40.43715, -79.95285 40.437...",106626-R
106626,R,1-Unit Residential,Central Oakland,420030406001,040600,"POLYGON ((-79.95204 40.43634, -79.95196 40.436...",106627-R
106627,C,4+ Unit Residential,Central Oakland,420030406001,040600,"POLYGON ((-79.95416 40.43605, -79.95368 40.435...",106628-C
106628,R,4+ Unit Residential,Central Oakland,420030406001,040600,"POLYGON ((-79.95232 40.43959, -79.95225 40.439...",106629-R
106629,R,1-Unit Residential,Central Oakland,420030406001,040600,"POLYGON ((-79.95112 40.43504, -79.95112 40.435...",106630-R
...,...,...,...,...,...,...,...
110831,G,1-Unit Residential,Central Oakland,420030405001,040500,"POLYGON ((-79.95527 40.44006, -79.95536 40.440...",110832-G
110832,R,1-Unit Residential,Central Oakland,420030405001,040500,"POLYGON ((-79.95770 40.43940, -79.95781 40.439...",110833-R
110833,R,4+ Unit Residential,Central Oakland,420030405001,040500,"POLYGON ((-79.95562 40.43723, -79.95578 40.437...",110834-R
110834,C,4+ Unit Residential,Central Oakland,420030405001,040500,"POLYGON ((-79.95778 40.43929, -79.95793 40.439...",110835-C


In [55]:
# Commercial buildings whose geoid10 is 420030406001 (geoid10 is stored as a string)
comm_buildings_geoid = comm_buildings.loc[comm_buildings["geoid10"].eq("420030406001")]

In [56]:
# Display the commercial buildings selected for this geoid10
comm_buildings_geoid

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id,centroids,coordinates
106689,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273837.333 6134383.310, -1273861.6...",106690-C,POINT (-79.95402 40.43706),"(-79.95401975064348, 40.4370613497148)"
106696,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273902.037 6134390.436, -1273922.7...",106697-C,POINT (-79.95450 40.43629),"(-79.95449505290206, 40.43628533842165)"
106705,R,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273865.404 6134392.836, -1273867.6...",106706-R,POINT (-79.95442 40.43691),"(-79.95442320108313, 40.43691410040844)"
106709,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273896.827 6134337.387, -1273904.1...",106710-C,POINT (-79.95397 40.43655),"(-79.95396959384149, 40.43655272115106)"
106748,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1274095.919 6134301.495, -1274104.5...",106749-C,POINT (-79.95471 40.43478),"(-79.95471167210898, 40.43477913993961)"
106752,R,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273908.497 6134418.991, -1273913.4...",106753-R,POINT (-79.95495 40.43660),"(-79.95495181324692, 40.43660080010114)"
106809,I,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273890.887 6134329.096, -1273884.8...",106810-I,POINT (-79.95382 40.43673),"(-79.95382242168988, 40.436728263264605)"
106850,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273945.373 6134422.927, -1273930.8...",106851-C,POINT (-79.95493 40.43639),"(-79.95492938608182, 40.43638592783314)"
106852,R,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273934.196 6134419.777, -1273943.3...",106853-R,POINT (-79.95506 40.43670),"(-79.95506313108045, 40.43669967595327)"
106866,R,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1273883.880 6134363.320, -1273884.5...",106867-R,POINT (-79.95419 40.43673),"(-79.95418599945252, 40.43673486628353)"


In [57]:
# Draw a single commercial building from the filtered set; the fixed
# random_state makes the draw repeatable.
random_build = comm_buildings_geoid.sample(n=1, random_state= 29)

In [58]:
# Display the sampled commercial building
random_build 

Unnamed: 0,CLASS,class_reco,hood,geoid10,tractce10,geometry,building_id,centroids,coordinates
106748,C,commercial,Central Oakland,420030406001,40600,"POLYGON ((-1274095.919 6134301.495, -1274104.5...",106749-C,POINT (-79.95471 40.43478),"(-79.95471167210898, 40.43477913993961)"


In [60]:
# Position of the sampled building in comm_buildings_array, i.e. its column
# index in the res x commercial matrices. The ID is read from random_build
# instead of being hard-coded, so it stays correct if the sample changes.
np.where(comm_buildings_array == random_build['building_id'].iloc[0])

(array([491], dtype=int64),)

In [61]:
# Gurobi takes forever to optimize since it relies on for loops. So we are going to do the optimization manually.

###########################
# STEP 1: Take res_comm_access_matrix_half_mile and remove the rows (each row
# represents a residential building) that already have access
###########################
existing_access_indices = res_access_array_half_mile.nonzero()[0]  # residential buildings that currently have access
res_comm_access_matrix_subset = np.delete(res_comm_access_matrix_half_mile, existing_access_indices, axis=0)

###########################
# STEP 2: Drop the same rows from res_population_array so that the ordering matches
###########################
res_population_array_sub = np.delete(res_population_array, existing_access_indices, axis=0)

###########################
# STEP 3: Matrix-multiply res_population_array_sub by res_comm_access_matrix_subset
###########################

# How this works:
#
# 1. res_population_array_sub is reshaped into a (1 x n_res) row vector, where
#    n_res is the number of residential buildings without existing access.
# 2. res_comm_access_matrix_subset is (n_res x n_comm).
# 3. Their product is a (1 x n_comm) row vector.
# 4. Element k of the result is
#        P0 * access(res 0, comm k) + P1 * access(res 1, comm k) + ...
#    i.e. the total population that would newly gain access if commercial
#    building k were used.
#
# reshape(1, -1) is used instead of (-1, len(...)) so the cell also works when
# no residential building lacks access (an empty vector cannot infer the -1).
res_population_array_sub = res_population_array_sub.reshape(1, -1)
new_access_array = np.matmul(res_population_array_sub, res_comm_access_matrix_subset)

In [50]:
# Row vector with one value per commercial building (column of the access matrix)
new_access_array

array([[   0.  ,  386.04,    0.  , ...,  395.32, 3846.35,    0.  ]])

In [62]:
# Value for column 491, the index that the np.where lookup above returned
# for building '106749-C'
new_access_array[0, 491]

1976.5699999999997