In [3]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import haversine as hs

# Helper modules
import helper_population_allocation as pa
import helper_distance_calculation as dc



In [4]:
# Read the pre-processed buildings shapefile into a GeoDataFrame.
buildings_df = gpd.read_file(
    "../processed_data/relevant_buildings.shp"
)


In [5]:
# Load the saved residential/commercial access and distance matrices.
# The access matrix is bound to `res_comm_access_matrix`, which is the name the
# store-placement cell at the bottom of this notebook reads. The earlier name
# `res_comm_access_buildings` is kept as an alias so the inspection cell that
# displays it keeps working.
res_comm_access_matrix = np.load('../processed_data/res_comm_access_matrix.npy')
res_comm_access_buildings = res_comm_access_matrix
res_comm_distance_matrix = np.load('../processed_data/res_comm_distance_matrix.npy')

In [6]:
# Create ID variable: "<1-based row number>-<CLASS>" (ids elsewhere in this
# notebook look like "106710-C").
# reset_index is reassigned rather than done in place, and the id is built with
# vectorised string operations instead of a row-by-row apply.
buildings_df = buildings_df.reset_index(drop=True)
sequence_numbers = pd.Series(buildings_df.index + 1, index=buildings_df.index)
buildings_df['building_id'] = (
    sequence_numbers.astype(str) + '-' + buildings_df['CLASS'].astype(str)
)

# Population parameter (Pj)
# NOTE: in the recorded run this call failed with
# "ImportError: Spatial indexes require either `rtree` or `pygeos`" --
# one of those packages must be installed in the kernel's environment.
res_population = pa.get_population(geopandas_dataframe=buildings_df)

ImportError: Spatial indexes require either `rtree` or `pygeos`. See installation instructions at https://geopandas.org/install.html

In [None]:
# Display the buildings GeoDataFrame to inspect its rows and columns.
buildings_df

In [None]:
# Distinct values present in the 'hood' column.
set(buildings_df['hood'])

In [None]:
# Python type of the first 'geoid10' value; the commercial-buildings cell
# further down compares this column against a string.
type(buildings_df["geoid10"].iloc[0])

In [None]:
# Rows whose 'hood' value is exactly 'Central Oakland'.
buildings_df[buildings_df["hood"] == 'Central Oakland']

In [None]:
# Inspect the array loaded from res_comm_access_matrix.npy.
res_comm_access_buildings 

In [None]:
# Inspect the array loaded from res_comm_distance_matrix.npy.
res_comm_distance_matrix

In [None]:
# Inspect the result of pa.get_population computed on buildings_df above.
res_population

In [None]:
# Residential buildings, ordered by building_id, with coordinates attached by
# dc.get_geocoordinate. The two arrays below record that ordering.
res_buildings = dc.get_geocoordinate(
    buildings_df
    .loc[lambda frame: frame['class_reco'].str.contains('Residential')]
    .sort_values('building_id'),
    'geometry',
)

# Position i in both arrays refers to the same residential building.
res_buildings_array = np.array(res_buildings['building_id'])
res_buildings_coordinates_array = np.array(res_buildings['coordinates'])

In [None]:
# Commercial buildings, ordered by building_id, with coordinates attached by
# dc.get_geocoordinate.
# NOTE(review): the pattern 'commercial' is lower-case while the residential and
# grocery filters match capitalised labels; str.contains is case-sensitive by
# default, so confirm this matches the class_reco values actually present.
comm_buildings = dc.get_geocoordinate(
    buildings_df
    .loc[lambda frame: frame['class_reco'].str.contains('commercial')]
    .sort_values('building_id'),
    'geometry',
)

# Keep only the commercial buildings whose geoid10 is '420030406001'.
comm_buildings_geoid = comm_buildings.loc[comm_buildings["geoid10"] == '420030406001']

# NOTE(review): despite their generic names, both arrays below cover only the
# geoid-filtered subset, not every commercial building.
comm_buildings_array = np.array(comm_buildings_geoid['building_id'])
comm_buildings_coordinates_array = np.array(comm_buildings_geoid['coordinates'])


In [None]:
# Building ids of the commercial buildings kept after the geoid10 filter.
comm_buildings_array

In [None]:
%%time

# Create parameter matrices (Res Population - Pj)
# This recomputes res_population for the residential buildings only, replacing
# the value computed earlier from the full buildings_df.
# The ith value of res_population_array is the population of the ith residential
# building; the store-placement cell deletes entries from this array and rows
# from the access matrix with the same indices, so the two must share an order.
# NOTE(review): presumably pa.get_population preserves the row order of
# res_buildings -- TODO confirm.
res_population = pa.get_population(geopandas_dataframe=res_buildings) 
res_population_array = np.array(res_population['population'])
# Displayed as the cell output for a quick visual check.
res_population_array



In [1]:
# Grocery stores, ordered by building_id, with coordinates attached by
# dc.get_geocoordinate. The two arrays below record that ordering.
# NOTE: this cell needs buildings_df and dc from the cells above; the recorded
# run raised NameError because it was executed on a fresh kernel.
grocery_stores = dc.get_geocoordinate(
    buildings_df
    .loc[lambda frame: frame['class_reco'].str.contains('Grocery')]
    .sort_values('building_id'),
    'geometry',
)

# Position i in both arrays refers to the same grocery store.
grocery_stores_array = np.array(grocery_stores['building_id'])
grocery_stores_coordinates_array = np.array(grocery_stores['coordinates'])


NameError: name 'buildings_df' is not defined

In [2]:
%%time

# Create parameter matrices (Res groc access array - Aj)
# ith value indicates whether the ith residential building has existing access
res_groc_distance_matrix, res_groc_access_matrix = dc.calculate_access(res_buildings_coordinates_array, grocery_stores_coordinates_array)
res_access_array = np.amax(res_groc_access_matrix, 1)


NameError: name 'dc' is not defined

In [None]:
# Draw one row from the geoid-filtered commercial buildings; random_state is
# fixed so the same row is drawn on every run.
random_build = comm_buildings_geoid.sample(n=1, random_state=1)

In [None]:
# Show the sampled commercial building row.
random_build 

In [None]:
# Position(s) of building id "106710-C" within comm_buildings_array.
np.where(comm_buildings_array == "106710-C")

In [None]:
###########################
# STEP 0: Bind the inputs and check that their orderings line up
###########################
# The access matrix is loaded near the top of this notebook under the name
# `res_comm_access_buildings`; bind it to the name used throughout this cell.
res_comm_access_matrix = res_comm_access_buildings

# Rows are residential buildings and columns are candidate commercial buildings.
# Every index computed below is only meaningful if the companion arrays have
# matching lengths, so fail loudly instead of reporting the wrong building.
n_res_rows, n_comm_cols = res_comm_access_matrix.shape
if len(res_access_array) != n_res_rows or len(res_population_array) != n_res_rows:
    raise ValueError(
        f"res_comm_access_matrix has {n_res_rows} rows but res_access_array has "
        f"{len(res_access_array)} entries and res_population_array has "
        f"{len(res_population_array)} entries; they must describe the same "
        f"residential buildings in the same order"
    )
if n_comm_cols == 0 or len(comm_buildings_array) != n_comm_cols:
    raise ValueError(
        f"res_comm_access_matrix has {n_comm_cols} columns but comm_buildings_array "
        f"has {len(comm_buildings_array)} entries; the matrix columns and the "
        f"commercial building ids must cover the same non-empty set of buildings"
    )

###########################
# STEP 1: Take the res_comm_access_matrix, remove those rows (each row represents a residential building) which have existing access
###########################
existing_access_indices = res_access_array.nonzero()[0]  # Indices of residential buildings that currently have access
res_comm_access_matrix_subset = np.delete(res_comm_access_matrix, existing_access_indices, axis=0)

###########################
# STEP 2: Do the same thing for res_population_array so that the ordering matches
###########################
res_population_array_sub = np.delete(res_population_array, existing_access_indices, axis=0)

###########################
# STEP 3: Do a matrix multiplication between res_population_array_sub and res_comm_access_matrix_subset
###########################

# How this works (R = residential buildings without access, C = commercial buildings):
#
# 1. res_population_array_sub is reshaped into a (1 x R) row vector.
# 2. res_comm_access_matrix_subset is (R x C).
# 3. Their matrix product is a (1 x C) array.
# 4. Element c of the product is
#        P0 * access[0, c] + P1 * access[1, c] + ...
#    i.e. the total population that would newly gain access if a store were
#    placed in commercial building c.
#
# reshape(1, -1) is used instead of (-1, len(...)) so that the case where no
# residential building lacks access (R == 0) still yields a valid (1 x 0) array.
res_population_array_sub = res_population_array_sub.reshape(1, -1)
new_access_array = np.matmul(res_population_array_sub, res_comm_access_matrix_subset)

###########################
# STEP 4: Print results
###########################
# argmax works on the flattened (1 x C) array, so the result is a column index.
chosen_comm_index = np.argmax(new_access_array)
chosen_comm_building = comm_buildings_array[chosen_comm_index]
new_access_created = np.max(new_access_array)

print(f"the new store should be put at commercial building {chosen_comm_building}")
print(f"putting the store here would give access to {new_access_created} new people")
