In [41]:
import pandas as pd
import geopandas as gpd
import os
import numpy as np

# Project root: the parent of the directory the notebook is run from.
path = os.path.dirname(os.getcwd())


def _project_file(relative_path):
    """Return the location of `relative_path` under the project root `path`."""
    return os.path.join(path, relative_path)


# Spatial layers (shapefiles)
tif_districts_gdf = gpd.read_file(_project_file("Data/Processed/tif_districts.shp"))
metra_lines_gdf = gpd.read_file(_project_file("Data/Raw/MetraLinesshp.shp"))
bus_routes_gdf = gpd.read_file(_project_file("Data/Processed/bus_routes.shp"))
etod_lots_tifs = gpd.read_file(_project_file("Data/Processed/etod_lots_tifs.shp"))
sale_buildings_gdf = gpd.read_file(_project_file("Data/Processed/sale_buildings.shp"))
vacant_buildings_gdf = gpd.read_file(_project_file("Data/Processed/vacant_buildings.shp"))
merged_neighborhoods_gdf = gpd.read_file(_project_file("Data/Processed/neighborhood_level.shp"))

# Tabular inputs (CSV)
l_lines = pd.read_csv(_project_file("Data/Raw/CTA_l_lines.csv"))
neighborhoods = pd.read_csv(_project_file("Data/Raw/Neighborhoods.csv"))
unit_area = pd.read_csv(_project_file("Data/Raw/zone min unit area.csv"))

In [29]:
#merge FAR and min unit area info
# A recorded square footage of 0 is treated as missing, so those lots take the
# imputation path instead of being estimated at 0 units.
etod_lots_tifs["sq_ft"] = etod_lots_tifs["sq_ft"].replace(0.0, np.nan)

# Left join keeps exactly one row per lot. An outer join would also append a
# geometry-less row for every zone listed in `unit_area` that has no lot, and
# those rows flow into the unit counts (single-family zones are later
# assigned 1 unit regardless of square footage).
etod_lots_tifs = pd.merge(etod_lots_tifs, unit_area, on="zoning", how="left")

# Preview only; avoid rendering the whole frame in the notebook.
etod_lots_tifs.head()

In [30]:
# Usable share of each lot: 80% of the recorded square footage is assumed to
# be available for the unit calculation.
usable_sq_ft = etod_lots_tifs["sq_ft"] * 0.8
etod_lots_tifs["sq_ft_rentable"] = usable_sq_ft

# Buildable floor area = usable area scaled by the zone's FAR.
buildable_sq_ft = usable_sq_ft * etod_lots_tifs["FAR"]
etod_lots_tifs["sq_ft_far"] = buildable_sq_ft

# Business and commercial lots give up one floor's worth of usable area
# (the ground floor); every other lot keeps the full buildable area.
ground_floor_excluded = etod_lots_tifs["zone_cat"].isin(["B-Business", "C-Commercial"])
etod_lots_tifs["sq_ft_residential"] = np.where(ground_floor_excluded,
                                               buildable_sq_ft - usable_sq_ft,
                                               buildable_sq_ft)

# Unit size is the zone's minimum lot area per unit when that exceeds
# 720 sq. ft.; otherwise (including when it is missing) 720 sq. ft. is used.
min_lot_area = etod_lots_tifs["lot_area_per_unit"]
etod_lots_tifs["avg_unit_size"] = min_lot_area.where(min_lot_area > 720, 720)

KeyError: 'FAR'

In [35]:
# Estimated number of units per lot.
residential_sq_ft = etod_lots_tifs["sq_ft_residential"]
unit_size = etod_lots_tifs["avg_unit_size"]

# Lots too small for a single unit get 0; otherwise take the whole number of
# units that fit. Lots with missing square footage stay NaN.
etod_lots_tifs["n_units"] = np.where(unit_size > residential_sq_ft,
                                     0,
                                     np.floor(residential_sq_ft / unit_size))

# Single-family zones always count as exactly one unit.
single_family = etod_lots_tifs["zoning"].isin(["RS-1", "RS-2", "RS-3"])
etod_lots_tifs["n_units"] = np.where(single_family, 1, etod_lots_tifs["n_units"])

In [36]:
# calculate average number of units by zone to impute for lots missing sqft info
avg_units_zone = etod_lots_tifs.groupby("zoning")["n_units"].mean().reset_index(name="imputed_n_units")

# Broadcast each zone's mean back onto its own lots with `transform` rather
# than merging `avg_units_zone` in. Assigning the column directly keeps the
# row order and makes the cell safe to re-run: a repeated merge would produce
# `imputed_n_units_x` / `imputed_n_units_y` and break the lookup below.
etod_lots_tifs["imputed_n_units"] = etod_lots_tifs.groupby("zoning")["n_units"].transform("mean")

# impute: only lots with no estimate take the (rounded-down) zone average;
# zones where no lot has an estimate remain NaN.
etod_lots_tifs["n_units"] = np.where(etod_lots_tifs["n_units"].isna(),
                                     np.floor(etod_lots_tifs["imputed_n_units"]),
                                     etod_lots_tifs["n_units"])

In [47]:
# Total estimated units across all lots; NaN estimates are skipped (pandas default).
etod_lots_tifs["n_units"].sum()

np.float64(53282.0)

In [17]:
# Lot counts at three levels of aggregation. Each result is a flat frame with
# the grouping columns plus one named count column.

# per neighborhood and zoning class
lots_by_zone_neigh = (
    etod_lots_tifs.groupby(["Community", "zoning"]).size().rename("n_lots").reset_index()
)

# per neighborhood
lots_by_neigh = (
    etod_lots_tifs.groupby(["Community"]).size().rename("n_lots_neigh").reset_index()
)

# per neighborhood and zone category
lots_by_zone_cat = (
    etod_lots_tifs.groupby(["Community", "zone_cat"]).size().rename("n_lots_cat").reset_index()
)


In [18]:
#create broader category for zones
# Maps each category label to the prefix that identifies its zoning codes.
zone_cats = {"B-Business":"B", 
             "C-Commercial":"C",
             "D-Downtown": "D",
             "PD-Planned Development":"PD",
             "R-Residential":"R"}

def map_category(item):
    """Return the broad zone category for a zoning code.

    The code is matched against the prefixes in `zone_cats` in dictionary
    order, and the first category whose prefix the code starts with is returned.

    Parameters
    ----------
    item : str
        Zoning code, e.g. "RS-3" or "B3-2".

    Returns
    -------
    str
        The matching key of `zone_cats`, or "Unknown" when no prefix matches
        or `item` is not a string (e.g. NaN / None for a missing zoning value).
    """
    # Missing values arrive as float NaN or None and have no `.startswith`.
    if not isinstance(item, str):
        return "Unknown"
    for key, value in zone_cats.items():
        if item.startswith(value):  # Check if item starts with dictionary value
            return key
    return "Unknown"  # Default value if no match is found

# Label every (neighborhood, zoning) row with its broad zone category.
lots_by_zone_neigh["zone_cat"] = lots_by_zone_neigh["zoning"].map(map_category)

In [20]:
# Combine the three count tables into one, ordered by neighborhood and zone:
# neighborhood totals join on Community, category totals on Community + zone_cat.
n_lots_neigh_zone = (
    lots_by_zone_neigh
    .merge(lots_by_neigh, on="Community", how="outer")
    .merge(lots_by_zone_cat, on=["Community", "zone_cat"], how="outer")
    .sort_values(by=["Community", "zoning"])
)

In [45]:
# Units per zoning class, under current zoning and under the proposed re-zoning.

# Any "unknown" placeholder in n_units is blanked to NaN before the column is
# cast to float.
raw_unit_counts = etod_lots_tifs["n_units"]
etod_lots_tifs["n_units"] = raw_unit_counts.mask(raw_unit_counts == "unknown").astype(float)

# Total units per current zoning class.
n_units_zone = (
    etod_lots_tifs.groupby("zoning")["n_units"].sum().rename("n_units").reset_index()
)

# Effective zoning: the lot's own zoning where re_zone is "none", otherwise re_zone.
proposed_zone = etod_lots_tifs["re_zone"]
etod_lots_tifs["updated_zoning"] = proposed_zone.where(proposed_zone != "none",
                                                       etod_lots_tifs["zoning"])

# Total units per effective (re-zoned) class.
n_units_zone_og = (
    etod_lots_tifs.groupby("updated_zoning")["n_units"].sum().rename("n_units_rezoned").reset_index()
)

# Side-by-side table; zones that appear in only one of the two views are kept.
n_units_all_zoning = n_units_zone.merge(n_units_zone_og,
                                        left_on="zoning",
                                        right_on="updated_zoning",
                                        how="outer")

In [46]:
# Write both summary tables to the processed-data folder.
for summary_table, relative_csv in (
    (n_lots_neigh_zone, "Data/Processed/vacant_lots_zone_counts.csv"),
    (n_units_all_zoning, "Data/Processed/vacant_lots_unit_counts_by_zone.csv"),
):
    summary_table.to_csv(os.path.join(path, relative_csv))

In [None]:
#Analysis for buildings for sale and vacant buildings
#Data Cleaning

# Value written into SqFt for buildings in the listed zones that have no usable
# square footage. It must equal the value the FAR-adjustment step tests for
# (`SqFt == 1321`); with any other fill value that adjustment never applies.
MISSING_SQFT_SENTINEL = 1321


def _clean_building_sqft(buildings, impute_zones, fill_value=MISSING_SQFT_SENTINEL):
    """Rename the zoning column and normalise `SqFt` to numeric values.

    Parameters
    ----------
    buildings : DataFrame
        Frame with a `ZONE_CLASS` (or already renamed `zoning`) column and a
        `SqFt` column.
    impute_zones : list of str
        Zoning codes whose missing square footage is replaced by `fill_value`.
    fill_value : number
        Placeholder square footage for those rows.

    Returns
    -------
    DataFrame
        Copy of `buildings` with `ZONE_CLASS` renamed to `zoning` and `SqFt`
        as a float column: whole square feet, NaN where the value was
        unparseable or 0 and the zone is not in `impute_zones`.
    """
    #renaming zones
    cleaned = buildings.rename(columns={"ZONE_CLASS": "zoning"})

    # Strip thousands separators (e.g. "1,320") before parsing; anything that
    # still is not a number becomes NaN.
    sq_ft = pd.to_numeric(
        cleaned["SqFt"].astype(str).str.replace(",", "", regex=False),
        errors="coerce",
    )

    # Truncate to whole square feet, then treat 0 (including values that were
    # unparseable) as missing.
    sq_ft = sq_ft.fillna(0.0).astype(int)
    sq_ft = sq_ft.where(sq_ft != 0)

    # Only the listed zones receive the placeholder; other rows stay NaN.
    needs_placeholder = cleaned["zoning"].isin(impute_zones) & sq_ft.isna()
    cleaned["SqFt"] = np.where(needs_placeholder, fill_value, sq_ft)
    return cleaned


sale_buildings_gdf = _clean_building_sqft(
    sale_buildings_gdf,
    ["RT-4", "RT-3.5", "RT-4A"],
)

vacant_buildings_gdf = _clean_building_sqft(
    vacant_buildings_gdf,
    ["RT-4", "RT-3.5", "RT-4A", "RM-4.5", "RM-5", "RM-5.5", "RM-6.5"],
)

In [None]:

def _add_residential_sqft(buildings, zone_table):
    """Attach zoning parameters and derive the residential floor-area columns.

    Parameters
    ----------
    buildings : DataFrame
        Building records with `zoning`, `SqFt` and `ZONE_CAT` columns.
    zone_table : DataFrame
        Per-zone parameters keyed by `zoning`; must supply `FAR` and
        `lot_area_per_unit`.

    Returns
    -------
    DataFrame
        One row per input building, with `sq_ft`, `sq_ft_residential` and
        `avg_unit_size` added.
    """
    #merging with unit area data
    # Left join keeps one row per building. An outer join would also append an
    # empty row for every zone in `zone_table` with no building, and such rows
    # in single-family zones would later be counted as 1 unit each.
    merged = pd.merge(buildings, zone_table, on="zoning", how="left")

    #update sqft based on far only for RM and RT units missing info. used 1321 bc its a unique identifier for these obs
    # NOTE(review): this matches only rows whose SqFt is exactly 1321; confirm
    # the cleaning step fills missing RT/RM square footage with that same value.
    merged["sq_ft"] = np.where(merged["SqFt"] == 1321,
                               merged["SqFt"] * merged["FAR"],
                               merged["SqFt"])

    # Business and commercial zones: 75% of the floor area is counted as
    # residential (the original note describes this as the area above the
    # ground floor); all other zones count the full floor area.
    mixed_use = merged["ZONE_CAT"].isin(["B-Business", "C-Commercial"])
    merged["sq_ft_residential"] = np.where(mixed_use,
                                           merged["sq_ft"] * .75,
                                           merged["sq_ft"])

    #assume 720 sq. ft. average unit size unless min unit size is larger
    merged["avg_unit_size"] = np.where(merged["lot_area_per_unit"] > 720,
                                       merged["lot_area_per_unit"], 720)
    return merged


sale_buildings_gdf = _add_residential_sqft(sale_buildings_gdf, unit_area)
vacant_buildings_gdf = _add_residential_sqft(vacant_buildings_gdf, unit_area)


In [None]:

def _estimate_units(buildings):
    """Return the estimated number of units for each row of `buildings`.

    Rows whose residential square footage is below the unit size get 0; other
    rows get the whole number of units that fit (NaN when the square footage
    is missing). Single-family zones (RS-1, RS-2, RS-3) are always 1.
    """
    unit_size = buildings["avg_unit_size"]
    residential_sq_ft = buildings["sq_ft_residential"]

    # 0 when not even one unit fits, otherwise round the quotient down
    units_that_fit = np.where(unit_size > residential_sq_ft,
                              0,
                              np.floor(residential_sq_ft / unit_size))

    # 1 unit for single family
    single_family = buildings["zoning"].isin(["RS-1", "RS-2", "RS-3"])
    return np.where(single_family, 1, units_that_fit)


# calculate estimate of number of units per building, for both building sets
sale_buildings_gdf["n_units"] = _estimate_units(sale_buildings_gdf)
vacant_buildings_gdf["n_units"] = _estimate_units(vacant_buildings_gdf)



In [None]:

# Combined unit estimate for for-sale and vacant buildings; rows with no
# estimate (NaN) are skipped by the sums.
sale_units_total = sale_buildings_gdf["n_units"].sum()
vacant_units_total = vacant_buildings_gdf["n_units"].sum()
print(sale_units_total + vacant_units_total)
