# Shops and Retail Pipeline

This notebook extracts retail related businesses from the Chicago Business Licenses dataset and aggregates them by Community Area. It identifies shop like establishments such as retail stores, consumer services, small shops, convenience stores, and other commercial retail activity. The goal is to build a measure of retail supply across Community Areas that can be used for opportunity and saturation analysis.


In [1]:
import pandas as pd
import geopandas as gpd

# -----------------------------------------------------------
# 1. Load data
# -----------------------------------------------------------

bl_url = "https://data.cityofchicago.org/resource/r5kz-chrr.csv"
ca_url = "https://data.cityofchicago.org/resource/igwz-8jzy.geojson"

bl = pd.read_csv(bl_url)
ca = gpd.read_file(ca_url).to_crs("EPSG:4326")

# CA cleaning
ca = ca.rename(columns={"area_numbe": "ca_num", "community": "ca_name"})
ca["ca_num"] = ca["ca_num"].astype(int)
ca["ca_name"] = ca["ca_name"].str.upper().str.strip()

In [2]:
# -----------------------------------------------------------
# 2. Filter relevant retail categories
# -----------------------------------------------------------

retail_categories = [
    "Limited Business License",
    "Retail Food Establishment",
    "Tobacco Dealer",
    "Commercial Retail Sales",
    "Motor Vehicle Repair",
    "Consumer Services"
]

bl_retail = bl[bl["license_description"].isin(retail_categories)].copy()
bl_retail = bl_retail.dropna(subset=["latitude", "longitude"])

# Convert to GeoDataFrame
bl_retail_gdf = gpd.GeoDataFrame(
    bl_retail,
    geometry=gpd.points_from_xy(
        bl_retail["longitude"].astype(float),
        bl_retail["latitude"].astype(float)
    ),
    crs="EPSG:4326"
)


In [3]:
# -----------------------------------------------------------
# 3. Spatial join with CA boundaries
# -----------------------------------------------------------

retail_join = gpd.sjoin(
    bl_retail_gdf,
    ca[["ca_num", "ca_name", "geometry"]],
    how="inner",
    predicate="within"
)

In [None]:
# -----------------------------------------------------------
# 4. Aggregate by CA
# -----------------------------------------------------------

retail_counts = retail_join.groupby("ca_num").size().reset_index(name="retail_count")

# Merge into CA table
retail_df = ca[["ca_num", "ca_name"]].merge(retail_counts, on="ca_num", how="left")
retail_df["retail_count"] = retail_df["retail_count"].fillna(0)

# Save output
retail_df.to_csv("../datasets/retail_by_CA.csv", index=False)

retail_df.head()

Unnamed: 0,ca_num,ca_name,retail_count
0,1,ROGERS PARK,2.0
1,2,WEST RIDGE,17.0
2,3,UPTOWN,5.0
3,4,LINCOLN SQUARE,9.0
4,5,NORTH CENTER,4.0
