# Spatial and Similarity Networks

This notebook builds two types of networks for Chicago Community Areas. The first network is a spatial adjacency graph where two areas are connected if their boundaries touch. This graph is used to study spatial spillovers and the extent to which saturated or underserved areas cluster in space.

The second network is a similarity graph based on amenity density and demographic structure. Each Community Area is described by a standardized feature vector, and two areas are connected when the cosine similarity of their vectors is at least 0.8. This graph reveals clusters of neighborhoods that share comparable profiles, even if they are far apart in geographic space.

For both networks, the notebook attaches the saturation index, hardship index, and per capita income to each node. It then computes degree centrality for every node and exports, for each network, a centrality table (CSV) and the pickled graph for use in the final analysis and visualizations.


In [4]:
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# -----------------------------------------------------------
# 1. Load data and geometry
# -----------------------------------------------------------

# Attribute table keyed by Community Area; later cells also read `df` directly.
df = pd.read_csv("saturation_index_by_CA.csv")

# Normalize the display name (upper-case, no surrounding whitespace).
df["ca_name"] = df["ca_name"].str.upper().str.strip()

# Load Community Area boundaries
ca_boundaries_url = "https://data.cityofchicago.org/resource/igwz-8jzy.geojson"
ca = gpd.read_file(ca_boundaries_url).to_crs("EPSG:4326")

# Standardize names
ca = ca.rename(columns={"area_numbe": "ca_num", "community": "ca_name"})
ca["ca_num"] = ca["ca_num"].astype(int)
ca["ca_name"] = ca["ca_name"].str.upper().str.strip()

# Join attributes on the numeric id only. Matching on the free-text name as
# well would silently leave an area with all-NaN attributes whenever the two
# sources spell its name differently; the boundary file's name is kept.
gdf = ca.merge(df.drop(columns=["ca_name"]), on="ca_num", how="left")

# Make unmatched areas visible instead of letting them surface later as NaNs.
unmatched_areas = gdf.loc[~gdf["ca_num"].isin(df["ca_num"]), "ca_name"]
if not unmatched_areas.empty:
    print("Warning: no attribute row for:", ", ".join(map(str, unmatched_areas)))

print("Merged geometry and attributes. Rows:", gdf.shape[0])


Merged geometry and attributes. Rows: 77


In [6]:
# -----------------------------------------------------------
# 2. Spatial adjacency network
# -----------------------------------------------------------

# Self-join the areas on the "touches" predicate; the overlapping column
# names come back suffixed _left/_right and are renamed to _i/_j.
id_cols = ["ca_num", "ca_name", "geometry"]
neighbors = gpd.sjoin(
    gdf[id_cols],
    gdf[id_cols],
    how="inner",
    predicate="touches",
).rename(
    columns={
        "ca_num_left": "ca_num_i",
        "ca_name_left": "ca_name_i",
        "ca_num_right": "ca_num_j",
        "ca_name_right": "ca_name_j",
    }
)

# Keep only pairs of distinct areas.
is_distinct_pair = neighbors["ca_num_i"] != neighbors["ca_num_j"]
pair_ids = neighbors.loc[is_distinct_pair, ["ca_num_i", "ca_num_j"]].drop_duplicates()

# Sort each pair so (a, b) and (b, a) collapse into one undirected edge.
edges_adj = pd.DataFrame(
    np.sort(pair_ids.to_numpy(), axis=1),
    columns=["ca_num_i", "ca_num_j"],
).drop_duplicates()

print("Number of adjacency edges:", edges_adj.shape[0])

# Build the graph: one node per area (with attributes), one edge per pair.
G_adj = nx.Graph()

G_adj.add_nodes_from(
    (
        int(area["ca_num"]),
        {
            "ca_name": area["ca_name"],
            "saturation_index": area.get("saturation_index", None),
            "hardship_index": area.get("hardship_index", None),
            "per_capita_income": area.get("per_capita_income", None),
        },
    )
    for _, area in gdf.iterrows()
)

G_adj.add_edges_from((int(u), int(v)) for u, v in edges_adj.to_numpy())

print("Adjacency graph nodes:", G_adj.number_of_nodes())
print("Adjacency graph edges:", G_adj.number_of_edges())

# Degree centrality, joined back to the attribute table for export.
deg_centrality_adj = nx.degree_centrality(G_adj)

centrality_adj_df = pd.DataFrame(
    list(deg_centrality_adj.items()),
    columns=["ca_num", "degree_centrality_adj"],
).merge(
    df[["ca_num", "ca_name", "saturation_index"]],
    on="ca_num",
    how="left",
)

centrality_adj_df.to_csv("adjacency_network_centrality.csv", index=False)

# Persist the graph object itself with pickle.
with open("adjacency_network.gpickle", "wb") as graph_file:
    pickle.dump(G_adj, graph_file)

print("Saved adjacency network centrality and graph.")

Number of adjacency edges: 197
Adjacency graph nodes: 77
Adjacency graph edges: 197
Saved adjacency network centrality and graph.


In [7]:

# -----------------------------------------------------------
# 3. Similarity network
# -----------------------------------------------------------

# Candidate features; only those actually present in gdf are used.
candidate_features = (
    "saturation_index",
    "scaled_business_license_count",
    "scaled_food_inspections_count",
    "scaled_liquor_license_count",
    "scaled_building_permits_count",
    "pct_dependents",
    "per_capita_income",
    "hardship_index",
)
feature_cols = [name for name in candidate_features if name in gdf.columns]

# Areas missing any selected feature are excluded from this network.
gdf_features = gdf.dropna(subset=feature_cols).copy()

# Standardize the features, then compare every pair of areas.
scaler = StandardScaler()
X = scaler.fit_transform(gdf_features[feature_cols])
sim_matrix = cosine_similarity(X)

similarity_threshold = 0.8
n = gdf_features.shape[0]

# Walk the strict upper triangle (i < j) in row-major order so each
# unordered pair is considered exactly once.
ca_ids = gdf_features["ca_num"].to_numpy()
upper_i, upper_j = np.triu_indices(n, k=1)
upper_sims = sim_matrix[upper_i, upper_j]
above_threshold = upper_sims >= similarity_threshold

sim_edges = [
    (int(ca_ids[a]), int(ca_ids[b]), score)
    for a, b, score in zip(
        upper_i[above_threshold],
        upper_j[above_threshold],
        upper_sims[above_threshold],
    )
]

print("Number of similarity edges above threshold:", len(sim_edges))

# Build the similarity graph: attributed nodes plus weighted edges.
G_sim = nx.Graph()

G_sim.add_nodes_from(
    (
        int(area["ca_num"]),
        {
            "ca_name": area["ca_name"],
            "saturation_index": area.get("saturation_index", None),
            "hardship_index": area.get("hardship_index", None),
            "per_capita_income": area.get("per_capita_income", None),
        },
    )
    for _, area in gdf_features.iterrows()
)

# The third tuple element is stored under the "weight" edge attribute.
G_sim.add_weighted_edges_from(sim_edges)

print("Similarity graph nodes:", G_sim.number_of_nodes())
print("Similarity graph edges:", G_sim.number_of_edges())

# Degree centrality for the similarity network, joined back for export.
deg_centrality_sim = nx.degree_centrality(G_sim)

centrality_sim_df = pd.DataFrame(
    list(deg_centrality_sim.items()),
    columns=["ca_num", "degree_centrality_sim"],
).merge(
    df[["ca_num", "ca_name", "saturation_index"]],
    on="ca_num",
    how="left",
)

centrality_sim_df.to_csv("similarity_network_centrality.csv", index=False)

# Persist the similarity graph with pickle.
with open("similarity_network.gpickle", "wb") as graph_file:
    pickle.dump(G_sim, graph_file)

print("Saved similarity network centrality and graph.")

Number of similarity edges above threshold: 360
Similarity graph nodes: 74
Similarity graph edges: 360
Saved similarity network centrality and graph.
