In [3]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
import folium

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    All four arguments are latitudes/longitudes in degrees. Each may be a
    scalar or an array-like (NumPy array, pandas Series); the usual NumPy
    broadcasting rules apply, so one fixed point can be compared against a
    whole column of points in a single call.

    Returns a value of the broadcast shape of the inputs, using a spherical
    Earth of radius 6371 km.
    """
    R = 6371  # Earth radius in km
    lat1 = np.radians(lat1)
    lon1 = np.radians(lon1)
    lat2 = np.radians(lat2)
    lon2 = np.radians(lon2)

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

def randomColor():
    """Return a random colour as a ``#RRGGBB`` hex string (uppercase digits).

    Draws six hex digits from the module-level ``random`` generator.
    """
    hex_digits = "0123456789ABCDEF"
    picks = [random.choice(hex_digits) for _ in range(6)]
    return "#" + "".join(picks)

def jitter(dataFrame, clrSeed, jitter_strength=1000):
    """Order rows by population perturbed with seeded Gaussian noise.

    Parameters:
        dataFrame: frame with a numeric "Total Population" column.
        clrSeed: seed for ``np.random.default_rng``; the same seed yields the
            same noise and therefore the same ordering.
        jitter_strength: standard deviation of the zero-mean normal noise
            added to each population value (default 1000). Use 0 for a plain
            sort by population.

    Returns:
        A new frame sorted by the jittered population, largest first, with a
        fresh 0..n-1 index.

    Side effect: a "TotalPopJitter" column is added to (or overwritten on)
    the frame that was passed in.
    """
    rng = np.random.default_rng(seed=clrSeed)

    noise = rng.normal(0, jitter_strength, size=len(dataFrame))
    dataFrame["TotalPopJitter"] = dataFrame["Total Population"] + noise
    ordered = dataFrame.sort_values(by="TotalPopJitter", ascending=False)
    return ordered.reset_index(drop=True)

def _parse_bool(text):
    """Interpret a typed answer as a boolean.

    ``bool(some_string)`` is True for every non-empty string, so answering
    "False" at a prompt would still switch the option on. This parser reads
    the text instead: true/t/yes/y/1 give True; false/f/no/n/0 and an empty
    answer give False (case-insensitive, surrounding whitespace ignored).

    Raises:
        ValueError: if the answer is none of the recognised spellings.
    """
    answer = text.strip().lower()
    if answer in ("true", "t", "yes", "y", "1"):
        return True
    if answer in ("false", "f", "no", "n", "0", ""):
        return False
    raise ValueError(f"Expected True or False, got {text!r}")


# Interactive run parameters.
clusterScaleFactor = float(input("Cluster Scale Factor (Float): "))
clusterSeed = int(input("Seed (Integer): "))
hideRural = _parse_bool(input("Hide rural? (True/False): "))
keepBorders = _parse_bool(input("Keep borders? (True/False, removing borders is a bit ugly unfortunately): "))
# Running id of the most recently created cluster; it is incremented before
# each new cluster is assigned, so real clusters start at 1.
clusterIndex = 0
# Colour per cluster id; index 0 is grey.
clusterColors = ["#cccccc"]

# Read the tract file and order the rows by seeded, noise-perturbed population
# (an alternative is a plain descending sort on "Total Population").
tractLocsPops = jitter(gpd.read_file("tractLocsPops.geojson"), clusterSeed)
# No tract belongs to a cluster yet.
tractLocsPops["cluster"] = None

# Clustering pass: visit every tract in row order and grow or merge clusters
# around it. Cluster ids are written into the "cluster" column in place.
for i in range(len(tractLocsPops)):
    # Coordinates of the tract currently being visited.
    lat0 = tractLocsPops.iloc[i]["INTPTLAT"]
    lon0 = tractLocsPops.iloc[i]["INTPTLON"]
    # Distance from the current tract to every tract (scratch column, rewritten each pass).
    tractLocsPops["_distance"] = haversine(lat0, lon0, tractLocsPops["INTPTLAT"],tractLocsPops["INTPTLON"])

    # Reach radius of this tract: scales with the square root of its population.
    clusterScale = clusterScaleFactor*np.sqrt(tractLocsPops.iloc[i]["Total Population"])
    # True when the current tract already carries a cluster id.
    clustered = pd.notna(tractLocsPops.iloc[i]["cluster"])
    # Mask of tracts inside the reach radius.
    near = tractLocsPops["_distance"] <= clusterScale
    # Mask of tracts that have no cluster id yet.
    unassigned = tractLocsPops["cluster"].isna()
    # Distinct cluster ids already present inside the radius, captured before
    # any reassignment below so they can be merged afterwards.
    nearClusters = tractLocsPops.loc[near & (~unassigned), "cluster"].unique()

    if (not clustered) & tractLocsPops.loc[near, "cluster"].isna().all():
        # Nothing in range is clustered: start a new cluster with its own colour
        # and claim every unassigned tract in range.
        clusterIndex += 1
        clusterColors.append(randomColor())
        tractLocsPops.loc[near & unassigned, "cluster"] = clusterIndex
    elif not clustered:
        # The current tract is unclustered but something in range is: adopt the
        # cluster of the closest already-assigned tract in range.
        nearestCluster = tractLocsPops[near & (~unassigned)].nsmallest(1, "_distance")["cluster"].squeeze()

        #tractLocsPops.loc[near & unassigned, "cluster"] = nearestCluster
        # Every tract in range joins that cluster, including ones assigned elsewhere...
        tractLocsPops.loc[near, "cluster"] = nearestCluster
        # ...and every cluster that had a member in range is merged into it entirely.
        tractLocsPops.loc[tractLocsPops["cluster"].isin(nearClusters), "cluster"] = nearestCluster
    else:
        # The current tract is already clustered: pull everything in range into
        # its cluster and merge every cluster that had a member in range.
        #tractLocsPops.loc[near & unassigned, "cluster"] = tractLocsPops.iloc[i]["cluster"]
        tractLocsPops.loc[near, "cluster"] = tractLocsPops.iloc[i]["cluster"]
        tractLocsPops.loc[tractLocsPops["cluster"].isin(nearClusters), "cluster"] = tractLocsPops.iloc[i]["cluster"]

# Remove the scratch distance column and store cluster ids as integers.
# NOTE(review): astype(int) presumably fails if any tract is still unassigned
# (e.g. a negative scale factor leaves `near` empty) - confirm inputs.
tractLocsPops.drop(columns = ["_distance"], inplace = True)
tractLocsPops["cluster"] = tractLocsPops["cluster"].astype(int)

if hideRural:
    # Number of tracts in each tract's own cluster, aligned row by row.
    tractsPerCluster = tractLocsPops["cluster"].value_counts()
    ownClusterSize = tractLocsPops["cluster"].map(tractsPerCluster)
    # Clusters of one or two tracts are folded into cluster 0.
    tractLocsPops.loc[ownClusterSize <= 2, "cluster"] = 0
    
# ax = tractLocsPops.plot(figsize = (10,10), color = [clusterColors[i] for i in tractLocsPops["cluster"]])
# plt.show()

def tractStyle(feature):
    """Return the style dict for one GeoJSON feature.

    The fill colour is looked up in ``clusterColors`` by the feature's
    "cluster" property; the outline weight is 1 when ``keepBorders`` is
    truthy and 0 otherwise.
    """
    cluster_id = feature["properties"]["cluster"]
    style = dict(
        fillColor=clusterColors[cluster_id],
        fillOpacity=0.8,
        color="none",
        weight=int(keepBorders),
    )
    return style

# Build the map, add the tracts as one styled GeoJSON layer, and write it to disk.
m = folium.Map(location=[37.7749, -122.4194], zoom_start=8)
tractLayer = folium.GeoJson(tractLocsPops, style_function=tractStyle)
tractLayer.add_to(m)
m.save("TractClusters.html")

Cluster Scale Factor (Float):  0.1
Seed (Integer):  1
Hide rural? (True/False):  True
Keep borders? (True/False, removing borders is a bit ugly unfortunately):  True
