In [1]:
#%pip install "leafmap[maplibre]" scikit-learn

In [2]:
import os
import pandas as pd
import geopandas as gpd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from leafmap.common import evaluate_model, plot_actual_vs_predicted, download_file
import leafmap.maplibregl as leafmap

Download the Zillow home value data at the county level.

In [3]:
zhvi_url = "https://github.com/opengeos/datasets/releases/download/us/zillow_home_value_index_by_county.csv"
# Save to home directory to avoid read-only file system issues
zhvi_file = os.path.expanduser("~/zillow_home_value_index_by_county.csv")

In [4]:
if not os.path.exists(zhvi_file):
    download_file(zhvi_url, zhvi_file)

Process Zillow Data

In [5]:
zhvi_df = pd.read_csv(
    zhvi_file, dtype={"StateCodeFIPS": "string", "MunicipalCodeFIPS": "string"}
)
zhvi_df.index = "geoId/" + zhvi_df["StateCodeFIPS"] + zhvi_df["MunicipalCodeFIPS"]
zhvi_df.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,2000-01-31,...,2024-04-30,2024-05-31,2024-06-30,2024-07-31,2024-08-31,2024-09-30,2024-10-31,2024-11-30,2024-12-31,2025-01-31
geoId/06037,3101,0,Los Angeles County,county,CA,CA,"Los Angeles-Long Beach-Anaheim, CA",6,37,206685.940073,...,848100.109098,851249.814267,853328.259951,856958.247702,862129.887015,868712.125738,873819.306305,877921.805494,881313.863788,880547.372613
geoId/17031,139,1,Cook County,county,IL,IL,"Chicago-Naperville-Elgin, IL-IN-WI",17,31,145737.060609,...,297797.137247,299609.927472,300652.787054,301303.254592,302118.600852,302781.699599,303133.869223,303523.016515,304181.858348,305000.265124
geoId/48201,1090,2,Harris County,county,TX,TX,"Houston-The Woodlands-Sugar Land, TX",48,201,109327.540062,...,282689.212331,283279.667076,283161.721769,282792.559444,282454.169156,282239.614216,281873.680565,281423.466788,281191.55441,281000.895149
geoId/04013,2402,3,Maricopa County,county,AZ,AZ,"Phoenix-Mesa-Chandler, AZ",4,13,142829.577207,...,467600.239765,468996.512594,469164.40105,468562.421846,467451.263586,466509.197532,465782.610071,465120.188374,464544.544452,463613.340231
geoId/06073,2841,4,San Diego County,county,CA,CA,"San Diego-Chula Vista-Carlsbad, CA",6,73,212384.876155,...,918694.464981,927384.144048,931835.8125,933267.733074,932833.552309,932389.735948,931683.464702,931874.623336,932380.083872,932486.30519


Request access to PDFM Embeddings


In [6]:
# Save to home directory to avoid read-only file system issues
county_geojson = os.path.expanduser("~/county.geojson")

# Download county GeoJSON if it doesn't exist
county_url = "https://github.com/opengeos/datasets/releases/download/us/us_counties.geojson"

if not os.path.exists(county_geojson):
    print("Downloading county GeoJSON data...")
    download_file(county_url, county_geojson)
else:
    print(f"County GeoJSON file already exists: {county_geojson}")

County GeoJSON file already exists: /Users/haritshah/county.geojson


Load county boundaries

In [7]:
county_gdf = gpd.read_file(county_geojson)
county_gdf.set_index("COUNTY", inplace=True)
county_gdf.head()

Unnamed: 0_level_0,GEO_ID,STATE,NAME,LSAD,CENSUSAREA,geometry
COUNTY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
29,0500000US01029,1,Cleburne,County,560.1,"MULTIPOLYGON (((-85.38872 33.91304, -85.38088 ..."
31,0500000US01031,1,Coffee,County,678.972,"MULTIPOLYGON (((-86.03044 31.61894, -86.00409 ..."
37,0500000US01037,1,Coosa,County,650.926,"MULTIPOLYGON (((-86.00928 33.10164, -86.00917 ..."
39,0500000US01039,1,Covington,County,1030.456,"MULTIPOLYGON (((-86.34851 30.99434, -86.35023 ..."
41,0500000US01041,1,Crenshaw,County,608.84,"MULTIPOLYGON (((-86.14699 31.68045, -86.14711 ..."


In [13]:
# Fix the join by creating proper index mapping
print("Checking data indices...")
print("Sample Zillow indices:", zhvi_df.index[:5].tolist())
print("Sample county indices:", county_gdf.index[:5].tolist())

# Reset county_gdf and create proper index to match Zillow format
county_gdf_reset = county_gdf.reset_index()

# Extract FIPS code from GEO_ID and create geoId format
# GEO_ID format: '0500000US01029' -> extract '01029' -> create 'geoId/01029'
county_gdf_reset['FIPS'] = county_gdf_reset['GEO_ID'].str[-5:]  # Last 5 characters
county_gdf_reset['geoId'] = 'geoId/' + county_gdf_reset['FIPS']
county_gdf_reset.set_index('geoId', inplace=True)

print("Sample indices after conversion:")
print("Zillow indices:", zhvi_df.index[:5].tolist())
print("County indices:", county_gdf_reset.index[:5].tolist())

# Check for common indices
common_indices = set(zhvi_df.index) & set(county_gdf_reset.index)
print(f"\nNumber of common indices: {len(common_indices)}")

if len(common_indices) > 0:
    print("Sample common indices:", list(common_indices)[:5])
    
    # Perform the join
    df = zhvi_df.join(county_gdf_reset)
    zhvi_gdf = gpd.GeoDataFrame(df, geometry="geometry")
    
    print(f"\nJoin result shape: {zhvi_gdf.shape}")
    print(f"Rows with valid geometry: {zhvi_gdf.geometry.notna().sum()}")
    
    zhvi_gdf.head()
else:
    print("No common indices found - need to investigate further")
    zhvi_gdf = None

Checking data indices...
Sample Zillow indices: ['geoId/06037', 'geoId/17031', 'geoId/48201', 'geoId/04013', 'geoId/06073']
Sample county indices: ['029', '031', '037', '039', '041']
Sample indices after conversion:
Zillow indices: ['geoId/06037', 'geoId/17031', 'geoId/48201', 'geoId/04013', 'geoId/06073']
County indices: ['geoId/01029', 'geoId/01031', 'geoId/01037', 'geoId/01039', 'geoId/01041']

Number of common indices: 3073
Sample common indices: ['geoId/21031', 'geoId/31181', 'geoId/30031', 'geoId/06103', 'geoId/26081']

Join result shape: (3073, 318)
Rows with valid geometry: 3073


In [14]:
column = "2024-10-31"
gdf = zhvi_gdf[["RegionName", "State", column, "geometry"]]
gdf.head()

Unnamed: 0,RegionName,State,2024-10-31,geometry
geoId/06037,Los Angeles County,CA,873819.306305,"MULTIPOLYGON (((-118.52453 32.89549, -118.5358..."
geoId/17031,Cook County,IL,303133.869223,"MULTIPOLYGON (((-87.52464 41.62261, -87.52464 ..."
geoId/48201,Harris County,TX,281873.680565,"MULTIPOLYGON (((-94.99230 29.58816, -95.00686 ..."
geoId/04013,Maricopa County,AZ,465782.610071,"MULTIPOLYGON (((-112.91585 32.50536, -112.9322..."
geoId/06073,San Diego County,CA,931683.464702,"MULTIPOLYGON (((-116.75735 32.56561, -116.8222..."


In [16]:
m = leafmap.Map(style="liberty")
first_symbol_id = m.find_first_symbol_layer()["id"]
m.add_data(
    gdf,
    cmap="Blues",
    column=column,
    legend_title="Median Home Value",
    name="Median Home Value",
    before_id=first_symbol_id,
)
m.add_layer_control()
m

Container(children=[Row(children=[Col(children=[Map(calls=[['addControl', ('NavigationControl', {'showCompass'…

In [15]:
m = leafmap.Map(style="liberty", pitch=60)
m.add_data(
    gdf,
    cmap="Blues",
    column=column,
    legend_title="Median Home Value",
    extrude=True,
    scale_factor=3,
    before_id=first_symbol_id,
    name="Median Home Value",
)
m.add_layer_control()
m

Container(children=[Row(children=[Col(children=[Map(calls=[['addControl', ('NavigationControl', {'showCompass'…