In [None]:
import json

import geopandas as gpd
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import requests
import us

from census import Census
from shapely.geometry import Point

import plotly.express as px

from urllib.request import urlopen
import json
# County boundary GeoJSON from the plotly datasets repository; `counties` is the
# geometry passed as `geojson=` to the choropleth calls in this notebook.
COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(COUNTIES_GEOJSON_URL) as response:
    counties = json.load(response)

Imports, data cleaning, copied from `sor.ipynb`

In [None]:
# NOTE(review): the API key is an empty string — confirm whether `census` is
# actually queried anywhere; if so, read the key from an environment variable.
census = Census("", year=2020)

# Load the per-county table and force GEOID to text.
percents = pd.read_csv('all_race_pct_by_county.csv').astype({'GEOID': 'str'})

# Restore the leading zero on 4-character codes (presumably lost when the CSV
# column was parsed as a number — verify against the file); all other values
# are left untouched.
needs_pad = percents['GEOID'].str.len() == 4
percents.loc[needs_pad, 'GEOID'] = '0' + percents.loc[needs_pad, 'GEOID']

def fips_to_string(dataframe, fips_col_name):
    """Return a copy of `dataframe` whose FIPS column is text, with 4-character codes zero-padded.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing the FIPS column. It is not modified; `astype` returns a new frame.
    fips_col_name : str
        Name of the column holding the FIPS codes.

    Returns
    -------
    pandas.DataFrame
        Copy of the input in which `fips_col_name` has been cast to `str` and every
        value that is exactly 4 characters long has a '0' prepended. Values of any
        other length are passed through unchanged.
    """
    dataframe = dataframe.astype({fips_col_name: 'str'})
    codes = dataframe[fips_col_name]
    # `where` keeps values whose length is not 4 and substitutes the padded value
    # otherwise. The previous loop wrote the padded value to `dataframe[i]`, which
    # created a stray column and left a placeholder '0' in the FIPS column.
    dataframe[fips_col_name] = codes.where(codes.str.len() != 4, '0' + codes)
    return dataframe

def make_heatmap(dataframe, fips_column_name, column_name):
    """Display a US county choropleth coloured by one column of `dataframe`.

    dataframe: table with one row per county
    fips_column_name: name of the column used as plotly `locations`
    column_name: name of the column mapped to colour

    Uses the module-level `counties` GeoJSON for the geometry. The colour range
    runs from 0 to the column maximum. Returns nothing; the figure is shown.
    """
    color_ceiling = dataframe[column_name].max()
    choropleth_options = dict(
        geojson=counties,
        locations=fips_column_name,
        color=column_name,
        color_continuous_scale="Viridis",
        range_color=(0, color_ceiling),
        scope="usa",
        # Only relabels a column literally named 'white'; other columns keep their own name.
        labels={'white':'percent white pop'},
    )
    fig = px.choropleth(dataframe, **choropleth_options)
    # Remove the default margins so the map fills the output cell.
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
    fig.show()

My functions that let you animate the choropleths with a threshold slider

In [None]:
def thresh_df(percents, col_count, col_pct, thresh):
    """Build a long-format frame with one filtered copy of the county table per threshold.

    Parameters
    ----------
    percents : pandas.DataFrame
        Table containing `col_count`, `col_pct` and a "GEOID" column.
    col_count : str
        Name of the count column that each threshold is compared against.
    col_pct : str
        Name of the percent column carried along for plotting.
    thresh : sequence of numbers
        Threshold values, one per output group.

    Returns
    -------
    pandas.DataFrame
        Columns `[col_count, col_pct, "GEOID", "thresh"]`. For each threshold, in the
        order given, it holds the rows of `percents` whose `col_count` is greater than
        or equal to that threshold. The original row index is kept, so index labels
        repeat across thresholds.
    """
    thresh = np.asarray(thresh)
    base_df = percents[[col_count, col_pct, "GEOID"]]
    n_counties = len(base_df)

    # Stack one copy of the table per threshold, then tag each copy with its
    # threshold: np.repeat gives t0 x n_counties, then t1 x n_counties, and so on,
    # which matches the order in which the copies are stacked.
    stacked = pd.concat([base_df] * len(thresh))
    big_df = stacked.assign(thresh=np.repeat(thresh, n_counties))

    # Filter on the requested count column; this was previously hard-coded to "h_other".
    return big_df[big_df[col_count] >= big_df["thresh"]]

def choropleth_with_slider(percents, col_count, col_pct, thresh_values, counties_geojson):
    """
    Show an animated US county choropleth of `col_pct` with a threshold slider.

    percents: DataFrame holding `col_count`, `col_pct` and a "GEOID" column
    col_count: name of the count column the thresholds apply to
    col_pct: name of the percent column used to colour the map
    thresh_values: list or array of thresholds; the per-threshold filtering is
        delegated to `thresh_df`, and each animation frame shows the rows it
        returns for that threshold
    counties_geojson: county GeoJSON passed to plotly as `geojson`

    Returns nothing; the figure is displayed with `fig.show()`.
    """
    # Long-format table produced by thresh_df, tagged by a "thresh" column.
    frames_df = thresh_df(percents, col_count, col_pct, thresh_values)

    # One slider step per requested threshold; each step addresses its frame by
    # the string form of the threshold value.
    slider_steps = []
    for t in thresh_values:
        slider_steps.append({
            "args": [["%s" % t], {"frame": {"duration": 300, "redraw": True}, "mode": "immediate"}],
            "label": f"Threshold {t}",
            "method": "animate",
        })

    fig = px.choropleth(
        frames_df,
        geojson=counties_geojson,
        locations="GEOID",
        color=col_pct,
        color_continuous_scale="Viridis",
        # One colour range for every frame: 0 up to the largest plotted value.
        range_color=(0, frames_df[col_pct].max()),
        scope="usa",
        labels={col_pct: col_pct},
        animation_frame="thresh",  # column whose values define the animation frames
    )
    fig.update_layout(
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        sliders=[{"active": 0, "steps": slider_steps}],
    )
    fig.show()

Example with thresholds for `h_other`, plotting `h_other_pct`:

In [None]:
# Thresholds: 0 followed by 20 geometrically spaced values from 5 to 100000,
# each rounded to the nearest whole number.
# Hand-picked alternative (any list of thresholds works):
#thresh_values = [0,5,10,25,50,75,100,250,500,750,1000,2500,5000,7500,10000,25000,50000,100000]
thresh_values = np.insert(np.rint(np.geomspace(5, 100000, 20)), 0, 0)

choropleth_with_slider(
    percents,
    "h_other",
    "h_other_pct",
    thresh_values,
    counties,
)

In this example I used exponentially increasing thresholds, since that seemed to create a plot that falls off at a pretty constant rate, though this may not be the same for other data! I've included a cell below to visualize how the thresholds increase.

In [None]:
# Plot each threshold against its 1-based position twice: with a linear y-axis
# (left panel) and a logarithmic y-axis (right panel).
x = list(range(1, len(thresh_values) + 1))
y = thresh_values
fig, ax = plt.subplots(1, 2, dpi=300, figsize=(10, 5))

# (axes, title, use a log y-axis?)
panels = (
    (ax[0], "thresholds used (linear scale)", False),
    (ax[1], "thresholds used (log scale)", True),
)
for panel, title, use_log in panels:
    panel.scatter(x, y)
    panel.set_xticks(x, x, rotation='horizontal')
    panel.grid()
    panel.set_title(title)
    panel.set_xlabel("index")
    panel.set_ylabel("threshold")
    if use_log:
        # NOTE(review): any threshold equal to 0 has no position on a log axis —
        # confirm that leaving it off this panel is intended.
        panel.set_yscale('log')

plt.tight_layout()
plt.show()