In [32]:
import pandas as pd
import numpy as np
import plotly.express as px

# State data

In [33]:
# Load the final state-level dataset (nci_state.csv); the path is relative to
# this notebook's working directory.
df = pd.read_csv("../../data/final/nci_state.csv")

In [34]:
# Load the state-code lookup table from the local raw data directory.
# It is keyed by "fips" and is joined onto `df` in the next cell.
states = pd.read_csv("../../data/raw/state_codes.csv")

In [35]:
# Attach the state codes to the data: a left join on the shared "fips" key,
# after which "fips" is restored as an ordinary column.
df = (
    df.set_index("fips")
    .join(states.set_index("fips"))
    .reset_index()
)

In [36]:
def _outlier_clipped_range(values, n_std=2):
    """Return a ``(low, high)`` color range clipped to ``mean ± n_std * std``.

    The bounds never extend past the observed minimum/maximum, so the range
    only shrinks when outliers are present.

    Returns ``None`` when the standard deviation is undefined (pandas' sample
    standard deviation is NaN for fewer than two observations), so the caller
    can fall back to plotly's automatic color range instead of passing NaNs.
    """
    spread = values.std()
    if pd.isna(spread):
        return None

    center = values.mean()
    return (
        max(center - n_std * spread, values.min()),
        min(center + n_std * spread, values.max()),
    )


def plot_states(metric: str) -> None:
    """Show a US state choropleth of one metric column from the global ``df``.

    Parameters
    ----------
    metric : str
        Name of the column in ``df`` to map. Rows where either ``state`` or
        this column is missing are dropped before plotting.

    Notes
    -----
    The ``state`` column is passed to plotly with ``locationmode='USA-states'``
    (plotly documents this mode as matching two-letter state abbreviations).
    The figure is displayed via ``fig.show()``; nothing is returned.
    """
    # Grab the subset of data for this metric
    subset = df[["state", metric]].dropna()

    fig = px.choropleth(
        subset,
        locations="state",
        color=metric,
        # Some series have extreme outliers, so clip the color scale to
        # mean ± 2 standard deviations (roughly the central 95% of a normally
        # distributed series). This is an outlier clip, not a confidence
        # interval. None lets plotly pick the range automatically.
        range_color=_outlier_clipped_range(subset[metric]),
        color_continuous_scale="Viridis",
        locationmode="USA-states",
        scope="usa",
    )

    # Centered title named after the metric, with tight margins around the map
    fig.update_layout(
        title_text=metric,
        title_x=0.5,
        margin=dict(l=0, r=0, t=40, b=0),
    )
    # The figure title already names the metric, so blank the colorbar title
    fig.layout.coloraxis.colorbar.title = ""

    fig.show()

In [37]:
# Map the cancer incidence rate (per 100,000) column by state
plot_states("cancer_incidence_rate_per_100000")

In [38]:
# Map the cancer mortality rate (per 100,000) column by state
plot_states("cancer_mortality_rate_per_100000")