<a href="https://colab.research.google.com/github/bunby/Figure_2_Choropleth/blob/main/figure_2_m187.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load dataset
# Every column is read as text; "Year" and "Crude Rate" are converted explicitly below.
file_path = "/content/Fentanyl 2018-2023.txt"
df = pd.read_csv(file_path, sep="\t", dtype=str)

# Filter and clean
# .copy() detaches the filtered rows from the raw frame so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[(df["Single Race 6"] == "American Indian or Alaska Native") & df["Year"].notna()].copy()
df["Year"] = df["Year"].astype(int)
# Anything that is not a plain number in "Crude Rate" becomes NaN and is dropped.
df["Crude Rate"] = pd.to_numeric(df["Crude Rate"], errors="coerce")
df = df[df["Crude Rate"].notnull() & df["Year"].between(2018, 2023)].copy()
# 2018-2019 is the baseline period; 2020-2023 is the comparison period.
df["Period"] = np.where(df["Year"] <= 2019, "Pre-COVID", "Post-COVID")

# Aggregate crude rates
# NOTE(review): this is an unweighted mean of the yearly crude rates per state,
# not a rate recomputed from pooled deaths and population - confirm this is intended.
agg = df.groupby(["State", "Period"])["Crude Rate"].mean().unstack()
# Guarantee both period columns exist: a period with no rows at all becomes an
# all-NaN column instead of raising KeyError in the percent-change formula.
agg = agg.reindex(columns=["Pre-COVID", "Post-COVID"])
pct_change = ((agg["Post-COVID"] - agg["Pre-COVID"]) / agg["Pre-COVID"]) * 100
# A zero baseline yields +/-inf, which dropna() would keep and which would then
# become the colour-bar bounds; treat it as "not computable" instead.
agg["% Change"] = pct_change.replace([np.inf, -np.inf], np.nan)
agg = agg.reset_index()

# State to abbreviation mapping
state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN',
    'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}
# Rows whose "State" is not in the mapping get NaN here and are dropped below.
agg["state_code"] = agg["State"].map(state_abbrev)
plot_data = agg.dropna(subset=["% Change", "state_code"])

if plot_data.empty:
    # Nothing to draw: no state has a usable rate in both periods.
    print("Skipping American Indian or Alaska Native: missing data for one of the periods.")
else:
    # Set min/max for colorbar
    vmin = plot_data["% Change"].min()
    vmax = plot_data["% Change"].max()

    # Choropleth map layer
    choropleth = go.Choropleth(
        locations=plot_data["state_code"],
        z=plot_data["% Change"],
        locationmode='USA-states',
        colorscale=["white", "red"],
        zmin=vmin,
        zmax=vmax,
        colorbar_title="% Change",
        # Only the two extremes are labelled on the colour bar.
        colorbar=dict(tickvals=[vmin, vmax]),
        marker_line_color='white',
        marker_line_width=0.5
    )

    # Text annotation layer: the rounded percent change printed on each state
    scatter = go.Scattergeo(
        locationmode='USA-states',
        locations=plot_data["state_code"],
        text=plot_data["% Change"].round(1).astype(str) + '%',
        mode='text',
        textfont=dict(size=8, color='black'),
        showlegend=False
    )

    # Create the final figure
    fig = go.Figure(data=[choropleth, scatter])
    fig.update_layout(
        title_text="Percent Change in Fentanyl Crude Death Rates<br>American Indian or Alaska Native (2018–2019 vs. 2020–2023)",
        geo_scope='usa'
    )

    fig.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load dataset
# Every column is read as text; "Year" and "Crude Rate" are converted explicitly below.
file_path = "/content/Fentanyl 2018-2023.txt"
df = pd.read_csv(file_path, sep="\t", dtype=str)

# Define race groupings
# Key: label used in the figure title. Value: the "Single Race 6" values pooled under it.
race_groups = {
    "American Indian or Alaska Native": ["American Indian or Alaska Native"],
    "Asian/Pacific Islander": ["Asian", "Native Hawaiian or Other Pacific Islander"],
    "Black or African American": ["Black or African American"],
    "White": ["White"]
}

# State to abbreviation mapping
state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN',
    'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

# Loop through each race group and create a map
for group_name, race_values in race_groups.items():
    # Filter and clean
    # .copy() gives an independent frame so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning on a filtered slice of df.
    df_group = df[df["Single Race 6"].isin(race_values) & df["Year"].notna()].copy()
    df_group["Year"] = df_group["Year"].astype(int)
    # Anything that is not a plain number in "Crude Rate" becomes NaN and is dropped.
    df_group["Crude Rate"] = pd.to_numeric(df_group["Crude Rate"], errors="coerce")
    df_group = df_group[
        df_group["Crude Rate"].notnull() & df_group["Year"].between(2018, 2023)
    ].copy()
    # 2018-2019 is the baseline period; 2020-2023 is the comparison period.
    df_group["Period"] = np.where(df_group["Year"] <= 2019, "Pre-COVID", "Post-COVID")

    # Aggregate
    # NOTE(review): unweighted mean of yearly crude rates per state; for pooled groups
    # it also averages across the constituent races' rows - confirm this is intended.
    agg = df_group.groupby(["State", "Period"])["Crude Rate"].mean().unstack()

    # Skip if either period is missing (otherwise the formula below raises KeyError)
    if "Pre-COVID" not in agg.columns or "Post-COVID" not in agg.columns:
        print(f"Skipping {group_name}: missing data for one of the periods.")
        continue

    # Calculate percent change
    pct_change = ((agg["Post-COVID"] - agg["Pre-COVID"]) / agg["Pre-COVID"]) * 100
    # A zero baseline yields +/-inf, which dropna() would keep and which would then
    # become the colour-bar bounds; treat it as "not computable" instead.
    agg["% Change"] = pct_change.replace([np.inf, -np.inf], np.nan)
    agg = agg.reset_index()
    # Rows whose "State" is not in the mapping get NaN here and are dropped below.
    agg["state_code"] = agg["State"].map(state_abbrev)
    plot_data = agg.dropna(subset=["% Change", "state_code"])

    # Skip if no state has a usable value; min()/max() would be NaN
    if plot_data.empty:
        print(f"Skipping {group_name}: no state has a usable rate in both periods.")
        continue

    # Colorbar range
    vmin = plot_data["% Change"].min()
    vmax = plot_data["% Change"].max()

    # Choropleth
    choropleth = go.Choropleth(
        locations=plot_data["state_code"],
        z=plot_data["% Change"],
        locationmode='USA-states',
        colorscale=["white", "red"],
        zmin=vmin,
        zmax=vmax,
        colorbar_title="% Change",
        # Only the two extremes are labelled on the colour bar.
        colorbar=dict(tickvals=[vmin, vmax]),
        marker_line_color='white',
        marker_line_width=0.5
    )

    # Text annotations: the rounded percent change printed on each state
    scatter = go.Scattergeo(
        locationmode='USA-states',
        locations=plot_data["state_code"],
        text=plot_data["% Change"].round(1).astype(str) + '%',
        mode='text',
        textfont=dict(size=8, color='black'),
        showlegend=False
    )

    # Final figure
    fig = go.Figure(data=[choropleth, scatter])
    fig.update_layout(
        title_text=f"Percent Change in Fentanyl Crude Death Rates<br>{group_name} (2018–2019 vs. 2020–2023)",
        geo_scope='usa'
    )

    fig.show()

In [None]:
# Load dataset
# Every column is read as text; "Year" and "Crude Rate" are converted explicitly below.
# This cell relies on pd, np and go having been imported by an earlier cell.
file_path = "/content/Methamphetamines 2018-2023.txt"
df = pd.read_csv(file_path, sep="\t", dtype=str)

# Define race groupings
# Key: label used in the figure title. Value: the "Single Race 6" values pooled under it.
race_groups = {
    "American Indian or Alaska Native": ["American Indian or Alaska Native"],
    "Asian/Pacific Islander": ["Asian", "Native Hawaiian or Other Pacific Islander"],
    "Black or African American": ["Black or African American"],
    "White": ["White"]
}

# State to abbreviation mapping
state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN',
    'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

# Loop through each race group and create a map
for group_name, race_values in race_groups.items():
    # Filter and clean
    # .copy() gives an independent frame so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning on a filtered slice of df.
    df_group = df[df["Single Race 6"].isin(race_values) & df["Year"].notna()].copy()
    df_group["Year"] = df_group["Year"].astype(int)
    # Anything that is not a plain number in "Crude Rate" becomes NaN and is dropped.
    df_group["Crude Rate"] = pd.to_numeric(df_group["Crude Rate"], errors="coerce")
    df_group = df_group[
        df_group["Crude Rate"].notnull() & df_group["Year"].between(2018, 2023)
    ].copy()
    # 2018-2019 is the baseline period; 2020-2023 is the comparison period.
    df_group["Period"] = np.where(df_group["Year"] <= 2019, "Pre-COVID", "Post-COVID")

    # Aggregate
    # NOTE(review): unweighted mean of yearly crude rates per state; for pooled groups
    # it also averages across the constituent races' rows - confirm this is intended.
    agg = df_group.groupby(["State", "Period"])["Crude Rate"].mean().unstack()

    # Skip if either period is missing (otherwise the formula below raises KeyError)
    if "Pre-COVID" not in agg.columns or "Post-COVID" not in agg.columns:
        print(f"Skipping {group_name}: missing data for one of the periods.")
        continue

    # Calculate percent change
    pct_change = ((agg["Post-COVID"] - agg["Pre-COVID"]) / agg["Pre-COVID"]) * 100
    # A zero baseline yields +/-inf, which dropna() would keep and which would then
    # become the colour-bar bounds; treat it as "not computable" instead.
    agg["% Change"] = pct_change.replace([np.inf, -np.inf], np.nan)
    agg = agg.reset_index()
    # Rows whose "State" is not in the mapping get NaN here and are dropped below.
    agg["state_code"] = agg["State"].map(state_abbrev)
    plot_data = agg.dropna(subset=["% Change", "state_code"])

    # Skip if no state has a usable value; min()/max() would be NaN
    if plot_data.empty:
        print(f"Skipping {group_name}: no state has a usable rate in both periods.")
        continue

    # Colorbar range
    vmin = plot_data["% Change"].min()
    vmax = plot_data["% Change"].max()

    # Choropleth
    choropleth = go.Choropleth(
        locations=plot_data["state_code"],
        z=plot_data["% Change"],
        locationmode='USA-states',
        colorscale=["white", "red"],
        zmin=vmin,
        zmax=vmax,
        colorbar_title="% Change",
        # Only the two extremes are labelled on the colour bar.
        colorbar=dict(tickvals=[vmin, vmax]),
        marker_line_color='white',
        marker_line_width=0.5
    )

    # Text annotations: the rounded percent change printed on each state
    scatter = go.Scattergeo(
        locationmode='USA-states',
        locations=plot_data["state_code"],
        text=plot_data["% Change"].round(1).astype(str) + '%',
        mode='text',
        textfont=dict(size=8, color='black'),
        showlegend=False
    )

    # Final figure
    fig = go.Figure(data=[choropleth, scatter])
    fig.update_layout(
        title_text=f"Percent Change in Methamphetamine Crude Death Rates<br>{group_name} (2018–2019 vs. 2020–2023)",
        geo_scope='usa'
    )

    fig.show()


In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load dataset
# Every column is read as text; "Year" and "Crude Rate" are converted explicitly below.
file_path = "/content/Heroin 2018-2023 (1).txt"
df = pd.read_csv(file_path, sep="\t", dtype=str)

# Define race groupings
# Key: label used in the figure title. Value: the "Single Race 6" values pooled under it.
race_groups = {
    "American Indian or Alaska Native": ["American Indian or Alaska Native"],
    "Asian/Pacific Islander": ["Asian", "Native Hawaiian or Other Pacific Islander"],
    "Black or African American": ["Black or African American"],
    "White": ["White"]
}

# State to abbreviation mapping
state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN',
    'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

# Loop through each race group and create a map
for group_name, race_values in race_groups.items():
    # Filter and clean
    # .copy() gives an independent frame so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning on a filtered slice of df.
    df_group = df[df["Single Race 6"].isin(race_values) & df["Year"].notna()].copy()
    df_group["Year"] = df_group["Year"].astype(int)
    # Anything that is not a plain number in "Crude Rate" becomes NaN and is dropped.
    df_group["Crude Rate"] = pd.to_numeric(df_group["Crude Rate"], errors="coerce")
    df_group = df_group[
        df_group["Crude Rate"].notnull() & df_group["Year"].between(2018, 2023)
    ].copy()
    # 2018-2019 is the baseline period; 2020-2023 is the comparison period.
    df_group["Period"] = np.where(df_group["Year"] <= 2019, "Pre-COVID", "Post-COVID")

    # Aggregate
    # NOTE(review): unweighted mean of yearly crude rates per state; for pooled groups
    # it also averages across the constituent races' rows - confirm this is intended.
    agg = df_group.groupby(["State", "Period"])["Crude Rate"].mean().unstack()

    # Skip if either period is missing
    if "Pre-COVID" not in agg.columns or "Post-COVID" not in agg.columns:
        print(f"Skipping {group_name}: missing data for one of the periods.")
        continue

    # Calculate percent change
    pct_change = ((agg["Post-COVID"] - agg["Pre-COVID"]) / agg["Pre-COVID"]) * 100
    # A zero baseline yields +/-inf, which dropna() would keep and which would then
    # become the colour-bar bounds; treat it as "not computable" instead.
    agg["% Change"] = pct_change.replace([np.inf, -np.inf], np.nan)
    agg = agg.reset_index()
    # Rows whose "State" is not in the mapping get NaN here and are dropped below.
    agg["state_code"] = agg["State"].map(state_abbrev)
    plot_data = agg.dropna(subset=["% Change", "state_code"])

    # Skip if no state has a usable value; min()/max() would be NaN
    if plot_data.empty:
        print(f"Skipping {group_name}: no state has a usable rate in both periods.")
        continue

    # Colorbar range
    vmin = plot_data["% Change"].min()
    vmax = plot_data["% Change"].max()

    # Choropleth
    choropleth = go.Choropleth(
        locations=plot_data["state_code"],
        z=plot_data["% Change"],
        locationmode='USA-states',
        colorscale=["white", "red"],
        zmin=vmin,
        zmax=vmax,
        colorbar_title="% Change",
        # Only the two extremes are labelled on the colour bar.
        colorbar=dict(tickvals=[vmin, vmax]),
        marker_line_color='white',
        marker_line_width=0.5
    )

    # Text annotations: the rounded percent change printed on each state
    scatter = go.Scattergeo(
        locationmode='USA-states',
        locations=plot_data["state_code"],
        text=plot_data["% Change"].round(1).astype(str) + '%',
        mode='text',
        textfont=dict(size=8, color='black'),
        showlegend=False
    )

    # Final figure
    fig = go.Figure(data=[choropleth, scatter])
    fig.update_layout(
        title_text=f"Percent Change in Heroin Crude Death Rates<br>{group_name} (2018–2019 vs. 2020–2023)",
        geo_scope='usa'
    )

    # Show (the figure is only displayed here; nothing is written to disk)
    fig.show()


Skipping American Indian or Alaska Native: missing data for one of the periods.
Skipping Asian/Pacific Islander: missing data for one of the periods.


In [None]:
# The requirement must be quoted: unquoted, the shell parses ">" as an output
# redirect, so pip installs an unconstrained plotly and its log is written to a
# file named "=6.1.2". %pip installs into the running kernel's environment.
%pip install -q "plotly>=6.1.2"
# NOTE(review): none of the cells shown here exports an image, so the orca/xvfb
# setup below may be unnecessary - confirm before keeping it.
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
!chmod +x /usr/local/bin/orca
# -y answers apt's confirmation prompt so the install does not wait for input.
!apt-get install -y xvfb libgtk2.0-0 libgconf-2-4

--2025-06-13 07:58:35--  https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/99037241/9dc3a580-286a-11e9-8a21-4312b7c8a512?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250613%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250613T075835Z&X-Amz-Expires=300&X-Amz-Signature=e990b898804594a5449862675a68202ef10a170ae19bc2e892149c53d62e7b1b&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dorca-1.2.1-x86_64.AppImage&response-content-type=application%2Foctet-stream [following]
--2025-06-13 07:58:36--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/99037241/9dc3a580-286a-11e9-8a21-4312b7c8a512?X-Amz-Algorithm=AWS4-HMAC-SHA

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load dataset
# Every column is read as text; "Year" and "Crude Rate" are converted explicitly below.
file_path = "/content/Prescription Opioids 2018-2023 (1).txt"
df = pd.read_csv(file_path, sep="\t", dtype=str)

# Define race groupings
# Key: label used in the figure title. Value: the "Single Race 6" values pooled under it.
race_groups = {
    "American Indian or Alaska Native": ["American Indian or Alaska Native"],
    "Asian/Pacific Islander": ["Asian", "Native Hawaiian or Other Pacific Islander"],
    "Black or African American": ["Black or African American"],
    "White": ["White"]
}

# State to abbreviation mapping
state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL',
    'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR', 'Pennsylvania': 'PA',
    'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD', 'Tennessee': 'TN',
    'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT', 'Virginia': 'VA', 'Washington': 'WA',
    'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

# Loop through each race group and create a map
for group_name, race_values in race_groups.items():
    # Filter and clean
    # .copy() gives an independent frame so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning on a filtered slice of df.
    df_group = df[df["Single Race 6"].isin(race_values) & df["Year"].notna()].copy()
    df_group["Year"] = df_group["Year"].astype(int)
    # Anything that is not a plain number in "Crude Rate" becomes NaN and is dropped.
    df_group["Crude Rate"] = pd.to_numeric(df_group["Crude Rate"], errors="coerce")
    df_group = df_group[
        df_group["Crude Rate"].notnull() & df_group["Year"].between(2018, 2023)
    ].copy()
    # 2018-2019 is the baseline period; 2020-2023 is the comparison period.
    df_group["Period"] = np.where(df_group["Year"] <= 2019, "Pre-COVID", "Post-COVID")

    # Aggregate
    # NOTE(review): unweighted mean of yearly crude rates per state; for pooled groups
    # it also averages across the constituent races' rows - confirm this is intended.
    agg = df_group.groupby(["State", "Period"])["Crude Rate"].mean().unstack()

    # Skip if either period is missing
    if "Pre-COVID" not in agg.columns or "Post-COVID" not in agg.columns:
        print(f"Skipping {group_name}: missing data for one of the periods.")
        continue

    # Calculate percent change
    pct_change = ((agg["Post-COVID"] - agg["Pre-COVID"]) / agg["Pre-COVID"]) * 100
    # A zero baseline yields +/-inf, which dropna() would keep and which would then
    # become the colour-bar bounds; treat it as "not computable" instead.
    agg["% Change"] = pct_change.replace([np.inf, -np.inf], np.nan)
    agg = agg.reset_index()
    # Rows whose "State" is not in the mapping get NaN here and are dropped below.
    agg["state_code"] = agg["State"].map(state_abbrev)
    plot_data = agg.dropna(subset=["% Change", "state_code"])

    # Skip if no state has a usable value; min()/max() would be NaN
    if plot_data.empty:
        print(f"Skipping {group_name}: no state has a usable rate in both periods.")
        continue

    # Colorbar range
    vmin = plot_data["% Change"].min()
    vmax = plot_data["% Change"].max()

    # Choropleth
    choropleth = go.Choropleth(
        locations=plot_data["state_code"],
        z=plot_data["% Change"],
        locationmode='USA-states',
        colorscale=["white", "red"],
        zmin=vmin,
        zmax=vmax,
        colorbar_title="% Change",
        # Only the two extremes are labelled on the colour bar.
        colorbar=dict(tickvals=[vmin, vmax]),
        marker_line_color='white',
        marker_line_width=0.5
    )

    # Text annotations: the rounded percent change printed on each state
    scatter = go.Scattergeo(
        locationmode='USA-states',
        locations=plot_data["state_code"],
        text=plot_data["% Change"].round(1).astype(str) + '%',
        mode='text',
        textfont=dict(size=8, color='black'),
        showlegend=False
    )

    # Final figure
    fig = go.Figure(data=[choropleth, scatter])
    fig.update_layout(
        title_text=f"Percent Change in Prescription Opioid Crude Death Rates<br>{group_name} (2018–2019 vs. 2020–2023)",
        geo_scope='usa'
    )

    # Show (the figure is only displayed here; nothing is written to disk)
    fig.show()

Skipping American Indian or Alaska Native: missing data for one of the periods.
