# In [2]:
import pandas as pd
import plotly.express as px

# Column labels for the raw table, applied positionally (the CSV is read with
# header=None, so pandas assigns no names of its own).
columns = [
    "Index",
    "Group",
    "All Substances",
    "Alcohol Only",
    "Alcohol with secondary drug",
    "Heroin",
    "Other opiates",
    "Cocaine (smoked)",
    "Cocaine (other route)",
    "Marijuana",
    "Amphetamines",
    "Other stimulants",
    "Tranquilizers",
    "Sedatives",
    "Hallucinogens",
    "PCP",
    "Inhalants",
    "Other/Unknown",
]

# Read the national file without consuming any row as a header, then label
# the columns. The assignment raises if the file's column count differs from
# the number of labels above.
df_raw = pd.read_csv(
    "../../data/state_data/UNITED STATES.csv",
    header=None,
)
df_raw.columns = columns

# Racial and ethnic groups to keep; rows whose "Group" value is not listed
# here are dropped.
included_groups = [
    "White",
    "Black or African-American",
    "American Indian or Alaska Native",
    "Asian or Native Hawaiian or Other Pacific Islander",
    "Other",
    "Unknown",
    "Hispanic or Latino",
]

# Keep only the rows for the selected groups (explicit copy so later edits
# never touch df_raw).
filtered_df = df_raw[df_raw["Group"].isin(included_groups)].copy()

# Reshape wide -> long: one row per (Group, Substance) pair.
#   1. melt every non-"Group" column into Substance/Percentage pairs;
#   2. discard the rows produced by the "Index" bookkeeping column;
#   3. convert Percentage to numeric, turning unparseable values into NaN.
# The steps are chained and the conversion uses .assign(), which writes into a
# fresh frame. Assigning a column directly on the filtered slice can raise
# SettingWithCopyWarning on pandas versions without Copy-on-Write.
long_df = (
    filtered_df.melt(
        id_vars=["Group"], var_name="Substance", value_name="Percentage"
    )
    .loc[lambda frame: frame["Substance"] != "Index"]
    .assign(
        Percentage=lambda frame: pd.to_numeric(
            frame["Percentage"], errors="coerce"
        )
    )
)

# Grouped bar chart: one cluster of bars per substance, one bar per
# racial/ethnic group within each cluster.
fig = px.bar(
    long_df,
    x="Substance",
    y="Percentage",
    color="Group",
    barmode="group",
    # The bold tag must be closed with </b>; the previous title ended with a
    # second opening <b>, leaving the markup malformed.
    title="<b>Race/Ethnicity Breakdown of U.S. Substance Use Treatment Admissions</b>",
    # Friendlier y-axis caption in place of the raw column name.
    labels={"Percentage": "% of National Treatment Admissions"},
)

# Legend placement and styling: anchored by its top-left corner at the
# top-right of the plotting area, with a fully transparent background and
# border and a slightly reduced font size.
legend_style = {
    "x": 1,
    "xanchor": "left",
    "y": 1,
    "yanchor": "top",
    "bgcolor": "rgba(255,255,255,0)",
    "bordercolor": "rgba(0,0,0,0)",
    "font": {"size": 10},
}

# Slant the x tick labels, centre the title, and use a light plot background.
fig.update_layout(
    xaxis_tickangle=-45,
    title_x=0.5,
    plot_bgcolor="#f9f9f9",
    legend=legend_style,
)

# Export a standalone HTML page that loads plotly.js from the CDN, then
# display the figure.
fig.write_html("../../outputs/race.html", include_plotlyjs="cdn")
fig.show()