In [2]:
import pandas as pd
import numpy as np
import altair as alt

# Load the joined dataset and derive the addiction-death rate columns used by
# the charts below.
df = pd.read_csv("../../data/clean_data/joined_data.csv")

# Rescale the per-capita rate to a rate per 10,000 people.
rate_per_10000 = df["addiction_pc"] * 10000
df["addiction_per_10000"] = rate_per_10000

# Natural log of the rate, computed in one vectorized pass instead of a
# per-row Python lambda. log(0) is undefined, so zero rates are swapped for 1
# before taking the log, which maps them to log(1) == 0. Missing values are
# left alone and stay missing.
df["log_addiction_per_10000"] = np.log(
    rate_per_10000.mask(rate_per_10000 == 0, 1)
)

# Five-step color ramp (dark purple through orange) used by the charts.
palette = [
    "#610061",
    "#8B0E4D",
    "#A3193A",
    "#CF3E2A",
    "#FF6E42",
]

# Smallest and largest logged rate; these serve as the endpoints of the
# color-scale domain.
log_min = df["log_addiction_per_10000"].min()
log_max = df["log_addiction_per_10000"].max()


# Rectangular (x and y) interval selection used for interactive filtering.
# `mark` styles the brush rectangle itself; `selection_interval` documents
# that argument as a BrushConfig, so use that type rather than the
# general-purpose MarkConfig.
brush = alt.selection_interval(
    encodings=["x", "y"],
    mark=alt.BrushConfig(fill="lightgrey", stroke="black"),
)

# Color for points inside the brush: the logged death rate mapped onto the
# palette, with the scale domain pinned to the observed min/max.
selected_color = alt.Color(
    "log_addiction_per_10000:Q",
    scale=alt.Scale(domain=[log_min, log_max], range=palette),
    title="Log Addiction Deaths per 10,000",
)

# Fields shown in the hover tooltip for each point.
point_tooltips = [
    alt.Tooltip("state_name:N", title="State"),
    alt.Tooltip("county_name:N", title="County"),
    alt.Tooltip("pct_bach:Q", title="Bachelor's (%)", format=".1f"),
    alt.Tooltip("mhi:Q", title="Median Income ($)", format=",.0f"),
    alt.Tooltip(
        "addiction_per_10000:Q",
        title="Addiction Deaths per 10,000",
        format=",.1f",
    ),
]

# Scatterplot of median household income against bachelor's-degree share.
# The brush is registered on this chart so points can be selected by dragging.
scatterplot = (
    alt.Chart(df)
    .mark_circle(size=30)
    .encode(
        x=alt.X("pct_bach:Q", title="Percent of Adults with Bachelor's Degree"),
        y=alt.Y("mhi:Q", title="Median Household Income"),
        # points outside the brush fall back to a flat gray
        color=alt.condition(brush, selected_color, alt.value("lightgray")),
        opacity=alt.value(0.7),
        tooltip=point_tooltips,
    )
    .add_params(brush)
)


# Range of the death rate over the full dataset, so the x axis stays fixed
# regardless of which points are brushed.
xmins, xmaxs = df['addiction_per_10000'].min(), df['addiction_per_10000'].max()

# Density estimate of the death rate for the rows that pass the brush filter.
# This chart only *reads* the brush through transform_filter; the selection
# itself is registered on `scatterplot`. It is deliberately not added here
# with add_params: doing so would make this chart brushable as well, over
# `addiction_per_10000` and `density`, and `density` is not a column of the
# rows being filtered, so such a brush would match nothing.
density = (
    alt.Chart(df)
    .transform_filter(brush)
    .transform_density(
        'addiction_per_10000',
        as_=['addiction_per_10000', 'density'],
    )
    .mark_area(fill=palette[2])
    .encode(
        x=alt.X(
            'addiction_per_10000:Q',
            title='Addiction Deaths per 10,000 People',
            # fixed domain keeps the x axis comparable between selections
            scale=alt.Scale(domain=[xmins, xmaxs]),
        ),
        y=alt.Y(
            'density:Q',
            title='Density',
            # fixed domain keeps the y axis from rescaling between selections
            scale=alt.Scale(domain=[0, 0.5]),
        ),
    )
    .properties(title='Density of Addiction Deaths per 10,000 People')
)

# Stack the scatterplot above the density chart (`&` is vertical
# concatenation for Altair charts).
linked = scatterplot & density

# save the chart as an HTML file
linked.save('../../outputs/scatterplot_density.html')

# Keep the chart as the final expression: by default a notebook cell displays
# only the value of its last expression, so placing this before the save call
# would leave the chart undisplayed.
linked