In [1]:
import pandas as pd
import numpy as np
import altair as alt
from altair import datum
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:

#highlighting important NPIs in WA
data = {'date': [ "2020-03-23", "2020-06-01", "2020-11-18", "2021-02-14"], 'event':[ "Stay at home", "Stay at home lifted", "Closing restaurants", "Reopening restaurants"]}

npidf = pd.DataFrame(data)
npidf.date = pd.to_datetime(npidf.date)

rule = alt.Chart(npidf).mark_rule(
    color="black",
    strokeWidth=2, 
    opacity = 0.3
).encode(
    alt.X('date:T', axis=alt.Axis(title=None))
).properties(
    width=800,
    height=300
)

text = alt.Chart(npidf).mark_text(
    align='left',
    baseline='middle',
    dx=2,
    dy=-135,
    size=13
).encode(
    alt.X('date:T',axis=alt.Axis(title=None)),
    text='event',
    color=alt.value('#000000')
).properties(
    width=800,
    height=300
)

In [3]:
north_percent_intro= "../data-files/north_percent_intro.csv"
south_percent_intro = "../data-files/south_percent_intro.csv"
percent_intro_mob = "../data-files/percent_intro_from_mob.csv"

In [4]:

north_df = pd.read_csv(north_percent_intro)
south_df = pd.read_csv(south_percent_intro)
mob_df = pd.read_csv(percent_intro_mob)
north_rt_df = pd.read_csv("../data-files/combined_rt_north.csv")

south_rt_df = pd.read_csv("../data-files/combined_rt_south.csv")

In [5]:
mob_df = mob_df[mob_df.date < "2022-03-02"]
mob_df = mob_df[mob_df.date > "2020-02-01"]

In [6]:
mob_df = mob_df.drop(columns = ['Unnamed: 0'])
mob_df = mob_df.rename(columns={"Region": "Region (mobility data)"})

In [7]:
mob_long = mob_df.melt('date', var_name='Region (mobility data)', value_name='movement')

In [8]:
mob_long

Unnamed: 0,date,Region (mobility data),movement
0,2020-02-03,north,0.335072
1,2020-02-10,north,0.344625
2,2020-02-17,north,0.354180
3,2020-02-24,north,0.330901
4,2020-03-02,north,0.369899
...,...,...,...
213,2022-01-31,south,0.339353
214,2022-02-07,south,0.349816
215,2022-02-14,south,0.341307
216,2022-02-21,south,0.377231


In [9]:
percent_cases_from_intro_mob = alt.Chart(mob_long, width = 750).mark_line(interpolate='monotone', opacity = 1.0, color = "black").encode(
    alt.X('date:T', axis=alt.Axis(title=None, grid=False)),
    alt.Y('movement:Q',axis=alt.Axis(title="", grid=False)), 
    alt.StrokeDash("Region (mobility data):N", legend=alt.Legend(orient = "none", legendX = 870, legendY= 50, labelFontSize = 12, titleFontSize = 12))).properties(
    width=800,
    height=400
)


percent_cases_from_intro_mob

In [10]:
north_test = north_df[north_df.date < "2022-03-02"]
south_test = south_df[south_df.date <"2022-03-02"]

In [11]:
north_test["Region (sequence data)"] = "North King County"
south_test["Region (sequence data)"] = "South King County"

combined_percent = pd.concat([ north_test, south_test], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  north_test["Region (sequence data)"] = "North King County"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  south_test["Region (sequence data)"] = "South King County"


In [12]:
combined_percent

Unnamed: 0.1,Unnamed: 0,interval,mean_percent,upper_hpd_log_95,lower_hpd_log_95,upper_hpd_log_50,lower_hpd_log_50,days,date,Region (sequence data)
0,0,5,0.529282,0.678281,0.388372,0.561614,0.456923,5,2022-03-01,North King County
1,0,6,0.552527,0.721814,0.400407,0.595068,0.476056,6,2022-02-28,North King County
2,0,7,0.522786,0.660675,0.375107,0.571755,0.472472,7,2022-02-27,North King County
3,0,8,0.516833,0.655783,0.367788,0.560708,0.457702,8,2022-02-26,North King County
4,0,9,0.509183,0.645999,0.351362,0.558426,0.462050,9,2022-02-25,North King County
...,...,...,...,...,...,...,...,...,...,...
1515,0,760,6.588739,42.406451,-56.279587,3.679823,-1.654827,760,2020-02-05,South King County
1516,0,761,0.820791,14.449318,-20.439144,0.305025,-2.158365,761,2020-02-04,South King County
1517,0,762,103.936423,13.806785,-13.984609,1.893982,0.039514,762,2020-02-03,South King County
1518,0,763,0.502939,6.305342,-2.282828,0.984386,0.372146,763,2020-02-02,South King County


In [13]:
line1 = alt.Chart(combined_percent).mark_area(interpolate='monotone').encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,  format="%B %Y")),
    alt.Y('lower_hpd_log_50',axis=alt.Axis(title="Percent of cases due to introductions", grid=False, format='%')),
    alt.Y2('upper_hpd_log_50' ),
    alt.Color("Region (sequence data):N", legend=alt.Legend(offset = -140, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_50 >0) & (datum.upper_hpd_log_50 < 1)
)

band1 = alt.Chart(combined_percent).mark_area(
    opacity=0.3, interpolate='monotone'
).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False)),
    alt.Y('lower_hpd_log_95', axis=alt.Axis(title="", grid=False)),
    alt.Y2('upper_hpd_log_95'), 
    alt.Color("Region (sequence data):N")
).properties(
    width=1000,
    height=300
).transform_filter(
    (datum.lower_hpd_log_95 >0) & (datum.upper_hpd_log_95 < 0.7)
)

band1 + line1

In [14]:
percent_case_intro = band1+ line1
percent_case_intro.configure_axis(
    labelFontSize=14,
    titleFontSize=14
)

In [15]:
stream_south = alt.Chart(south_rt_df, title = "South King County").mark_area(interpolate='monotone', opacity = .7 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="", grid=False), stack = False, scale=alt.Scale(domain=(0, 2.5))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N', scale=alt.Scale(domain = ['Local', "Other King County Region", "Outside King County"], range = [ "#4c90c0", "#ceb541", "#df4327"]),
             legend=alt.Legend(offset = -160, labelFontSize = 12, titleFontSize = 12))
).properties(
    width=475,
    height=300
).transform_filter(
    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
)

stream_north = alt.Chart(north_rt_df, title = "North King County").mark_area(interpolate='monotone', opacity = .7 ,color = "#f58518", clip = True).encode(
    alt.X('date:T', axis=alt.Axis(title="", grid=False,format="%B %Y")),
    alt.Y('mean_percent',axis=alt.Axis(title="Local Rt", grid=False),stack = False, scale=alt.Scale(domain=(0, 2.5))),
    #alt.Y2('upper_hpd_log_50' ), 
    alt.Color('Contribution:N', scale=alt.Scale(domain = ['Local', "Other King County Region", "Outside King County"], range = [ "#4c90c0", "#ceb541", "#df4327"]))
).properties(
    width=475,
    height=300
).transform_filter(
    (datum.mean_percent >0) & (datum.mean_percent < 2.5)
)

one_line = alt.Chart(pd.DataFrame({'y': [1.0]})).mark_rule(strokeDash=[1,1]).encode(y='y')

In [16]:
rt_streamplot = ((stream_north +one_line) | (stream_south+one_line))
rt_streamplot

In [17]:
combined_percent_intro = (percent_case_intro + percent_cases_from_intro_mob)
combined_percent_intro

In [18]:
figure5 = (combined_percent_intro & rt_streamplot).resolve_scale(color = "independent").configure_axis(
    labelFontSize=14,
    titleFontSize=14
)
figure5

In [19]:
figure5.save("../figures/figure5.html")

In [20]:
figure5 = persist & percent_case_intro
figure5 = figure5.configure_axis(
    labelFontSize=14,
    titleFontSize=14
)
figure5

NameError: name 'persist' is not defined

In [None]:
#figure5.save("figure5_percent.png")