In [1]:
import datetime as dt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
import seaborn as sns

In [2]:
# Load the combined data set, keeping only the columns used in this notebook.
all_data = pd.read_csv('all_data_with_intervals.csv')[['Bundesland','week_no','summer_vac','active_lockdown','total_population']]
# Sum total_population over every row of each Bundesland.
# NOTE(review): if total_population is repeated on several rows of the CSV
# (e.g. once per week_no), this sum over-counts the population — confirm against the file.
populations = all_data.groupby('Bundesland')['total_population'].sum().reset_index()
# Distinct (Bundesland, week_no, summer_vac, active_lockdown) combinations.
intervals = all_data.drop(columns='total_population').drop_duplicates()

In [3]:
# Weekly RKI case counts per district; the Bundesland id is districtId integer-divided by 1000.
rki_weekly = pd.read_csv('rki_weekly.csv')
rki_weekly['Bundesland'] = rki_weekly['districtId'] // 1000

# Aggregate cases to (Bundesland, week_no), attach the populations and derive cases per capita.
state_wise = (
    rki_weekly
    .groupby(['Bundesland', 'week_no'])['AnzahlFall']
    .sum()
    .reset_index()
    .merge(populations)
    .assign(cases_per_cap=lambda frame: frame['AnzahlFall'] / frame['total_population'])
)

In [22]:
# Outer-join the weekly case counts with the interval flags. Every NaN produced by
# the join is replaced with False (this applies to all columns, not only the flags).
plot_data = state_wise.merge(intervals, how='outer').fillna(value=False)

# our model was only until week 38, with RF looking ahead 1 week
in_model_window = plot_data['week_no'].between(6, 39)
plot_data = plot_data.loc[in_model_window].copy()

# Row subsets for the two highlighted periods.
plot_lockdown = plot_data.loc[plot_data['active_lockdown']].copy()
plot_summer = plot_data.loc[plot_data['summer_vac']].copy()

In [59]:
# Y_VAR = 'cases_per_cap'
Y_VAR = 'AnzahlFall'

fig = go.Figure()

# Running stacked total per calendar week; it keeps growing from one Bundesland to the next.
stackr = {week: 0 for week in np.unique(plot_data.week_no)}

bl_range = range(1,17) #parametrized here in order to change order etc

# Series name -> flag column that decides whether a row belongs to that series.
period_flags = {'Active Lockdown': 'active_lockdown', 'Summer Vacation': 'summer_vac'}

value_repo = {}
for bl in bl_range:
    # Per series name: (week numbers, stacked values) collected for this Bundesland.
    collected = {'all': ([], []), 'Active Lockdown': ([], []), 'Summer Vacation': ([], [])}

    for _, row in plot_data[plot_data.Bundesland == bl].iterrows():
        week = row.week_no
        stackr[week] += row[Y_VAR]
        level = stackr[week]

        collected['all'][0].append(week)
        collected['all'][1].append(level)

        for label, flag in period_flags.items():
            if row[flag]:
                collected[label][0].append(week)
                collected[label][1].append(level)

    # One Series (stacked value indexed by week) per series name.
    value_repo[bl] = {label: pd.Series(levels, index=weeks)
                      for label, (weeks, levels) in collected.items()}

    
def add_areas(bl, timeslice, color):
    """Add the filled area of one period for one Bundesland to the global ``fig``.

    Parameters
    ----------
    bl : key of ``value_repo`` / element of ``bl_range``
        Bundesland whose area is drawn.
    timeslice : str
        Key of ``value_repo[bl]`` holding the stacked values of the period
        (also used as the legend entry for the first Bundesland).
    color : str
        Line colour passed to plotly for the filled trace.

    The data is looked up from ``bl`` itself rather than from the notebook-global
    ``vals``, and the Bundesland underneath is the one that precedes ``bl`` in
    ``bl_range`` (not ``bl - 1``), so the function keeps working when ``bl_range``
    is reordered.
    """
    suppl_line_attrs = {'mode':'lines','line_width':0,'hoverinfo':'none'}

    bl_order = list(bl_range)
    position = bl_order.index(bl)
    not_first = position > 0

    area = value_repo[bl][timeslice]

    if not_first:#this creates invisible lines for the areas to fill down towards, otherwise area borders are not vertical
        last_y = value_repo[bl_order[position - 1]]['all']
        last_y = last_y[last_y.index.isin(area.index)]

        # x is taken from last_y itself so that x and y of the baseline always pair up
        fig.add_trace(go.Scatter(x=last_y.index,
                                 y=last_y,
                                 fill=None,
                                 showlegend=False,
                                 **suppl_line_attrs))

    fig.add_trace(go.Scatter(x=area.index,
                             y=area,
                             fill='tonexty' if not_first else 'tozeroy',
                             #explicit tozeroy, because we add traces per BL multiple times,
                             name=None if not_first else timeslice,
                             showlegend= not not_first,
                             line_color=color,
                             **suppl_line_attrs))
    
# Highlighted periods in drawing order: summer first, then lockdown.
highlight_styles = (('Summer Vacation', 'blue'),
                    ('Active Lockdown', 'red'))

for bl in bl_range: #not iter directly over dict to potentially change order
    vals = value_repo[bl]

    for period_name, period_color in highlight_styles:
        add_areas(bl, period_name, period_color)

    # thin black outline of the complete stacked series of this Bundesland
    outline = go.Scatter(x=vals['all'].index,
                         y=vals['all'],
                         fill=None,
                         showlegend=False,
                         line_color='black',
                         line_width=.9,
                         mode='lines',
                         hoverinfo='none')
    fig.add_trace(outline)

# Titles, horizontal legend above the plot area, and global font.
layout_options = dict(
    title=dict(text="Weekly New Covid-19 Cases in Germany by Province",
               xanchor='center',
               x=.5),
    xaxis_title="2020 Calendar Week",
    yaxis_title="Weekly New Cases",
    legend_title="Period of Analysis:   ",
    legend=dict(orientation="h",
                yanchor="bottom",
                y=1,
                xanchor="center",
                x=0.5),
    font=dict(size=18,
              color="black"),
)
fig.update_layout(**layout_options)
fig.show()


In [18]:
dir(fig)

['__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_add_annotation_like',
 '_allow_disable_validation',
 '_animation_duration_validator',
 '_animation_easing_validator',
 '_batch_layout_edits',
 '_batch_trace_edits',
 '_bracket_re',
 '_build_dispatch_plan',
 '_build_update_params_from_batch',
 '_config',
 '_data',
 '_data_defaults',
 '_data_objs',
 '_data_validator',
 '_dispatch_layout_change_callbacks',
 '_dispatch_trace_change_callbacks',
 '_frame_objs',
 '_frames_validator',
 '_get_child_prop_defaults',
 '_get_child_props',
 '_grid_ref',
 '_grid_str',
 '_in_batch_mode',
 '_index_is',
 '_init_child_props',
 

In [None]:
#THIS WORKS!!!!
fig = go.Figure()

# Running stacked case total per calendar week, carried across all Bundeslaender.
stackr = {week: 0 for week in np.unique(plot_data.week_no)}

# Series name -> flag column that decides whether a row belongs to that series.
period_flags = {'lockdown': 'active_lockdown', 'summer': 'summer_vac'}

value_repo = {}
for bl in range(1,17):
    # Per series name: (week numbers, stacked values) of this Bundesland.
    points = {'all': ([], []), 'lockdown': ([], []), 'summer': ([], [])}

    for _, row in plot_data[plot_data.Bundesland == bl].iterrows():
        week = row.week_no
        stackr[week] += row.AnzahlFall
        level = stackr[week]

        points['all'][0].append(week)
        points['all'][1].append(level)
        for period, flag in period_flags.items():
            if row[flag]:
                points[period][0].append(week)
                points[period][1].append(level)

    # Store the raw lists (X_*/Y_*) and the matching Series for every series name.
    value_repo[bl] = {}
    for period, (weeks, levels) in points.items():
        value_repo[bl]['X_' + period] = weeks
        value_repo[bl]['Y_' + period] = levels
        value_repo[bl][period] = pd.Series(levels, index=weeks)


for bl, vals in value_repo.items():
    on_top_of_previous = bl > 1

    # lockdown first, then summer
    for period in ('lockdown', 'summer'):
        if on_top_of_previous:
            # baseline: stacked values of the Bundesland below, limited to this period's weeks
            last_y = value_repo[bl-1]['all']
            last_y = last_y[last_y.index.isin(vals[period].index)]
            fig.add_trace(go.Scatter(x=vals['X_' + period],
                                     y=last_y,
                                     fill=None,
                                     mode='lines'))

        #explicit tozero, because we add traces per BL multiple times
        fig.add_trace(go.Scatter(x=vals['X_' + period],
                                 y=vals['Y_' + period],
                                 fill='tonexty' if on_top_of_previous else 'tozeroy',
                                 mode='lines'))

    #now rest
    fig.add_trace(go.Scatter(x=vals['X_all'],
                             y=vals['Y_all'],
                             fill=None,
                             mode='lines'))


fig.show()

In [None]:
value_repo

In [6]:
# def state_pivot(df_in):
#     df = df_in.pivot(index='week_no',columns='Bundesland',values='AnzahlFall')
#     for c in df.columns:
#         df['BL'+str(c)] = df[c]
#         df.drop(columns=c,inplace=True)
        
#     return df

# wide_data = state_pivot(plot_data)
# wide_lockdown = state_pivot(plot_lockdown)
# wide_summer = state_pivot(plot_summer)

In [None]:
#OLD VERSION

fig = go.Figure()

def trace_addr(df,fill = False):
    """Add one stacked line per Bundesland (ids 1..16) to the global ``fig``.

    The stacked totals are accumulated from the global ``plot_data``; only the
    weeks that also occur in ``df`` for the same Bundesland are plotted.
    With ``fill`` true the first trace is filled to zero and every later trace
    is filled towards the previously added trace.
    """
    running_total = dict.fromkeys(np.unique(plot_data.week_no), 0)

    for bl in range(1,17):
        weeks_to_plot = df.loc[df.Bundesland == bl, 'week_no'].values

        xs, ys = [], []
        for _, row in plot_data[plot_data.Bundesland == bl].iterrows():
            week = row['week_no']
            running_total[week] += row['AnzahlFall']
            if week not in weeks_to_plot:
                continue
            xs.append(week)
            ys.append(running_total[week])

        if not fill:
            fill_mode = None
        elif bl > 1:
            fill_mode = 'tonexty'
        else:
            #explicit tozero, because we'll call the function multiple times
            fill_mode = 'tozeroy'

        fig.add_trace(go.Scatter(x=xs,
                                 y=ys,
                                 fill=fill_mode,
                                 mode='lines'))

trace_addr(plot_data,False)
trace_addr(plot_lockdown,True)
trace_addr(plot_summer,True)

fig.show()

In [None]:
#OLD VERSION

fig = go.Figure()

def trace_addr(df,fill = False):
    """Add one cumulative stacked line per Bundesland column to the global ``fig``.

    Parameters
    ----------
    df : DataFrame
        Wide frame with one column per Bundesland named ``'BL1'`` .. ``'BL16'``;
        its index supplies the x values.
    fill : bool
        If true, the first trace is filled to zero and each later trace is
        filled towards the previously added one; otherwise nothing is filled.
    """
    stackr = {i: 0 for i in df.index}

    for bl in range(1,17):

        # Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
        for i, val in df['BL'+str(bl)].items():
            stackr[i] += val

        if fill:
            fill_ = 'tonexty' if bl > 1 else 'tozeroy'
        else:
            fill_ = None
        fig.add_trace(go.Scatter(x=[*stackr.keys()],
                          y=[*stackr.values()],
                          fill=fill_,
                          mode='lines'))

# trace_addr(wide_data,False)
# trace_addr(wide_lockdown,True)
trace_addr(wide_summer,True)

fig.show()

In [None]:
for _,v in plot_data.iterrows():
    print(v.summer_vac)

In [None]:
col_list = [wide_data[c] for c in wide_data.columns]

# plt.stackplot(wide_data.index,*col_list,fill=None)
col_list

In [None]:
plot_data.AnzahlFall.plot.area()

### Issis Work below 

# Data Preparation

In [None]:
rki_daily = pd.read_csv('./rki_daily.csv')
rki_daily.head()

In [None]:
rki_daily.info()

#### Turn Meldedatum into datetime format

In [None]:
# Replace the column (instead of writing through .loc[:, col]) so that it really
# becomes datetime64: an in-place .loc[:, col] assignment can keep the old object
# dtype, which breaks the later use of the .dt accessor.
rki_daily['Meldedatum'] = pd.to_datetime(rki_daily['Meldedatum'])

In [None]:
rki_daily.info()

#### Add Bundesland as Preparation for Ferien

In [None]:
rki_daily['Bundesland'] = rki_daily.loc[:, 'districtId'] // 1000
rki_daily

#### Add Week No as Preparation for Ferien

In [None]:
# ISO calendar week of the reporting date. Series.dt.week was removed in pandas 2.0;
# dt.isocalendar().week is its replacement but yields a nullable UInt32 column, so it
# is cast back to a plain numeric dtype (float only if some dates are missing).
iso_week = rki_daily['Meldedatum'].dt.isocalendar().week
rki_daily['week_no'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')
rki_daily.head()

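#### Add Ferien (summer vacation flag)
Same approach as in notebook 06_deviance_analysis.ipynb: for each Bundesland, the six consecutive calendar weeks that begin one week (the offset) after its listed start week are flagged as summer vacation.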

In [None]:
# Calendar week associated with the start of the summer vacation, keyed by Bundesland id.
summer_vac_dict = {8: 31,
                   9: 31,
                   11: 27,
                   12: 27,
                   4: 25,
                   2: 27,
                   6: 28,
                   13: 26,
                   3: 30,
                   5: 27,
                   7: 28,
                   10: 28,
                   14: 30,
                   15: 30,
                   1: 27,
                   16: 30}

offset = 1
# offsetting the summer vacation by one week, even though performance measure/target already
# 'looks' one week into the future, to include all travel-returners' effects. Could also be 0 or 2?

# Drop a flag left over from a previous run so that re-running this cell is idempotent.
rki_daily = rki_daily.drop(columns='summer_vac', errors='ignore')

# One lookup frame per Bundesland, collected in a list and concatenated once
# (DataFrame.append was removed in pandas 2.0 and was quadratic in a loop).
summer_vac_parts = []
for bl, start in summer_vac_dict.items():
    summer_vac = [start + offset + no for no in range(0, 6)]
    # Keep each (districtId, week_no) pair only once: with repeated keys the merge
    # below would pair every row with every row of the same district-week and
    # thereby multiply rows (and inflate every later sum of AnzahlFall).
    bl_data = (rki_daily.loc[rki_daily.loc[:, 'Bundesland'] == bl, ['districtId', 'week_no']]
               .drop_duplicates()
               .copy())
    bl_data['summer_vac'] = bl_data.week_no.isin(summer_vac)
    summer_vac_parts.append(bl_data)

summer_vac_df = pd.concat(summer_vac_parts, ignore_index=True)

# validate= makes pandas raise if the lookup keys are ever not unique again.
rki_daily = pd.merge(rki_daily, summer_vac_df, how='outer',
                     on=['districtId', 'week_no'], validate='many_to_one')

## Handling Lockdown and Summer Vacation

### Lockdown

In [None]:
# Total number of reported cases per reporting date (all Bundeslaender together).
rki_daily_plot = rki_daily.groupby('Meldedatum')[['AnzahlFall']].sum()
rki_daily_plot.head()

In [None]:
# ToDo - what are the exact dates?
start_lck = dt.datetime(year=2020, month=3, day=20)
end_lck = dt.datetime(year=2020, month=5, day=3)

# ISO calendar weeks of the two dates above (12 and 18).
start_lck_wk = start_lck.isocalendar()[1]
end_lck_wk = end_lck.isocalendar()[1]

### Summer Vacation

In [None]:
# Cases per (summer_vac, Bundesland, week_no, Meldedatum), flattened back into columns.
vac_group_keys = ['summer_vac', 'Bundesland', 'week_no', 'Meldedatum']
rki_daily_vac = rki_daily.groupby(vac_group_keys)[['AnzahlFall']].sum().reset_index()
rki_daily_vac.head()

# Visualizing Lockdown

Source: https://plotly.com/python/shapes/

In [None]:
import plotly
import logging
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [None]:
df_for_plotly = rki_daily_plot.copy()
df_for_plotly.reset_index(drop=False, inplace=True)
df_for_plotly

#### Visualizing Lockdown Function

In [None]:
def visualize_lockdown(df, start_lck, end_lck, start_color='palegreen', end_color='salmon'):
    """Plot the daily infections over all Bundeslaender and mark the lockdown.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns 'Meldedatum' (x values) and 'AnzahlFall' (y values).
    start_lck, end_lck
        Start and end of the lockdown, comparable with the values of df['Meldedatum'].
    start_color, end_color : str
        Colours of the dashed vertical line and of the text label that mark the
        start and the end of the lockdown.

    All data is read from ``df`` (the labels no longer depend on the notebook-global
    ``df_for_plotly``). The figure is shown; nothing is returned.
    """

    fig = go.Figure()

    # The main plot
    fig.add_trace(go.Scatter(x=df['Meldedatum'],
                             y=df['AnzahlFall'],
                             line_color='lightslategray',
                             line_width=2))

    # The text labels are placed at the height of the highest daily value.
    peak = np.array(df.loc[:, 'AnzahlFall'].max())

    markers = ((start_lck, "Lockdown Start", start_color),
               (end_lck, "Lockdown End", end_color))

    for when, label, color in markers:
        # dashed vertical line spanning the full plot height
        fig.add_shape(
            type='line',
            yref='paper', y0=0, y1=1,
            xref='x', x0=when, x1=when,
            line=dict(color=color, width=3, dash='dash'),
            opacity=0.8,
        )

        # text label, anchored at the rows of df whose date equals the marker date
        fig.add_trace(go.Scatter(
            x=df.loc[df['Meldedatum'] == when, 'Meldedatum'],
            y=peak,
            mode="lines+text",
            name="Lines and Text",
            text=[label],
            textposition="top right",
            textfont=dict(
                #family="sans serif",
                size=12,
                color=color
            )
        ))

    fig.update_layout(showlegend=False,
                      title_text='Corona Infections on a Daily Basis (all Bundesländer)')

    fig.show()

In [None]:
visualize_lockdown(df_for_plotly, start_lck, end_lck,
                   start_color='palegreen', end_color='salmon')

In [None]:
fig = go.Figure()

# The main plot
daily_cases_line = go.Scatter(x=df_for_plotly['Meldedatum'],
                              y=df_for_plotly['AnzahlFall'],
                              line_color='lightslategray',
                              line_width=2)
fig.add_trace(daily_cases_line)

# (date, label text, colour) of the two lockdown markers, start first
lockdown_markers = ((start_lck, "Lockdown Start", "palegreen"),
                    (end_lck, "Lockdown End", "salmon"))

for marker_date, marker_label, marker_color in lockdown_markers:
    # dashed vertical line over the full plot height
    fig.add_shape(
        type='line',
        yref='paper', y0=0, y1=1,
        xref='x', x0=marker_date, x1=marker_date,
        line=dict(color=marker_color, width=3, dash='dash'),
        opacity=0.8,
    )

    # text label at the height of the highest daily value
    is_marker_date = df_for_plotly['Meldedatum'] == marker_date
    fig.add_trace(go.Scatter(
        x=df_for_plotly.loc[is_marker_date, 'Meldedatum'],
        y=np.array(df_for_plotly.loc[:, 'AnzahlFall'].max()),
        mode="lines+text",
        name="Lines and Text",
        text=[marker_label],
        textposition="top right",
        textfont=dict(size=12, color=marker_color),
    ))

fig.update_layout(showlegend=False,
                  title_text='Corona Infections on a Daily Basis (all Bundesländer)')

# Visualizing Summer Vacation

Sources: 
- general idea: https://towardsdatascience.com/interactive-climate-data-visualizations-with-python-plotly-de0472490b09
- for marking the start of the vacation - https://plotly.com/python/bar-charts/
- colors: https://community.plotly.com/t/plotly-colours-list/11730/5 & https://plotly.com/python/builtin-colorscales/
- show all values on x-axis: https://stackoverflow.com/questions/34755707/how-to-show-all-x-axis-tick-values-in-plotly

In [None]:
# Weekly cases per (summer_vac, week_no, Bundesland).
# Only AnzahlFall is summed: rki_daily_vac also carries the datetime column
# Meldedatum, and summing a datetime column raises in pandas >= 2.0.
rki_vac_group = (
    rki_daily_vac
    .groupby(['summer_vac', 'week_no', 'Bundesland'])[['AnzahlFall']]
    .sum()
    .reset_index()
)
rki_vac_group.loc[(rki_vac_group['Bundesland'] == 4), :]
# rki_vac_group

#### Visualising Summer Vacation Function

In [None]:
def visualize_summer_vac(df, summer_vac_dict,
                         main_color='lightslategray',
                         start_vac='palegreen',
                         end_vac='salmon'):
    """Show weekly Corona cases around the summer vacation as a grid of bar charts.

    One subplot is drawn per vacation start week found in ``summer_vac_dict``; it
    sums the cases of all Bundeslaender that share this start week. The bar of the
    start week is coloured ``start_vac`` and the bar five weeks later ``end_vac``.

    Parameters
    ----------
    df : DataFrame
        Columns 'summer_vac' (boolean), 'week_no', 'Bundesland' and 'AnzahlFall'
        (Corona infections per week).
    summer_vac_dict : dict
        Bundesland -> starting week of the summer vacation.
    main_color, start_vac, end_vac : str
        Colours of the regular bars, the start bar and the end bar.

    All data is read from ``df`` (the function no longer depends on the
    notebook-global ``rki_vac_group``). The figure is shown; nothing is returned.
    """

    # unique starting weeks, and Bundesland lists per starting week (inverse dictionary)
    summer_vac_weeks = np.sort(pd.Series(list(summer_vac_dict.values())).unique())

    summer_vac_inv = {vac_wk: [k for k in summer_vac_dict.keys() if summer_vac_dict[k] == vac_wk]
                      for vac_wk in set(summer_vac_dict.values())}

    # weeks flagged as vacation, shifted back by one week, in ascending order
    vac_mask = df.loc[:, 'summer_vac'] == True
    week_list = np.sort(df.loc[vac_mask, 'week_no'].unique())
    week_list_adj = week_list - 1
    week_mask = df.loc[:, 'week_no'].isin(week_list_adj)

    # Plotting
    colors = [main_color, ] * len(week_list_adj)
    title_font = dict(size=12, color=main_color)
    # rows calculation should be more parametised
    fig = make_subplots(rows=len(summer_vac_weeks)//2+1, cols=2,
                        shared_xaxes=False, shared_yaxes=False, vertical_spacing=0.15)

    for i, week_no in enumerate(week_list_adj):
        cell = dict(row=(i // 2) + 1, col=(i % 2) + 1)

        if week_no in summer_vac_weeks:
            bl_list = summer_vac_inv[week_no]
            bl_mask = df.loc[:, 'Bundesland'].isin(bl_list)
            # reindex so that there is exactly one bar height per x week, even if
            # a week has no rows for these Bundeslaender
            y_tmp = (df.loc[(bl_mask) & (week_mask)]
                     .groupby('week_no')['AnzahlFall']
                     .sum()
                     .reindex(week_list_adj, fill_value=0))

            colors_tmp = colors.copy()
            colors_tmp[i] = start_vac
            if i + 5 < len(colors_tmp):  # the end bar may lie outside the plotted weeks
                colors_tmp[i+5] = end_vac

            fig.add_trace(
                go.Bar(
                    x=week_list_adj,
                    y=y_tmp,
                    marker_color=colors_tmp
                ),
                **cell,
            )
            fig.update_xaxes(title_text=f'Vacation start in wk {week_no}  for Bundesländer {summer_vac_inv[week_no]}',
                             title_font=title_font,
                             tickmode='linear', **cell)
        # 29 should be parametised
        elif week_no == 29:
            # placeholder subplot with all-zero bars for the week without a vacation start
            fig.add_trace(
                go.Bar(
                    x=week_list_adj,
                    y=np.zeros(len(week_list_adj),),
                ),
                **cell,
            )
            fig.update_xaxes(title_text=f'No vacation start in wk {week_no}',
                             title_font=title_font,
                             tickmode='linear', **cell)

    fig.update_layout(showlegend=False,
                      title_text='Corona Infections per Summer Vacation Week')
    #fig.update_yaxes(range=[np.min(df.loc[vac_mask, 'AnzahlFall']),np.max(df.loc[vac_mask, 'AnzahlFall'])], fixedrange=True)
    fig.show()

In [None]:
visualize_summer_vac(rki_vac_group, summer_vac_dict,
                     main_color='lightslategray',
                     start_vac='palegreen',
                     end_vac='salmon')

In [None]:
summer_vac_weeks = pd.Series(summer_vac_dict.values()).unique()
summer_vac_weeks = np.sort(summer_vac_weeks)
summer_vac_weeks

In [None]:
rki_daily.loc[:, 'Bundesland'].unique()

Source for the inverse dictionary below (grouping keys that share the same value): https://stackoverflow.com/questions/54249400/python-how-to-group-keys-that-have-the-same-values-in-a-dictionary

In [None]:
summer_vac_inv = {vac_wk: [k for k in summer_vac_dict.keys() if summer_vac_dict[k] == vac_wk]
                  for vac_wk in set(summer_vac_dict.values())}
summer_vac_inv

In [None]:
summer_vac_inv

In [None]:
summer_vac_weeks

In [None]:
# adjustments are done in order for all Bundesländer to be included
# as starting date was taken the date in the summer_vac_dict

summer_vac_inv_adj = {vac_wk+1: [k for k in summer_vac_dict.keys() if summer_vac_dict[k] == vac_wk]
                      for vac_wk in set(summer_vac_dict.values())}
vac_mask = rki_vac_group.loc[:, 'summer_vac'] == True
week_list = rki_vac_group.loc[vac_mask, 'week_no'].unique()
week_list_adj = week_list - 1
week_mask = rki_vac_group.loc[:, 'week_no'].isin(week_list_adj)

In [None]:
# Grid of bar charts: one subplot per vacation start week, plus a placeholder for week 29.
colors = ['lightslategray', ] * len(week_list_adj)
axis_title_font = dict(size=12, color="lightslategray")

fig = make_subplots(rows=len(summer_vac_weeks)//2+1, cols=2,
                    shared_xaxes=False, shared_yaxes=False, vertical_spacing=0.15)

for i, week_no in enumerate(week_list_adj):
    grid_cell = dict(row=(i // 2) + 1, col=(i % 2) + 1)

    if week_no in summer_vac_weeks:
        # cases of all Bundeslaender whose vacation starts in this week, summed per week
        bl_list = summer_vac_inv[week_no]
        bl_mask = rki_vac_group.loc[:, 'Bundesland'].isin(bl_list)
        y_tmp = rki_vac_group.loc[(bl_mask) & (week_mask), ['week_no', 'AnzahlFall']]
        y_tmp = y_tmp.groupby('week_no').sum().reset_index(drop=False)

        # highlight the start week and the bar five positions later
        colors_tmp = colors.copy()
        colors_tmp[i] = 'palegreen'
        colors_tmp[i+5] = 'salmon'

        bars = go.Bar(x=week_list_adj,
                      y=y_tmp.loc[:, 'AnzahlFall'],
                      marker_color=colors_tmp)
        axis_title = f'Vacation start in wk {week_no}  for Bundesländer {summer_vac_inv[week_no]}'
    elif week_no == 29:
        # placeholder subplot with all-zero bars
        bars = go.Bar(x=week_list_adj,
                      y=np.zeros(len(week_list_adj),))
        axis_title = f'No vacation start in wk {week_no}'
    else:
        continue

    fig.add_trace(bars, **grid_cell)
    fig.update_xaxes(title_text=axis_title,
                     title_font=axis_title_font,
                     tickmode='linear',
                     **grid_cell)


fig.update_layout(showlegend=False,
                  title_text='Corona Infections per Summer Vacation Week')
#fig.update_yaxes(range=[np.min(rki_vac_group.loc[vac_mask, 'AnzahlFall']),np.max(rki_vac_group.loc[vac_mask, 'AnzahlFall'])], fixedrange=True)
fig.show()

In [None]:
vac_mask_31 = rki_vac_group.loc[:, 'summer_vac'] == True
wk_mask_31 = rki_vac_group.loc[:, 'week_no'] == 32
bl_mask_31 = rki_vac_group.loc[:, 'Bundesland'].isin([8, 9])

df_31 = rki_vac_group.loc[(vac_mask_31) & (wk_mask_31) & (
    bl_mask_31), :].groupby('week_no').sum()
df_31

In [None]:
rki_vac_group

# Plotly Filled Area

Sources: 
- https://www.geeksforgeeks.org/how-to-create-stacked-area-plot-using-plotly-in-python/
- https://community.plotly.com/t/color-change-by-range-in-line-chart/17829/2
- https://plotly.com/python/filled-area-plots/

In [None]:
# Weekly cases per Bundesland.
rki_bl_cases = (
    rki_daily_vac
    .groupby(['week_no', 'Bundesland'])[['AnzahlFall']]
    .sum()
    .reset_index()
)
# Within each week: running total over the rows (Bundeslaender) in row order,
# i.e. the stacked height of each Bundesland.
rki_bl_cases['cumsum'] = rki_bl_cases.groupby(by=['week_no'])['AnzahlFall'].cumsum()
#rki_bl_cases.loc[(rki_bl_cases['Bundesland'] == 4), :]
# rki_bl_cases

In [None]:
# Calendar weeks of the lockdown, both boundary weeks included.
weeks_lck = np.arange(start_lck_wk, end_lck_wk+1)

# Weeks strictly before the lockdown.
weeks_before_lck = np.arange(
    rki_bl_cases.loc[:, 'week_no'].min(), start_lck_wk)
# Weeks strictly after the lockdown, up to and including the last week in the data.
# (Previously this range started at end_lck_wk, which is still a lockdown week, and
# stopped one week before the last week.)
weeks_after_lck = np.arange(
    end_lck_wk + 1, rki_bl_cases.loc[:, 'week_no'].max() + 1)

In [None]:
before_lck = rki_bl_cases.loc[:, 'week_no'] < start_lck_wk
after_lck = rki_bl_cases.loc[:, 'week_no'] > end_lck_wk
lck_mask = rki_bl_cases.loc[:, 'week_no'].isin(weeks_lck)
# rki_vac_group.loc[after_lck,'week_no'].unique()

In [None]:
df_plt_filled = rki_bl_cases.copy()

In [None]:
#df_plt_filled['color_or_not'] = df_plt_filled['week_no'].apply(lambda x: True if x in weeks_lck or x in summer_vac_weeks else 'no lck or vac')

In [None]:
def f(df):
    """Return the colour-group label for one row (applied row-wise with axis=1).

    Rows whose 'week_no' is contained in ``weeks_lck`` or in ``summer_vac_weeks``
    are labelled with their Bundesland and its entry in ``summer_vac_dict``;
    all other rows get the common label 'no lck or vac'.
    """
    week = df['week_no']
    highlighted = week in weeks_lck or week in summer_vac_weeks
    if not highlighted:
        return 'no lck or vac'

    state = df['Bundesland']
    return f"{state}, sum vac start {summer_vac_dict[state]}"

In [None]:
df_plt_filled['color_or_not'] = df_plt_filled.apply(f, axis=1)

In [None]:
df_plt_filled[df_plt_filled['week_no'] == 5].head()

In [None]:
import plotly.express as px
df = df_plt_filled

# Stacked area chart: one line per Bundesland, coloured by the label in 'color_or_not'.
axis_labels = {"week_no": "week no", "AnzahlFall": "Number of Infections",
               "color_or_not": "Bundesland"}
fig = px.area(df_plt_filled, x="week_no", y="AnzahlFall", color="color_or_not",
              line_group="Bundesland",
              labels=axis_labels)

# (calendar week, label text) of the two lockdown markers, start first
lockdown_week_markers = ((start_lck_wk, "Lockdown Start"),
                         (end_lck_wk, "Lockdown End"))

for marker_week, marker_text in lockdown_week_markers:
    # dashed vertical line over the full plot height
    fig.add_shape(
        type='line',
        yref='paper', y0=0, y1=1,
        xref='x', x0=marker_week, x1=marker_week,
        line=dict(color="blue", width=2, dash='dash'),
        opacity=0.25,
    )

    # text label at the height of the largest stacked value
    in_marker_week = df_plt_filled['week_no'] == marker_week
    fig.add_trace(go.Scatter(
        x=df_plt_filled.loc[in_marker_week, 'week_no'],
        y=np.array(df_plt_filled.loc[:, 'cumsum'].max()),
        mode="lines+text",
        name="Lines and Text",
        text=[marker_text],
        textposition="top right",
        textfont=dict(size=12, color="blue"),
        opacity=0.25,
        showlegend=False,
    ))

# Arrow on top of the stack for every vacation start week.
for wk in summer_vac_weeks:
    stack_top = np.array(
        df_plt_filled.loc[df_plt_filled['week_no'] == wk, 'cumsum'].max())
    fig.add_annotation(x=wk,
                       y=stack_top,
                       #text=f'vac start {summer_vac_inv[wk]}',
                       showarrow=True,
                       arrowsize=2,
                       arrowhead=1,
                       arrowcolor='blue',
                       opacity=0.25,
                       textangle=0)

fig.update_layout(showlegend=True,
                  title_text='Corona Infections per Bundesland. Highlights: Lockdown & Summer Vacation Week')

fig.show()

## Experiments

Source: 
- general: https://mode.com/example-gallery/python_chart_annotations/
- matplotlib colors: https://matplotlib.org/examples/color/named_colors.html

In [None]:
# Daily infections (all Bundeslaender) as a matplotlib line chart with lockdown markers.
ax = rki_daily_plot['AnzahlFall'].plot(
    color='grey', figsize=(11, 8), fontsize=11, zorder=2)

# Despine
for side in ('right', 'top', 'left', 'bottom'):
    ax.spines[side].set_visible(False)

# Remove x-axis label
ax.set_xlabel('')

# Switch off ticks but keep the tick labels.
# tick_params expects booleans; the strings "off"/"on" are both truthy and
# therefore did not switch anything off.
ax.tick_params(axis="both", which="both", bottom=False, top=False,
               labelbottom=True, left=False, right=False, labelleft=True)

# Draw horizontal axis lines at the y-axis tick values
# (named y_ticks so that the notebook-wide name `vals` is not overwritten)
y_ticks = ax.get_yticks()
for y_tick in y_ticks:
    ax.axhline(y=y_tick, linestyle='dashed', alpha=0.3, color='grey', zorder=1)

# Format y-axis label
ax.yaxis.set_major_formatter(StrMethodFormatter('{x:,g}'))

# Set y-axis label
ax.set_ylabel("Corona Infections per Day",
              labelpad=20, weight='bold', color='grey')

# Set title
ax.set_title('Corona Infections on a Daily Basis (all Bundesländer)')

# Set y-axis limit
ylim = ax.set_ylim(bottom=0)


# Annotate: dashed vertical lines and text labels at the lockdown start and end
x_line_ann_start = start_lck
x_line_ann_end = end_lck
x_text_ann_start = start_lck
x_text_ann_end = end_lck
# the text labels sit at the height of the highest daily value
peak_cases = np.max(rki_daily_plot.loc[:, 'AnzahlFall'])

ax.axvline(x=x_line_ann_start, linestyle='dashed',
           alpha=0.8, color='palegreen')
ax.axvline(x=x_line_ann_end, linestyle='dashed', alpha=0.8, color='salmon')
ax.text(x=x_text_ann_start, y=peak_cases,
        s='Lockdown Start', alpha=0.8, color='palegreen')
ax.text(x=x_text_ann_end, y=peak_cases,
        s='Lockdown End', alpha=0.8, color='salmon')

Source: https://towardsdatascience.com/interactive-climate-data-visualizations-with-python-plotly-de0472490b09

In [None]:
import plotly.express as px

vac_mask_simple = rki_vac_group.loc[:, 'summer_vac'] == True
simple_df = rki_vac_group[vac_mask_simple]
fig = px.bar(simple_df, x='week_no', y='AnzahlFall',
             color='Bundesland', facet_row='Bundesland')
fig.show()

In [None]:
import plotly.express as px

In [None]:
short_df = rki_vac_group[(vac_mask_simple)]
fig = px.bar(short_df, x='Bundesland', y='AnzahlFall', color='AnzahlFall',
             facet_col='week_no')
fig.show()

In [None]:
g = sns.FacetGrid(rki_vac_group.loc[vac_mask, :],
                  row="week_no", col='Bundesland', hue="AnzahlFall")
g.map(sns.barplot, "week_no", "AnzahlFall", alpha=.7)
g.add_legend()

In [None]:
g = sns.FacetGrid(rki_vac_group, col='summer_vac', hue="Bundesland")
g.map(sns.barplot, "week_no", "AnzahlFall", alpha=.7)
g.add_legend()

# Plotly Filled Area Experiments

Main source: https://community.plotly.com/t/color-change-by-range-in-line-chart/17829/2
- secondary: https://plotly.com/python/filled-area-plots/

In [None]:
# colors_list = ['aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure',
#                'beige', 'bisque', 'black', 'blanchedalmond', 'blue',
#                'blueviolet', 'brown', 'burlywood', 'cadetblue',
#                'chartreuse', 'chocolate']
colors_list = ['#ef55f1', '#fb84ce', '#fbafa1', '#fcd471', '#f0ed35', '#c6e516', '#96d310', '#61c10b',
               '#31ac28', '#439064', '#3d719a', '#284ec8', '#2e21ea', '#6324f5', '#9139fa', '#c543fa']
len(colors_list)

In [None]:
df_plt_exp = rki_bl_cases.copy()

In [None]:
df_plt_exp['cumsum'].where(df_plt_exp.week_no.isin(weeks_lck))

In [None]:
# Experiment: highlight lockdown and vacation weeks per Bundesland with scattergl traces
# filled with fill='toself'.
# challenges:
# - it does not fill if set to 'tonexty' (see graph below)
# - if set to 'tozeroy', it fills everything and covers the previous colors (no opacity)
# - have to use cum sum, otherwise it does not stack it

import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot

fig = go.Figure()

for bl in df_plt_exp.loc[:, 'Bundesland'].unique():
    # rows of this Bundesland only
    tmp_df = df_plt_exp.loc[df_plt_exp['Bundesland'] == bl, :]
    # NOTE(review): bl_mask and vac_mask are assigned but not used inside this loop.
    bl_mask = df_plt_exp.loc[:, 'Bundesland'] == bl
    # NOTE(review): five weeks starting at the listed start week, whereas the summer_vac
    # flag built earlier uses six weeks shifted by the one-week offset — confirm which is intended.
    vac_weeks_bl = np.arange(summer_vac_dict[bl], summer_vac_dict[bl] + 5)
    vac_mask = df_plt_exp.loc[:, 'week_no'].isin(vac_weeks_bl)

    # grey line: stacked value ('cumsum') of this Bundesland over all weeks
    fig.add_scattergl(x=tmp_df.loc[:, 'week_no'].unique(
    ), y=tmp_df['cumsum'], line=dict(color='darkgrey'))

    # lockdown
    # .where() replaces the values outside the lockdown weeks with NaN
    fig.add_scattergl(x=tmp_df.loc[:, 'week_no'].unique(), y=tmp_df['cumsum'].where(
        tmp_df.week_no.isin(weeks_lck)), line=dict(color=colors_list[bl-1]), fill='toself')

    # vacation
    # same approach for the vacation weeks of this Bundesland
    fig.add_scattergl(x=tmp_df.loc[:, 'week_no'].unique(), y=tmp_df['cumsum'].where(
        tmp_df.week_no.isin(vac_weeks_bl)), line=dict(color=colors_list[bl-1]), fill='toself')


fig.update_layout(showlegend=False,
                  title_text='Corona Infections per Bundesland. Highlights: Lockdown & Summer Vacation Week')
fig.show()

In [None]:
# Filled-area experiment, variant with fill='tonexty': a grey cumulative line
# per Bundesland plus coloured overlays for lockdown and vacation weeks.
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot

fig = go.Figure()


for bl in df_plt_exp['Bundesland'].unique():
    bl_mask = df_plt_exp['Bundesland'] == bl
    tmp_df = df_plt_exp.loc[bl_mask]

    # Five consecutive weeks starting at this Bundesland's entry in
    # summer_vac_dict.
    vac_start = summer_vac_dict[bl]
    vac_weeks_bl = np.arange(vac_start, vac_start + 5)
    # Assigned for parity with the original cell (built from rki_bl_cases);
    # not read inside this loop.
    vac_mask = rki_bl_cases['week_no'].isin(vac_weeks_bl)

    weeks = tmp_df['week_no'].unique()
    cumulative = tmp_df['cumsum']

    # Grey base line for the whole period.
    fig.add_scattergl(x=weeks, y=cumulative, line=dict(color='darkgrey'))

    # Coloured overlays: first the lockdown weeks, then the vacation weeks.
    for highlight_weeks in (weeks_lck, vac_weeks_bl):
        fig.add_scattergl(
            x=weeks,
            y=cumulative.where(tmp_df['week_no'].isin(highlight_weeks)),
            line=dict(color=colors_list[bl - 1]),
            fill='tonexty',
        )


fig.update_layout(
    showlegend=False,
    title_text='Corona Infections per Bundesland. Highlights: Lockdown & Summer Vacation Week',
)
fig.show()

In [None]:
# Per-phase experiment: every Bundesland is drawn as three separate traces
# (before, during and after the lockdown).
# challenges:
# - each trace ends up as a different graph

import plotly.graph_objects as go

fig = go.Figure()
for bl in df_plt_exp['Bundesland'].unique():
    # Only the lockdown segment is filled: 'tozeroy' (down to the x-axis) for
    # Bundesland 1, 'tonexty' (original note: fill to trace0 y) for the rest.
    lockdown_fill = 'tozeroy' if bl == 1 else 'tonexty'
    phases = (
        (before_lck, 'none'),        # before the lockdown
        (lck_mask, lockdown_fill),   # lockdown
        (after_lck, 'none'),         # after lockdown
    )
    for phase_mask, fill_mode in phases:
        fig.add_trace(go.Scatter(
            x=df_plt_exp.loc[phase_mask, 'week_no'].unique(),
            y=df_plt_exp.loc[(df_plt_exp['Bundesland'] == bl) & (phase_mask), 'AnzahlFall'],
            fill=fill_mode,
        ))


fig.show()

In [None]:
# Stackgroup experiment: per Bundesland one unfilled trace for the ordinary
# weeks and one filled trace for the highlighted (lockdown or vacation) weeks.
# challenges
# - if not the same stackgroup - different colors, if the same one - too much
#   stacking - the numbers are not correct
#
# Fix: the "ordinary weeks" mask used to be `~lockdown | ~vacation`, which is
# True for every week that is not in BOTH sets, so the full series was drawn
# and the highlight trace was stacked on top of it. The mask must be the exact
# complement of the highlight mask: `~(lockdown | vacation)`.

import plotly.graph_objects as go

fig = go.Figure()
for bl in df_plt_exp.loc[:, 'Bundesland'].unique():
    tmp_df = df_plt_exp.loc[df_plt_exp['Bundesland'] == bl, :]
    weeks = tmp_df.loc[:, 'week_no'].unique()

    # True for rows whose week is a lockdown week or a summer-vacation week.
    highlighted = tmp_df.week_no.isin(weeks_lck) | tmp_df.week_no.isin(summer_vac_weeks)

    # no lockdown, no vacation
    fig.add_trace(go.Scatter(x=weeks,
                             y=tmp_df.AnzahlFall.where(~highlighted),
                             stackgroup='one',  # if not the same, different colors
                             fill='none',
                             showlegend=True,
                             name=f'{bl}_no_fill',
                             ))

    # lockdown or vacation
    fig.add_trace(go.Scatter(x=weeks,
                             y=tmp_df.AnzahlFall.where(highlighted),
                             stackgroup='one',
                             fill='tonexty',  # fill towards the previous trace
                             showlegend=True,
                             name=f'{bl}_fill'
                             ))

fig.show()

In [None]:
# Test frame: only the rows of Bundesland 1.
df_plt_exp_test = rki_vac_group.loc[rki_vac_group['Bundesland'].eq(1)]

In [None]:
# Boolean row masks over df_plt_exp_test (despite the _df suffix these are
# masks, not frames): weeks before the lockdown start, after the lockdown end,
# and inside the lockdown week list.
before_lck_df = df_plt_exp_test['week_no'].lt(start_lck_wk)
after_lck_df = df_plt_exp_test['week_no'].gt(end_lck_wk)
lck_df = df_plt_exp_test['week_no'].isin(weeks_lck)

In [None]:
# Sum AnzahlFall per (week_no, Bundesland) and turn the group keys back into
# ordinary columns.
df_plt_exp_test_g = (
    df_plt_exp_test[['week_no', 'Bundesland', 'AnzahlFall']]
    .groupby(['week_no', 'Bundesland'])
    .sum()
    .reset_index(drop=False)
)

In [None]:
# Single-frame check of the highlighting idea on df_plt_exp_test_g: one plain
# line plus two overlays filled down to the x-axis.
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot

fig = go.Figure()

weeks = df_plt_exp_test_g['week_no'].unique()
cases = df_plt_exp_test_g['AnzahlFall']

# Base line over all weeks.
fig.add_scattergl(x=weeks, y=cases, line=dict(color='salmon'))

# Overlays: the lockdown weeks, then weeks 27-31.
# NOTE(review): 27 is hard-coded here - presumably the summer-vacation start
# week for this Bundesland; confirm against summer_vac_dict.
for highlight_weeks in (weeks_lck, np.arange(27, 27 + 5)):
    fig.add_scattergl(
        x=weeks,
        y=cases.where(df_plt_exp_test_g['week_no'].isin(highlight_weeks)),
        line=dict(color='salmon'),
        fill='tozeroy',
    )

iplot(fig)