In [None]:
import pandas as pd
import numpy as np
import ruptures as rpt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import ipywidgets as widgets
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil import parser

# Route every pandas `.plot` accessor call in this notebook through plotly.
pd.options.plotting.backend = "plotly"

# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Restore `df` and `covid_cases` from IPython's %store database.
# NOTE(review): both must have been saved beforehand with `%store df` /
# `%store covid_cases` (presumably by a separate data-preparation notebook).
# If that has not happened, the names stay undefined and every cell below fails.
%store -r df
%store -r covid_cases

In [None]:
# Parse the `series` labels into a datetime `date` column.
df['date'] = df['series'].pipe(pd.to_datetime)

In [None]:
# Keep only rows dated strictly after 2018-10-01 and strictly before 2021-12-01.
df = df.loc[
    lambda frame: (frame['date'] > '2018-10-01') & (frame['date'] < '2021-12-01')
]

In [None]:
# Median `steps` for every (hour, series) pair, pivoted so each `series` value
# becomes one line over `hour`.
# `steps` is selected *before* aggregating: taking the median of every column and
# discarding all but one afterwards is wasted work, and raises TypeError on
# pandas >= 2.0 as soon as the frame holds a non-numeric column.
(df.groupby(['hour', 'series'])['steps']
   .median()
   .unstack('series')
   .plot.line(title="Median steps per hour for each series"))

In [None]:
# For each `series` value, count the ids that have a non-null median `steps`,
# and plot that count.
# `steps` is selected before aggregating so only the needed column is reduced;
# aggregating the whole frame raises TypeError on pandas >= 2.0 when a
# non-numeric column is present.
(df.groupby(['id', 'series'])['steps']
   .median()
   .unstack('series')
   .count()
   .plot(title="Number of users for each month"))

In [None]:
# For each `series` value, count the ids that have a non-null median `steps`
# (displayed as a table).
# `steps` is selected before aggregating so only the needed column is reduced;
# aggregating the whole frame raises TypeError on pandas >= 2.0 when a
# non-numeric column is present.
(df.groupby(['id', 'series'])['steps']
   .median()
   .unstack('series')
   .count())

In [None]:
# Per-series total: the median `steps` of each `hour`, summed over all hours.
# `steps` is selected before aggregating so only the needed column is reduced;
# aggregating the whole frame raises TypeError on pandas >= 2.0 when a
# non-numeric column is present.
months = (
    df.groupby(['hour', 'series'])['steps']
      .median()
      .unstack('series')
      .sum()
)
months

In [None]:
# Spline-smoothed line chart, with point markers, of the per-series totals in `months`.
months.plot(kind='line', height=600, line_shape='spline', markers=True)

In [None]:
# Summed hourly-median `steps` for the single series labelled '2020-02-16'.
# `steps` is selected before aggregating so only the needed column is reduced;
# aggregating the whole frame raises TypeError on pandas >= 2.0 when a
# non-numeric column is present.
month_before = (
    df[df.series == '2020-02-16']
      .groupby(['hour', 'series'])['steps']
      .median()
      .unstack('series')
      .sum()['2020-02-16']
)
month_before

In [None]:
# Baseline: across all series dated before 2020-03-16, the median of the
# per-series totals (each total = hourly-median `steps` summed over hours).
# `steps` is selected before aggregating so only the needed column is reduced;
# aggregating the whole frame raises TypeError on pandas >= 2.0 when a
# non-numeric column is present.
period_before = (
    df[df.date < '2020-03-16']
      .groupby(['hour', 'series'])['steps']
      .median()
      .unstack('series')
      .sum()
      .median()
)
period_before

In [None]:
# Tabulate, per series label, its total steps (`steps`) and the deviation of
# that total from the `period_before` baseline (`diff`).
# Built with vectorised pandas operations instead of appending one Python list
# per row.
month_list = months.keys()  # kept so any later cell that reads it still works
months_diff = (
    months.rename('steps')
          .rename_axis('month')
          .reset_index()
          .assign(diff=lambda frame: frame['steps'] - period_before)
)

In [None]:
# Running total of the per-month deviations stored in `diff`.
months_diff['cu_sum'] = months_diff.loc[:, 'diff'].cumsum()

In [None]:
# Plot the deviation from the baseline against its month. Plotting the bare
# `diff` column would put the positional row index (0, 1, 2, ...) on the x-axis,
# so the frame is plotted with `month` as x instead.
months_diff.plot.line(
    x='month',
    y='diff',
    markers=True,
    line_shape='spline',
    height=600,
    title="Difference in steps from the baseline, per month",
)

In [None]:
# Plot the running total of the deviations against its month. Plotting the bare
# `cu_sum` column would put the positional row index (0, 1, 2, ...) on the
# x-axis, so the frame is plotted with `month` as x instead.
months_diff.plot.line(
    x='month',
    y='cu_sum',
    markers=True,
    line_shape='spline',
    height=700,
    title="Cumulative difference in steps from the baseline, per month",
)

In [None]:
# Divide the case counts by a fixed factor of 150 and store them as `scaled`.
# NOTE(review): presumably chosen so the bars fit the same y-axis as the step
# differences; 150 looks hand-tuned — confirm.
covid_cases['scaled'] = covid_cases['cases'].div(150)

# Step differences as a line, scaled case counts as bars, on one shared axis.
fig = px.line(x=months_diff['month'], y=months_diff['diff'])
fig.add_trace(go.Bar(x=covid_cases['date'], y=covid_cases['scaled']))
fig.show()

In [None]:
# Covid case counts (bars, primary y-axis) with the monthly step differences
# (line, secondary y-axis), so each series keeps its own scale.
fig = make_subplots(specs=[[{"secondary_y": True}]])
# go.Scatter replaces go.Line: the latter is a deprecated alias that only
# works because an untyped trace falls back to a scatter trace.
fig.add_trace(
    go.Scatter(x=months_diff['month'], y=months_diff['diff'], name="Steps"),
    secondary_y=True,
)
fig.add_trace(
    go.Bar(x=covid_cases['date'], y=covid_cases['cases'], name="Covid cases"),
    secondary_y=False,
)
fig.update_layout(
    title_text="Covid cases vs difference in steps each month"
)
fig.update_yaxes(title_text="Cases", secondary_y=False, domain=[0, 1])
# No `domain` on the secondary axis: it overlays the primary axis, and the
# previous zero-height `[0, 0]` value was not a meaningful extent.
fig.update_yaxes(title_text="Steps", secondary_y=True)

# One tick per month, labelled with the abbreviated month name above the year.
fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y")

fig.show()