In [54]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import plotly.io as pio
from sklearn.preprocessing import MinMaxScaler
#pio.renderers.default = "browser"


In [55]:
# Read the three CSV inputs in a fixed order: training features, training
# labels, and test features.
features_train, labels_train, features_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/dengue_features_train.csv',
        '../data/dengue_labels_train.csv',
        '../data/dengue_features_test.csv',
    )
)

# Attach the target column to the training features. The assignment aligns on
# the row index of the two frames.
features_train["total_cases"] = labels_train["total_cases"]

In [64]:
# Index the training frame by the parsed week start date. The helper "date"
# column is moved into the index; "week_start_date" itself stays as a column.
features_train["date"] = pd.to_datetime(features_train["week_start_date"], format="%Y-%m-%d")
features_train.set_index("date", drop=True, inplace=True)

# Split the rows into one frame per city ("sj" and "iq").
is_san_juan = features_train["city"] == "sj"
is_iquitos = features_train["city"] == "iq"
sj_full = features_train.loc[is_san_juan]
iq_full = features_train.loc[is_iquitos]

# Remove the identifier / calendar columns so only the columns to be scaled
# (including total_cases) remain.
non_feature_columns = ["city", "year", "weekofyear", "week_start_date"]
sj = sj_full.drop(columns=non_feature_columns)
iq = iq_full.drop(columns=non_feature_columns)

# Min-max scale each city separately. The same scaler object is refit for the
# second city, so after this cell `scaler` holds the Iquitos fit.
scaler = MinMaxScaler()
sj_scaled = pd.DataFrame(scaler.fit_transform(sj), index=sj.index, columns=sj.columns)
iq_scaled = pd.DataFrame(scaler.fit_transform(iq), index=iq.index, columns=iq.columns)


In [66]:
# Two stacked panels (San Juan on top, Iquitos below) sharing the x axis, with
# one line per scaled column.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, subplot_titles=["San Juan", "Iquitos"],
                    vertical_spacing=0.02)

# Line colours by column position: a ten-colour cycle repeated twice, followed
# by black for the 21st column.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
          '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
          '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', 'black']

# (subplot row, scaled frame, show traces in legend). Only the second panel
# contributes legend entries; the shared legendgroup lets one legend click
# toggle the same column position in both panels.
panels = [
    (1, sj_scaled, False),
    (2, iq_scaled, True),
]

for row, scaled, show_in_legend in panels:
    for i, c in enumerate(scaled.columns):
        fig.add_trace(
            go.Scatter(
                x=scaled.index,
                y=scaled[c],
                mode='lines',
                name=c,
                # Wrap around the palette so a frame with more columns than
                # colours does not raise IndexError.
                line_color=colors[i % len(colors)],
                legendgroup=f'group{i}',
                showlegend=show_in_legend,
            ),
            row=row, col=1,
        )
fig.show()

In [47]:
# Keep only the calendar keys and the case count for each city.
label_columns = ["year", "weekofyear", "total_cases"]
labels_sj = sj_full[label_columns]
labels_iq = iq_full[label_columns]

In [48]:
# Year x week-of-year grid of mean total cases for San Juan, latest year first.
# Note: this rebinds `sj`, which previously held the unscaled feature frame.
sj_weekly_mean = labels_sj.groupby(["year", "weekofyear"]).mean(numeric_only=True)
sj = sj_weekly_mean.unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
sj.columns = sj.columns.droplevel(0)

fig = px.imshow(sj, title="Total Cases San Juan")
fig.show()

In [41]:
# Week-over-week change in total cases for San Juan (current row minus the
# previous row; the first row has no predecessor and is NaN).
new_cases = (
    labels_sj
    .assign(new_cases=labels_sj["total_cases"].diff())
    .drop(columns="total_cases")
)

# Year x week-of-year grid of the change, latest year first.
sj = new_cases.groupby(["year", "weekofyear"]).mean(numeric_only=True).unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
sj.columns = sj.columns.droplevel()

# The grid holds signed weekly differences (increases and decreases), so the
# title describes a weekly change rather than "days with increasing" cases.
fig = px.imshow(sj, title="Weekly change in Cases San Juan")
fig.show()

In [49]:
# Keep only strictly positive week-over-week changes; zero, negative and
# missing values become NaN. This overwrites the "new_cases" column of the
# existing `new_cases` frame, so the frame stays masked after this cell.
new_cases["new_cases"] = new_cases["new_cases"].where(new_cases["new_cases"] > 0)

# Year x week-of-year grid of the remaining increases, latest year first.
sj = new_cases.groupby(["year", "weekofyear"]).mean(numeric_only=True).unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
sj.columns = sj.columns.droplevel()

# Each cell of the grid is one (year, week) pair, so the title refers to weeks.
fig = px.imshow(sj, title="Weeks with increasing Cases San Juan")
fig.show()

In [19]:
# Year x week-of-year grid of mean total cases for Iquitos, latest year first.
# Note: this rebinds `iq`, which previously held the unscaled feature frame.
iq = labels_iq.groupby(["year", "weekofyear"]).mean(numeric_only=True).unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
iq.columns = iq.columns.droplevel()

# City name spelled "Iquitos", matching the other figure titles in the notebook.
fig = px.imshow(iq, title="Total Cases Iquitos")
fig.show()

In [33]:
# Week-over-week change in total cases for Iquitos (current row minus the
# previous row; the first row has no predecessor and is NaN).
new_cases_iq = (
    labels_iq
    .assign(new_cases=labels_iq["total_cases"].diff())
    .drop(columns="total_cases")
)

# Year x week-of-year grid of the change, latest year first.
iq = new_cases_iq.groupby(["year", "weekofyear"]).mean(numeric_only=True).unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
iq.columns = iq.columns.droplevel()

# The grid holds signed weekly differences (increases and decreases), so the
# title describes a weekly change rather than "days with increasing" cases.
fig = px.imshow(iq, title="Weekly change in Cases Iquitos")
fig.show()

In [34]:
# Keep only strictly positive week-over-week changes; zero, negative and
# missing values become NaN. This overwrites the "new_cases" column of the
# existing `new_cases_iq` frame, so the frame stays masked after this cell.
new_cases_iq["new_cases"] = new_cases_iq["new_cases"].where(new_cases_iq["new_cases"] > 0)

# Year x week-of-year grid of the remaining increases, latest year first.
iq = new_cases_iq.groupby(["year", "weekofyear"]).mean(numeric_only=True).unstack().sort_index(ascending=False)
# Drop the outer column level so only the week numbers label the columns.
iq.columns = iq.columns.droplevel()

# Each cell of the grid is one (year, week) pair, so the title refers to weeks.
fig = px.imshow(iq, title="Weeks with increasing Cases Iquitos")
fig.show()