# Streamgraph en la R.M.
> Streamgraph en la R.M.

- toc: true 
- badges: true
- comments: true
- author: Alonso Silva Allende
- categories: [jupyter]
- image: images/fallecimientos.png

In [1]:
#hide
import numpy as np
import pandas as pd
import altair as alt

In [2]:
#hide
# Download the MinCiencia "producto1" CSV. Judging by how it is used below,
# it has one row per comuna with cumulative case counts in per-date columns.
county_cumulative_cases = pd.read_csv(
    "https://raw.githubusercontent.com/MinCiencia/Datos-COVID19/"
    "master/output/producto1/Covid-19.csv"
)

In [3]:
#hide
# Keep only the rows whose Region is "Metropolitana" and discard the metadata
# columns, leaving "Comuna" plus the remaining columns.
county_cumulative_cases_RM = county_cumulative_cases.loc[
    county_cumulative_cases["Region"] == "Metropolitana"
].drop(columns=["Region", "Codigo region", "Poblacion", "Tasa", "Codigo comuna"])

In [4]:
#hide
# Transpose so that rows are the former column labels and columns are comunas,
# then difference consecutive rows to turn cumulative counts into increments.
# Rows containing a missing value (including the first one, which has no
# predecessor) are dropped.
county_cases_RM = (
    county_cumulative_cases_RM
    .set_index("Comuna")
    .transpose()
    .diff()
    .dropna()
)

In [5]:
#hide
# Convert the per-report increments into each comuna's share of the row total;
# the "Total" column itself becomes total/total.
# The division is label-based. The previous `x / x[-1]` depended on the
# positional fallback of Series.__getitem__ on a string-labelled Series, which
# pandas has deprecated. Dropping any pre-existing "Total" first keeps a
# re-run of this cell from folding the old total into the new row sum.
comuna_increments = county_cases_RM.drop(columns="Total", errors="ignore")
comuna_increments["Total"] = comuna_increments.sum(axis=1)
county_cases_RM = comuna_increments.div(comuna_increments["Total"], axis=0)

In [6]:
#hide
# First and last row labels of the comuna-level table.
start_date = county_cases_RM.index[0]
end_date = county_cases_RM.index[-1]

# Column-less frame indexed by every calendar day in that span, with the dates
# stored as "YYYY-MM-DD" strings under the index name "Fecha".
data = pd.DataFrame(
    {"Fecha": pd.date_range(start=start_date, end=end_date).strftime("%Y-%m-%d")}
).set_index("Fecha")

In [7]:
#hide
# Copy the proportions into the daily frame one column at a time. Each
# assignment aligns on the index, so days that have no matching row in
# `county_cases_RM` are left as NaN.
for column_name in county_cases_RM.columns:
    data[column_name] = county_cases_RM[column_name]

In [8]:
#hide
# Fill the gaps between known rows by linear interpolation down each column.
data = data.interpolate(method="linear", axis=0)

In [9]:
#hide
# Final row of the interpolated frame; reused further down to fill the days
# that come after it.
last_known_proportion = data.iloc[-1, :]

In [10]:
#hide
# Download the MinCiencia "producto3" CSV (CasosTotalesCumulativo). It is used
# below as one row per region with the remaining columns holding the series.
regional_cumulative_cases = pd.read_csv(
    "https://raw.githubusercontent.com/MinCiencia/Datos-COVID19/"
    "master/output/producto3/CasosTotalesCumulativo.csv"
)

In [11]:
#hide
# Take the last row whose Region is "Metropolitana" as a Series and remove the
# "Region" entry, leaving only the remaining labelled values.
regional_cumulative_cases_RM = (
    regional_cumulative_cases
    .loc[regional_cumulative_cases["Region"] == "Metropolitana"]
    .iloc[-1]
    .drop(labels="Region")
)
# Difference of consecutive entries; the first entry has no predecessor, so
# its NaN is dropped.
regional_cases_RM = regional_cumulative_cases_RM.diff().dropna()

In [12]:
#hide
# Label of the last row of the comuna-level table; the bare expression on the
# next line echoes it as the cell output.
last_known_date = county_cases_RM.index[-1]
last_known_date

'2020-06-05'

In [13]:
#hide
# Whole days between the last row of `data` and the last regional entry.
n_missing_days = (
    pd.Timestamp(regional_cases_RM.index[-1]) - pd.Timestamp(data.index[-1])
).days

In [14]:
#hide
# Swap rows and columns so that dates become the columns.
data = data.transpose()

In [15]:
#hide
# Add one column per missing day after `last_known_date`, each filled with the
# last known proportions. Every new date label is printed as it is added.
for day_offset in range(1, n_missing_days + 1):
    idx = (pd.to_datetime(last_known_date) + pd.DateOffset(day_offset)).strftime("%Y-%m-%d")
    print(idx)
    data[idx] = last_known_proportion

2020-06-06
2020-06-07
2020-06-08


In [16]:
#hide
# Swap rows and columns again so that dates are back on the index.
data = data.transpose()

In [17]:
#hide
# Attach the regional series as a "Regional" column, aligned on the index.
data = data.assign(Regional=regional_cases_RM)

In [18]:
#hide
# Multiply every other column by that row's "Regional" value and truncate to
# integers; the "Regional" column itself is removed from the result.
# Selecting "Regional" by label replaces the previous `x[:-1] * x[-1]`, which
# depended on the positional fallback of Series.__getitem__ (deprecated in
# pandas). It also makes an accidental re-run fail with a KeyError instead of
# silently multiplying by whichever column happens to be last.
data = (
    data.drop(columns="Regional")
    .mul(pd.to_numeric(data["Regional"]), axis=0)
    .astype(int)
)

In [19]:
#hide
# Comunas assigned to each colour-named sector ("Verde", "Rojo", "Azul") that
# is aggregated and charted below.
list_verde = [
    "La Reina", "Las Condes", "Lo Barnechea", "Nunoa", "Providencia", "Vitacura",
]
list_rojo = [
    "Santiago", "Conchali", "Huechuraba", "Independencia", "Recoleta", "Quilicura",
    "Cerro Navia", "Lo Prado", "Pudahuel", "Quinta Normal", "Renca",
    "Estacion Central", "Maipu", "Cerrillos", "Padre Hurtado", "Penaflor",
]
list_azul = [
    "Pedro Aguirre Cerda", "San Miguel", "San Joaquin", "Lo Espejo", "La Cisterna",
    "La Granja", "San Ramon", "El Bosque", "San Bernardo", "La Pintana",
    "Macul", "La Florida", "Penalolen", "Puente Alto",
]

In [20]:
#hide
# Split the per-comuna frame into one sub-frame per sector.
cases_verde = data.loc[:, list_verde]
cases_rojo = data.loc[:, list_rojo]
cases_azul = data.loc[:, list_azul]

Mapas de [Juanizio Correa](https://twitter.com/Juanizio_C):

> twitter: https://twitter.com/PolarBearby/status/1265826899198017536

In [21]:
#hide
# data = pd.DataFrame()
# data["Verde"] = np.round(cases_verde.sum(axis=1).rolling(window=7).mean())
# data["Rojo"] = np.round(cases_rojo.sum(axis=1).rolling(window=7).mean())
# data["Azul"] = np.round(cases_azul.sum(axis=1).rolling(window=7).mean())

In [22]:
#hide
# Row-wise sum of each sector's comunas, one column per sector.
# Note that `data` is rebound here to this three-column frame.
data = pd.DataFrame(
    {
        "Verde": cases_verde.sum(axis=1),
        "Rojo": cases_rojo.sum(axis=1),
        "Azul": cases_azul.sum(axis=1),
    }
)

In [23]:
#hide
# Row-wise total across the sector columns.
data_total = data.sum(axis="columns")

In [24]:
#hide
# Each sector's share of the row total, in percent, rounded to one decimal.
# Multiplying by 100 before dividing keeps the same operation order as
# `100 * value / total`.
data_percentage = (
    data[["Verde", "Rojo", "Azul"]]
    .mul(100)
    .div(data_total, axis=0)
    .round(1)
)

In [25]:
#hide
# Rebind `data` to the 7-row rolling mean of each sector's row-wise sum,
# rounded to whole numbers.
data = pd.DataFrame(
    {
        sector: np.round(sector_cases.sum(axis=1).rolling(window=7).mean())
        for sector, sector_cases in (
            ("Verde", cases_verde),
            ("Rojo", cases_rojo),
            ("Azul", cases_azul),
        )
    }
)

In [26]:
#hide
# Reshape to long format: one row per ("Fecha", "Sector") pair with the value
# stored under "Casos confirmados".
data = pd.melt(
    data.reset_index(),
    id_vars="Fecha",
    var_name="Sector",
    value_name="Casos confirmados",
)

In [27]:
#hide
# Reshape to long format: one row per ("Fecha", "Sector") pair with the value
# stored under "Porcentaje".
data_percentage = pd.melt(
    data_percentage.reset_index(),
    id_vars="Fecha",
    var_name="Sector",
    value_name="Porcentaje",
)

In [28]:
#hide_input

# Explicit sector -> colour mapping used by the area chart.
domain = ["Azul", "Rojo", "Verde"]
range_ = ["blue", "red", "green"]

# Top panel: centre-stacked area chart of "Casos confirmados" per sector.
chart1 = (
    alt.Chart(data)
    .mark_area()
    .encode(
        x=alt.X("Fecha", axis=alt.Axis(title="", domain=False)),
        y=alt.Y("Casos confirmados", axis=None, stack="center"),
        color=alt.Color("Sector", scale=alt.Scale(domain=domain, range=range_)),
        tooltip=["Fecha", "Casos confirmados"],
    )
    .properties(
        width=700,
        title="Nuevos casos confirmados en la R.M. por zona socioeconómica",
    )
)

# Bottom panel: bars of "Porcentaje" stacked with stack="normalize", with the
# x axis ticks and labels hidden.
chart2 = (
    alt.Chart(data_percentage)
    .mark_bar()
    .encode(
        x=alt.X("Fecha", axis=alt.Axis(title="", domain=False, ticks=False, labels=False)),
        y=alt.Y("Porcentaje", axis=alt.Axis(title="", format="%"), stack="normalize"),
        color=alt.Color("Sector"),
        tooltip=["Fecha", "Porcentaje"],
    )
    .properties(width=700, height=100)
)

# Stack the two panels vertically with no spacing. As the last expression of
# the cell, this is what gets rendered.
(chart1 & chart2).configure_concat(spacing=0)

Fuentes: [Ministerio de Ciencia](https://github.com/MinCiencia/Datos-COVID19), [Ministerio de Salud](https://www.minsal.cl/)

In [29]:
#hide
# Download the MinCiencia "producto1" CSV again (same URL as earlier in the
# notebook). It is used below as one row per comuna with per-date columns.
county_cumulative_cases = pd.read_csv(
    "https://raw.githubusercontent.com/MinCiencia/Datos-COVID19/"
    "master/output/producto1/Covid-19.csv"
)

In [30]:
#hide
# Keep only the rows whose Region is "Metropolitana" and discard the metadata
# columns, leaving "Comuna" plus the remaining columns.
county_cumulative_cases_RM = county_cumulative_cases.loc[
    county_cumulative_cases["Region"] == "Metropolitana"
].drop(columns=["Region", "Codigo region", "Poblacion", "Tasa", "Codigo comuna"])

In [31]:
#hide
# Transpose so that rows are the former column labels and columns are comunas,
# then difference consecutive rows to turn cumulative counts into increments.
# Rows containing a missing value (including the first one) are dropped.
county_cases_RM = (
    county_cumulative_cases_RM
    .set_index("Comuna")
    .transpose()
    .diff()
    .dropna()
)

In [32]:
#hide
# Convert the per-report increments into each comuna's share of the row total;
# the "Total" column itself becomes total/total.
# The division is label-based. The previous `x / x[-1]` depended on the
# positional fallback of Series.__getitem__ on a string-labelled Series, which
# pandas has deprecated. Dropping any pre-existing "Total" first keeps a
# re-run of this cell from folding the old total into the new row sum.
comuna_increments = county_cases_RM.drop(columns="Total", errors="ignore")
comuna_increments["Total"] = comuna_increments.sum(axis=1)
county_cases_RM = comuna_increments.div(comuna_increments["Total"], axis=0)

In [33]:
#hide
# First and last row labels of the comuna-level table.
start_date = county_cases_RM.index[0]
end_date = county_cases_RM.index[-1]

# Rebind `data` to a column-less frame indexed by every calendar day in that
# span, stored as "YYYY-MM-DD" strings under the index name "Fecha".
data = pd.DataFrame(
    {"Fecha": pd.date_range(start=start_date, end=end_date).strftime("%Y-%m-%d")}
).set_index("Fecha")

In [34]:
#hide
# Copy the proportions into the daily frame one column at a time. Each
# assignment aligns on the index, so days that have no matching row in
# `county_cases_RM` are left as NaN.
for column_name in county_cases_RM.columns:
    data[column_name] = county_cases_RM[column_name]

In [35]:
#hide
# Fill the gaps between known rows by linear interpolation down each column.
data = data.interpolate(method="linear", axis=0)

In [36]:
#hide
# Final row of the interpolated frame; reused further down to fill the days
# that come after it.
last_known_proportion = data.iloc[-1, :]

In [37]:
#hide
# Download the MinCiencia "producto3" CSV (CasosTotalesCumulativo) again, from
# the same URL as earlier in the notebook.
regional_cumulative_cases = pd.read_csv(
    "https://raw.githubusercontent.com/MinCiencia/Datos-COVID19/"
    "master/output/producto3/CasosTotalesCumulativo.csv"
)

In [38]:
#hide
# Take the last row whose Region is "Metropolitana" as a Series and remove the
# "Region" entry, leaving only the remaining labelled values.
regional_cumulative_cases_RM = (
    regional_cumulative_cases
    .loc[regional_cumulative_cases["Region"] == "Metropolitana"]
    .iloc[-1]
    .drop(labels="Region")
)
# Difference of consecutive entries; the first entry has no predecessor, so
# its NaN is dropped.
regional_cases_RM = regional_cumulative_cases_RM.diff().dropna()

In [39]:
#hide
# Label of the last row of the comuna-level table; the bare expression on the
# next line echoes it as the cell output.
last_known_date = county_cases_RM.index[-1]
last_known_date

'2020-06-05'

In [40]:
#hide
# Whole days between the last row of `data` and the last regional entry.
n_missing_days = (
    pd.Timestamp(regional_cases_RM.index[-1]) - pd.Timestamp(data.index[-1])
).days

In [41]:
#hide
# Swap rows and columns so that dates become the columns.
data = data.transpose()

In [42]:
#hide
# Add one column per missing day after `last_known_date`, each filled with the
# last known proportions. Every new date label is printed as it is added.
for day_offset in range(1, n_missing_days + 1):
    idx = (pd.to_datetime(last_known_date) + pd.DateOffset(day_offset)).strftime("%Y-%m-%d")
    print(idx)
    data[idx] = last_known_proportion

2020-06-06
2020-06-07
2020-06-08


In [43]:
#hide
# Swap rows and columns again so that dates are back on the index.
data = data.transpose()

In [44]:
#hide
# Attach the regional series as a "Regional" column, aligned on the index.
data = data.assign(Regional=regional_cases_RM)

In [45]:
#hide
# Multiply every other column by that row's "Regional" value and truncate to
# integers; the "Regional" column itself is removed from the result.
# Selecting "Regional" by label replaces the previous `x[:-1] * x[-1]`, which
# depended on the positional fallback of Series.__getitem__ (deprecated in
# pandas). It also makes an accidental re-run fail with a KeyError instead of
# silently multiplying by whichever column happens to be last.
data = (
    data.drop(columns="Regional")
    .mul(pd.to_numeric(data["Regional"]), axis=0)
    .astype(int)
)

In [46]:
#hide
# Comunas belonging to each of the seven geographic sectors that are
# aggregated and charted below.
list_nororiente = [
    "La Reina", "Las Condes", "Lo Barnechea", "Nunoa", "Providencia", "Vitacura",
]
list_suroriente = ["Macul", "La Florida", "Penalolen", "Puente Alto"]
list_norponiente = [
    "Cerro Navia", "Lo Prado", "Pudahuel", "Quinta Normal", "Renca",
]
list_surponiente = [
    "Estacion Central", "Maipu", "Cerrillos", "Padre Hurtado", "Penaflor",
]
list_norte = [
    "Conchali", "Huechuraba", "Independencia", "Recoleta", "Quilicura",
]
list_centro = ["Santiago"]
list_sur = [
    "Pedro Aguirre Cerda", "San Miguel", "San Joaquin", "Lo Espejo", "La Cisterna",
    "La Granja", "San Ramon", "El Bosque", "San Bernardo", "La Pintana",
]

In [47]:
#hide
# Split the per-comuna frame into one sub-frame per geographic sector.
cases_nororiente = data.loc[:, list_nororiente]
cases_suroriente = data.loc[:, list_suroriente]
cases_norponiente = data.loc[:, list_norponiente]
cases_surponiente = data.loc[:, list_surponiente]
cases_norte = data.loc[:, list_norte]
cases_centro = data.loc[:, list_centro]
cases_sur = data.loc[:, list_sur]

In [48]:
#hide
# Rebind `data` to the 7-row rolling mean of each geographic sector's row-wise
# sum. Column labels (including their numeric prefixes) and column order are
# kept exactly as before.
data = pd.DataFrame(
    {
        sector_label: sector_cases.sum(axis=1).rolling(window=7).mean()
        for sector_label, sector_cases in (
            ("7. Nororiente", cases_nororiente),
            ("6. Suroriente", cases_suroriente),
            ("5. Norte", cases_norte),
            ("4. Centro", cases_centro),
            ("3. Sur", cases_sur),
            ("2. Norponiente", cases_norponiente),
            ("1. Surponiente", cases_surponiente),
        )
    }
)

In [49]:
#hide
# Move the index into a regular column so it can be used as the melt key.
data = data.reset_index(drop=False)

In [50]:
#hide
# Reshape to long format: one row per ("Fecha", "Sector") pair with the value
# stored under "Casos confirmados".
data = pd.melt(
    data,
    id_vars="Fecha",
    var_name="Sector",
    value_name="Casos confirmados",
)

In [51]:
#hide_input
# Top panel: centre-stacked area chart of "Casos confirmados" per sector.
chart1 = (
    alt.Chart(data)
    .mark_area()
    .encode(
        x=alt.X("Fecha", axis=alt.Axis(title="", domain=False)),
        y=alt.Y("Casos confirmados", axis=None, stack="center"),
        color=alt.Color("Sector"),
        tooltip=["Fecha", "Casos confirmados"],
    )
    .properties(
        width=700,
        title="Nuevos casos confirmados en la R.M. por zona geográfica (promedio móvil 7 días)",
    )
)

# Bottom panel: the same values as bars stacked with stack="normalize", with
# the x axis ticks and labels hidden.
chart2 = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("Fecha", axis=alt.Axis(title="", domain=False, ticks=False, labels=False)),
        y=alt.Y("Casos confirmados", axis=alt.Axis(title="", format="%"), stack="normalize"),
        color=alt.Color("Sector"),
        tooltip=["Fecha", "Casos confirmados"],
    )
    .properties(width=700, height=100)
)

# Stack the two panels vertically with no spacing. As the last expression of
# the cell, this is what gets rendered.
(chart1 & chart2).configure_concat(spacing=0)

Fuentes: [Ministerio de Ciencia](https://github.com/MinCiencia/Datos-COVID19), [Ministerio de Salud](https://www.minsal.cl/). Para la agrupación de comunas en 7 sectores se usó la página de Wikipedia [Comunas de Santiago de Chile](https://es.wikipedia.org/wiki/Anexo:Comunas_de_Santiago_de_Chile#Conurbaci%C3%B3n_de_Santiago).

* No hay datos diarios de nuevos casos confirmados por comuna. Estos se estiman interpolando linealmente las proporciones observadas en los dos informes más cercanos en el tiempo y aplicándolas a los casos confirmados regionales.