In [1]:
import pandas as pd
import numpy as np

import csv

import altair as alt
# Let pandas show up to 999 rows of a frame before truncating the display.
pd.set_option('display.max_rows', 999)

In [2]:
# Load the processed case table and parse 'fecha' strictly as YYYY-MM-DD.
casos = pd.read_csv('../data/processed/casos_covid.csv')
casos['fecha'] = pd.to_datetime(casos['fecha'], format='%Y-%m-%d')
# Year-month key ('YYYY-MM') used by the monthly aggregation further down.
# strftime keeps the key well-formed even if some dates are missing: building
# it from dt.year / dt.month strings goes through float64 as soon as a single
# NaT is present and produces keys such as '2020.0-1.0'.
casos['anio_mes'] = casos['fecha'].dt.strftime('%Y-%m')
display(casos.head(3))

Unnamed: 0,pais,estado,fecha,confirmados,negativos,casos,defunciones,ola,anio_mes
0,MEXICO,AGUASCALIENTES,2019-12-31,0.0,0,0.0,0.0,ola_1,2019-12
1,MEXICO,AGUASCALIENTES,2020-01-01,0.0,0,0.0,0.0,ola_1,2020-01
2,MEXICO,AGUASCALIENTES,2020-01-02,0.0,2,0.0,0.0,ola_1,2020-01


In [3]:
# Collapse the table to one row per (pais, ola, fecha), summing the four counters.
casos_tot = (
    casos
    .groupby(['pais', 'ola', 'fecha'], as_index=False)[['confirmados', 'negativos', 'casos', 'defunciones']]
    .sum()
)
display(casos_tot.head(3))

Unnamed: 0,pais,ola,fecha,confirmados,negativos,casos,defunciones
0,MEXICO,ola_1,2019-12-31,0.0,0,0.0,0.0
1,MEXICO,ola_1,2020-01-01,0.0,25,0.0,0.0
2,MEXICO,ola_1,2020-01-02,0.0,72,0.0,0.0


In [4]:
# Totals per estado over every row of the table.
casos_ent = (
    casos
    .groupby(['estado'], as_index=False)[['confirmados', 'negativos', 'casos', 'defunciones']]
    .sum()
)
display(casos_ent.head(3))

Unnamed: 0,estado,confirmados,negativos,casos,defunciones
0,AGUASCALIENTES,52203.0,103090,55464.0,3261.0
1,BAJA CALIFORNIA,123336.0,157765,134976.0,11640.0
2,BAJA CALIFORNIA SUR,94531.0,150378,97065.0,2534.0


In [5]:
# Totals per estado and year-month key.
casos_mes = (
    casos
    .groupby(['estado', 'anio_mes'], as_index=False)[['confirmados', 'negativos', 'casos', 'defunciones']]
    .sum()
)
display(casos_mes.head(3))

Unnamed: 0,estado,anio_mes,confirmados,negativos,casos,defunciones
0,AGUASCALIENTES,2019-12,0.0,0,0.0,0.0
1,AGUASCALIENTES,2020-01,0.0,171,0.0,0.0
2,AGUASCALIENTES,2020-02,0.0,173,0.0,0.0


In [6]:
# Bar chart: total defunciones for each estado.
alt.Chart(casos_ent).mark_bar().encode(
    alt.X('estado:N'),
    alt.Y('defunciones:Q')
)

In [7]:
# Histogram: number of casos_tot rows falling in each bin of 'confirmados'.
alt.Chart(casos_tot).mark_bar().encode(
    alt.X('confirmados:Q', bin=True),
    alt.Y('count()')
)

In [8]:
# Line chart of 'confirmados' against 'fecha'.
alt.Chart(casos_tot).mark_line().encode(
    alt.X('fecha:T'),
    alt.Y('confirmados:Q')
)

In [9]:
# Keep only the casos_tot rows with more than 7000 confirmados.
subset = casos_tot.loc[casos_tot['confirmados'] > 7000]

# Scatter of defunciones against confirmados, coloured by ola. The chart is
# interactive, so the tooltip lists columns that actually exist in `subset`
# (the former commented-out tooltip named fields that are not in the frame).
alt.Chart(subset).mark_circle(size=60).encode(
    x='confirmados',
    y='defunciones',
    color='ola',
    tooltip=['fecha', 'ola', 'confirmados', 'defunciones']
).interactive()

In [10]:
# Strip plot: one tick per row of `subset`, placed by defunciones, one lane per ola.
# `subset` is the frame filtered to confirmados > 7000 in the preceding cell.
alt.Chart(subset).mark_tick().encode(
    alt.X('defunciones:Q'),
    alt.Y('ola:N')
)

In [11]:
# Long form: one row per (fecha, test) where test is 'confirmados' or 'negativos'.
subset = casos_tot.melt(
    id_vars='fecha',
    value_vars=['confirmados', 'negativos'],
    var_name='test',
    value_name='conteo',
)
display(subset.head(3))

# Area chart of the counts over time, one colour per test result.
alt.Chart(subset).mark_area().encode(
    alt.X('fecha:T'),
    alt.Y('conteo:Q'),
    alt.Color('test:N')
)

Unnamed: 0,fecha,test,conteo
0,2019-12-31,confirmados,0.0
1,2020-01-01,confirmados,0.0
2,2020-01-02,confirmados,0.0


In [12]:
# Per-ola totals plus the ratio defunciones / confirmados.
subset = (
    casos_tot
    .groupby('ola', as_index=False)[['confirmados', 'defunciones']]
    .sum()
    .assign(letalidad=lambda tot: tot['defunciones'] / tot['confirmados'])
)

# Horizontal bars of that ratio, with the axis formatted as a percentage.
alt.Chart(subset).mark_bar().encode(
    x=alt.X('letalidad:Q', axis=alt.Axis(format='.0%')),
    y=alt.Y('ola:N')
)

In [13]:
# Per-ola totals reshaped to long form: one row per (ola, grupo).
subset = (
    casos_tot
    .groupby('ola', as_index=False)[['confirmados', 'negativos', 'defunciones']]
    .sum()
    .melt(
        id_vars='ola',
        value_vars=['confirmados', 'defunciones', 'negativos'],
        var_name='grupo',
        value_name='conteo',
    )
)

# One panel (column facet) per ola, one bar per grupo.
alt.Chart(subset).mark_bar().encode(
    alt.X('grupo:O'),
    alt.Y('conteo:Q'),
    alt.Color('grupo:N'),
    alt.Column('ola:N')
)

In [14]:
# Horizontal bars per ola, coloured by grupo, from the long-form `subset`
# built in the preceding cell.
alt.Chart(subset).mark_bar().encode(
    alt.X('conteo:Q'),
    alt.Y('ola:N'),
    alt.Color('grupo:N')
)

In [15]:
# Same data as overlapping translucent bars: stack=None turns stacking off, so
# the bars of every grupo start at zero.
alt.Chart(subset).mark_bar(opacity=0.4).encode(
    alt.X('ola:O'),
    alt.Y('conteo:Q', stack=None),
    alt.Color('grupo:N')
)

In [16]:
# Monthly rows for the four selected estados; .copy() so adding a column below
# does not write into a slice of casos_mes.
subset = casos_mes.loc[casos_mes['estado'].isin(['DISTRITO FEDERAL', 'JALISCO', 'MEXICO', 'NUEVO LEON'])].copy()

# Dense rank of defunciones within each month (1 = most deaths that month).
subset["rango"] = subset.groupby(by=['anio_mes'])['defunciones'].rank(method="dense", ascending=False)
display(subset.head(3))

# Bump chart: one line per estado following its monthly rank. The title now
# describes this figure instead of the stock-price example it was copied from.
alt.Chart(subset).mark_line(point = True).encode(
    x = alt.X("anio_mes:O", timeUnit="yearmonth", title="date"),
    y="rango:O",
    color=alt.Color("estado:N")
).properties(
    title="Bump chart: monthly ranking of states by COVID-19 deaths",
    width=700,
    height=200,
)

Unnamed: 0,estado,anio_mes,confirmados,negativos,casos,defunciones,rango
208,DISTRITO FEDERAL,2019-12,0.0,0,0.0,0.0,1.0
209,DISTRITO FEDERAL,2020-01,0.0,685,0.0,0.0,1.0
210,DISTRITO FEDERAL,2020-02,2.0,525,2.0,0.0,1.0


In [17]:
# Monthly rows for DISTRITO FEDERAL only.
subset = casos_mes[casos_mes['estado'] == 'DISTRITO FEDERAL'].copy()

# Trail mark: the line follows 'confirmados' and its thickness encodes the same value.
# NOTE(review): 'anio_mes' holds 'YYYY-MM' strings; with the :T type they are
# presumably parsed as dates by the renderer - confirm that months are not
# shifted by timezone handling.
alt.Chart(subset).mark_trail().encode(
    alt.X('anio_mes:T'),
    alt.Y('confirmados:Q'),
    alt.Size('confirmados:Q')
)

In [18]:
# Area chart of monthly defunciones for the `subset` selected in the preceding
# cell, drawn with a dark green outline and a white-to-dark-green linear
# gradient fill.
alt.Chart(subset).mark_area(
    line={'color': 'darkgreen'},
    color=alt.Gradient(
        gradient='linear',
        stops=[
            alt.GradientStop(color='white', offset=0),
            alt.GradientStop(color='darkgreen', offset=1),
        ],
        x1=1, x2=1, y1=1, y2=0,
    ),
).encode(
    x='anio_mes:T',
    y='defunciones:Q'
)

In [19]:
# Monthly DISTRITO FEDERAL rows in long form: one row per (anio_mes, concepto).
subset = (
    casos_mes[casos_mes['estado'] == 'DISTRITO FEDERAL']
    .melt(
        id_vars='anio_mes',
        value_vars=['confirmados', 'negativos', 'defunciones'],
        var_name='concepto',
        value_name='conteo',
    )
)

# Overlapping translucent areas (stack=None disables stacking).
alt.Chart(subset).mark_area(opacity=0.3).encode(
    alt.X('anio_mes:T'),
    alt.Y('conteo:Q', stack=None),
    alt.Color('concepto:N')
)

In [20]:
# Same long-form `subset`, stacked and normalized so each month sums to 100%.
alt.Chart(subset).mark_area().encode(
    alt.X('anio_mes:T'),
    alt.Y('conteo:Q', stack='normalize'),
    alt.Color('concepto:N')
)

In [21]:
# Same long-form `subset`, one 100px-high row panel per concepto.
alt.Chart(subset).mark_area().encode(
    alt.X('anio_mes:T'),
    alt.Y('conteo:Q'),
    alt.Color('concepto:N'),
    alt.Row('concepto:N')
).properties(height=100)

In [22]:
# Row-level records for DISTRITO FEDERAL.
subset = casos[casos['estado'] == 'DISTRITO FEDERAL'].copy()

# Fold the three counters into (Measurement_type, value) pairs, estimate a
# density per counter over the range 0-100, and draw one row panel per counter.
alt.Chart(subset).transform_fold(
    ['confirmados', 'negativos', 'defunciones'],
    as_=['Measurement_type', 'value']
).transform_density(
    density='value',
    bandwidth=3,
    groupby=['Measurement_type'],
    extent=[0, 100]
).mark_area().encode(
    x='value:Q',
    y='density:Q',
    row='Measurement_type:N'
).properties(width=700, height=100)

In [23]:
# Same fold + density estimate as the preceding cell, drawn in a single panel
# with the three densities stacked from zero and coloured by counter.
alt.Chart(subset).transform_fold(
    ['confirmados', 'negativos', 'defunciones'],
    as_=['Measurement_type', 'value']
).transform_density(
    density='value',
    bandwidth=3,
    groupby=['Measurement_type'],
    extent=[0, 100]
).mark_area().encode(
    x='value:Q',
    y=alt.Y('density:Q', stack='zero'),
    color='Measurement_type:N'
).properties(width=700, height=200)
