In [1]:
import pandas as pd
import numpy as np
import csv

import altair as alt
from vega_datasets import data
# Let pandas render up to 999 rows before truncating displayed frames.
pd.set_option('display.max_rows', 999)

In [2]:
# Load the processed COVID case table and parse `fecha` as a datetime
# (the file stores it as YYYY-MM-DD text).
casos = pd.read_csv('../data/processed/casos_covid.csv')
casos['fecha'] = pd.to_datetime(casos['fecha'], format='%Y-%m-%d')
# Year-month key in 'YYYY-MM' form; strftime zero-pads the month, so no
# manual string concatenation / zfill is needed.
casos['anio_mes'] = casos['fecha'].dt.strftime('%Y-%m')
display(casos.head(3))

Unnamed: 0,pais,estado,fecha,confirmados,negativos,casos,defunciones,ola,anio_mes
0,MEXICO,AGUASCALIENTES,2019-12-31,0.0,0,0.0,0.0,ola_1,2019-12
1,MEXICO,AGUASCALIENTES,2020-01-01,0.0,0,0.0,0.0,ola_1,2020-01
2,MEXICO,AGUASCALIENTES,2020-01-02,0.0,2,0.0,0.0,ola_1,2020-01


In [3]:
# Sum the four count columns into one row per (pais, ola, fecha),
# collapsing the per-state rows of `casos`.
casos_tot = (
    casos.groupby(['pais', 'ola', 'fecha'], as_index=False)[
        ['confirmados', 'negativos', 'casos', 'defunciones']
    ].sum()
)
display(casos_tot.head(3))

Unnamed: 0,pais,ola,fecha,confirmados,negativos,casos,defunciones
0,MEXICO,ola_1,2019-12-31,0.0,0,0.0,0.0
1,MEXICO,ola_1,2020-01-01,0.0,25,0.0,0.0
2,MEXICO,ola_1,2020-01-02,0.0,72,0.0,0.0


In [4]:
# Sum the four count columns over the whole date range, one row per state.
casos_ent = (
    casos.groupby(['estado'], as_index=False)[
        ['confirmados', 'negativos', 'casos', 'defunciones']
    ].sum()
)
display(casos_ent.head(3))

Unnamed: 0,estado,confirmados,negativos,casos,defunciones
0,AGUASCALIENTES,52203.0,103090,55464.0,3261.0
1,BAJA CALIFORNIA,123336.0,157765,134976.0,11640.0
2,BAJA CALIFORNIA SUR,94531.0,150378,97065.0,2534.0


In [5]:
# Sum the four count columns into one row per (estado, anio_mes).
casos_mes = (
    casos.groupby(['estado', 'anio_mes'], as_index=False)[
        ['confirmados', 'negativos', 'casos', 'defunciones']
    ].sum()
)
display(casos_mes.head(3))

Unnamed: 0,estado,anio_mes,confirmados,negativos,casos,defunciones
0,AGUASCALIENTES,2019-12,0.0,0,0.0,0.0
1,AGUASCALIENTES,2020-01,0.0,171,0.0,0.0
2,AGUASCALIENTES,2020-02,0.0,173,0.0,0.0


In [6]:

# Toy data for a donut chart demo: six categories with arbitrary values.
source = pd.DataFrame(
    {
        "category": [1, 2, 3, 4, 5, 6],
        "value": [4, 6, 10, 3, 7, 8],
    }
)

# innerRadius hollows out the centre of the arcs, turning the pie into a donut.
alt.Chart(source).mark_arc(innerRadius=50).encode(
    theta=alt.Theta("value:Q"),
    color=alt.Color("category:N"),
)

In [7]:
# Same toy data as a plain pie chart (no inner radius).
source = pd.DataFrame(
    {
        "category": [1, 2, 3, 4, 5, 6],
        "value": [4, 6, 10, 3, 7, 8],
    }
)

alt.Chart(source).mark_arc().encode(
    theta=alt.Theta("value:Q"),
    color=alt.Color("category:N"),
)

In [8]:
# Binned bubble chart: both axes are binned and the circle size encodes
# how many rows of `casos_tot` fall in each (confirmados, negativos) bin.
subset = casos_tot.copy()

alt.Chart(subset).mark_circle().encode(
    x=alt.X('confirmados:Q', bin=True),
    y=alt.Y('negativos:Q', bin=True),
    size=alt.Size('count()'),
)


In [9]:
# Confirmed cases and deaths summed per (ola, anio_mes).
subset = casos.groupby(by=['ola', 'anio_mes'], as_index=False).agg({'confirmados': 'sum',
                                                                     'defunciones': 'sum'})

# Interval (drag-a-rectangle) brush. `alt.selection_interval()` replaces the
# generic `alt.selection(type='interval')`, which is deprecated in Altair 5;
# the dedicated constructor is available in both Altair 4 and 5.
brush = alt.selection_interval()

# Scatter plot: points inside the brush are coloured by `ola`, the rest grey.
# NOTE(review): `add_selection` is kept so the cell still runs on Altair 4;
# on Altair 5 it is deprecated in favour of `add_params` — switch once the
# environment is confirmed to be Altair >= 5.
points = alt.Chart(subset).mark_point().encode(
    x='confirmados:Q',
    y='defunciones:Q',
    color=alt.condition(brush, 'ola:O', alt.value('grey'))
).add_selection(brush)

points

In [10]:
# Confirmed, negative and death counts summed per (ola, anio_mes).
subset = (
    casos.groupby(['ola', 'anio_mes'], as_index=False)[
        ['confirmados', 'negativos', 'defunciones']
    ].sum()
)

# Bubble chart: confirmed vs. negative results, point size = deaths.
alt.Chart(subset).mark_point().encode(
    x=alt.X('confirmados'),
    y=alt.Y('negativos'),
    size=alt.Size('defunciones'),
)

In [11]:
# Same bubble chart, coloured by `ola`; zero=False lets both axes start
# near the data instead of being forced to include 0.
alt.Chart(subset).mark_circle().encode(
    x=alt.X('confirmados', scale=alt.Scale(zero=False)),
    y=alt.Y('negativos', scale=alt.Scale(zero=False, padding=1)),
    color=alt.Color('ola'),
    size=alt.Size('defunciones'),
)

In [12]:
# Scatter-plot matrix: every row field is plotted against every column field
# in a 180x180 panel, coloured by `ola`, with pan/zoom enabled.
alt.Chart(subset).mark_circle().encode(
    x=alt.X(alt.repeat("column"), type='quantitative'),
    y=alt.Y(alt.repeat("row"), type='quantitative'),
    color=alt.Color('ola:N'),
).properties(
    height=180,
    width=180,
).repeat(
    row=['confirmados', 'negativos', 'defunciones'],
    column=['defunciones', 'negativos', 'confirmados'],
).interactive()

In [13]:
# Punchcard demo on the vega_datasets `github` sample: hour of day vs.
# day of week, circle size = summed `count`.
source = data.github.url

alt.Chart(source).mark_circle().encode(
    x=alt.X('hours(time):O'),
    y=alt.Y('day(time):O'),
    size=alt.Size('sum(count):Q'),
)

In [14]:
# Confirmed cases vs. deaths, faceted into one row of panels per `ola`.
alt.Chart(subset).mark_point().encode(
    x=alt.X('confirmados:Q'),
    y=alt.Y('defunciones:Q'),
    row=alt.Row('ola:N'),
)

In [15]:
# Reshape the monthly table to long format: each row holds one measure
# (`concepto`) and its value (`conteo`) for a given `anio_mes`.
subset = casos_mes.melt(
    id_vars='anio_mes',
    value_vars=['confirmados', 'negativos', 'defunciones'],
    var_name='concepto',
    value_name='conteo',
)

display(subset.head(3))

# Overlaid histograms of `conteo`, one per measure; stack=None keeps the
# bars layered (semi-transparent) instead of stacked.
alt.Chart(subset).mark_bar(binSpacing=0, opacity=0.3).encode(
    x=alt.X('conteo:Q', bin=alt.Bin(maxbins=100)),
    y=alt.Y('count()', stack=None),
    color=alt.Color('concepto:N'),
)

Unnamed: 0,anio_mes,concepto,conteo
0,2019-12,confirmados,0.0
1,2020-01,confirmados,0.0
2,2020-02,confirmados,0.0


In [16]:
# Choropleth demo on vega_datasets samples: US county shapes coloured by
# the `rate` field looked up from the unemployment table via `id`.
counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url

alt.Chart(counties).mark_geoshape().encode(
    color=alt.Color('rate:Q'),
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(data=source, key='id', fields=['rate']),
).project(
    type='albersUsa',
).properties(
    height=300,
    width=500,
)

In [17]:
# 2-D histogram (heatmap) demo on the vega_datasets `movies` sample:
# both ratings are binned and cell colour encodes the record count.
source = data.movies.url

alt.Chart(source).mark_rect().encode(
    x=alt.X('IMDB_Rating:Q', bin=alt.Bin(maxbins=60)),
    y=alt.Y('Rotten_Tomatoes_Rating:Q', bin=alt.Bin(maxbins=40)),
    color=alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')),
)