# Deadly Visualizations!!!

![Image](../images/viz_types_portada.png)

## Setup

First we need to create a basic setup which includes:

- Importing the libraries.

- Reading the dataset file (source [Instituto Nacional de Estadística](https://www.ine.es/ss/Satellite?L=es_ES&c=Page&cid=1259942408928&p=1259942408928&pagename=ProductosYServicios%2FPYSLayout)).

- Create a couple of columns and tables for the analysis.

__NOTE:__ some functions were already created in order to help you go through the challenge. However, feel free to perform any code you might need.

In [1]:
# imports

import sys
import re
sys.path.insert(0, "../modules")

import numpy as np
import pandas as pd

import plotly.express as px
import cufflinks as cf  #Hacer gráficos cufflinks
cf.go_offline()

import module as mod     # functions are include in module.py

In [2]:
#pip install cufflinks

In [3]:
# read dataset

deaths = pd.read_csv('../data/7947.csv', sep=';', thousands='.')

deaths.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301158 entries, 0 to 301157
Data columns (total 5 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   Causa de muerte  301158 non-null  object
 1   Sexo             301158 non-null  object
 2   Edad             301158 non-null  object
 3   Periodo          301158 non-null  int64 
 4   Total            301158 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 11.5+ MB


In [4]:
# add some columns...you'll need them later

deaths['cause_code'] = deaths['Causa de muerte'].apply(mod.cause_code)
deaths['cause_group'] = deaths['Causa de muerte'].apply(mod.cause_types)
deaths['cause_name'] = deaths['Causa de muerte'].apply(mod.cause_name)

deaths.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301158 entries, 0 to 301157
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype 
---  ------           --------------   ----- 
 0   Causa de muerte  301158 non-null  object
 1   Sexo             301158 non-null  object
 2   Edad             301158 non-null  object
 3   Periodo          301158 non-null  int64 
 4   Total            301158 non-null  int64 
 5   cause_code       301158 non-null  object
 6   cause_group      301158 non-null  object
 7   cause_name       301158 non-null  object
dtypes: int64(2), object(6)
memory usage: 18.4+ MB


In [5]:
# lets check the categorical variables

var_list = ['Sexo', 'Edad', 'Periodo', 'cause_code', 'cause_name', 'cause_group']

categories = mod.cat_var(deaths, var_list)
categories

Unnamed: 0,categorical_variable,number_of_possible_values,values
0,cause_code,117,"[001-102, 001-008, 001, 002, 003, 004, 005, 00..."
1,cause_name,117,"[I-XXII.Todas las causas, I.Enfermedades infec..."
2,Periodo,39,"[2018, 2017, 2016, 2015, 2014, 2013, 2012, 201..."
3,Edad,22,"[Todas las edades, Menos de 1 año, De 1 a 4 añ..."
4,Sexo,3,"[Total, Hombres, Mujeres]"
5,cause_group,2,"[Multiple causes, Single cause]"


In [6]:
# we need also to create a causes table for the analysis

causes_table = deaths[['cause_code', 'cause_name']].drop_duplicates().sort_values(by='cause_code').reset_index(drop=True)

causes_table

Unnamed: 0,cause_code,cause_name
0,001,Enfermedades infecciosas intestinales
1,001-008,I.Enfermedades infecciosas y parasitarias
2,001-102,I-XXII.Todas las causas
3,002,Tuberculosis y sus efectos tardíos
4,003,Enfermedad meningocócica
...,...,...
112,098,Suicidio y lesiones autoinfligidas
113,099,Agresiones (homicidio)
114,100,Eventos de intención no determinada
115,101,Complicaciones de la atención médica y quirúrgica


In [7]:
# And some space for free-style Pandas!!! (e.g.: df['column_name'].unique())

dataset = mod.row_filter(deaths, 'Causa de muerte', ["006  SIDA"])


dataset= mod.row_filter(dataset, 'Sexo', ["Total"])


dataset= mod.row_filter(dataset, 'Edad', ["Todas las edades"])

dataset

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA
5,006 SIDA,Total,Todas las edades,1997,2844,6,Single cause,SIDA
6,006 SIDA,Total,Todas las edades,1991,2503,6,Single cause,SIDA
7,006 SIDA,Total,Todas las edades,1990,1898,6,Single cause,SIDA
8,006 SIDA,Total,Todas las edades,1999,1802,6,Single cause,SIDA
9,006 SIDA,Total,Todas las edades,1998,1779,6,Single cause,SIDA


In [8]:
dataset["media"] = dataset["Total"].mean()
dataset

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA,1468.871795
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA,1468.871795
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA,1468.871795
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA,1468.871795
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA,1468.871795
5,006 SIDA,Total,Todas las edades,1997,2844,6,Single cause,SIDA,1468.871795
6,006 SIDA,Total,Todas las edades,1991,2503,6,Single cause,SIDA,1468.871795
7,006 SIDA,Total,Todas las edades,1990,1898,6,Single cause,SIDA,1468.871795
8,006 SIDA,Total,Todas las edades,1999,1802,6,Single cause,SIDA,1468.871795
9,006 SIDA,Total,Todas las edades,1998,1779,6,Single cause,SIDA,1468.871795


In [9]:
dataset["percentil_05"] = dataset["Total"].quantile(0.5)/100
dataset["percentil_025"] = dataset["Total"].quantile(0.25)/100
dataset["percentil_075"] = dataset["Total"].quantile(0.75)/100

dataset

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
5,006 SIDA,Total,Todas las edades,1997,2844,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
6,006 SIDA,Total,Todas las edades,1991,2503,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
7,006 SIDA,Total,Todas las edades,1990,1898,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
8,006 SIDA,Total,Todas las edades,1999,1802,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
9,006 SIDA,Total,Todas las edades,1998,1779,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


#### dataset_ordered_bien = dataset.groupby('Periodo').sum().sort_values(by="Periodo", ascending=False)
print(dataset_ordered_bien)


In [10]:
dataset_ordered = dataset.sort_values(by="Periodo", ascending=False) #sort_values = ordena 
dataset_ordered

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
30,006 SIDA,Total,Todas las edades,2018,423,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
29,006 SIDA,Total,Todas las edades,2017,442,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
28,006 SIDA,Total,Todas las edades,2016,498,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
27,006 SIDA,Total,Todas las edades,2015,633,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
26,006 SIDA,Total,Todas las edades,2014,700,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
25,006 SIDA,Total,Todas las edades,2013,749,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
23,006 SIDA,Total,Todas las edades,2012,880,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
22,006 SIDA,Total,Todas las edades,2011,953,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
21,006 SIDA,Total,Todas las edades,2010,1020,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
20,006 SIDA,Total,Todas las edades,2009,1079,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


In [11]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Causa de muerte  39 non-null     object 
 1   Sexo             39 non-null     object 
 2   Edad             39 non-null     object 
 3   Periodo          39 non-null     int64  
 4   Total            39 non-null     int64  
 5   cause_code       39 non-null     object 
 6   cause_group      39 non-null     object 
 7   cause_name       39 non-null     object 
 8   media            39 non-null     float64
 9   percentil_05     39 non-null     float64
 10  percentil_025    39 non-null     float64
 11  percentil_075    39 non-null     float64
dtypes: float64(4), int64(2), object(6)
memory usage: 3.8+ KB


In [12]:
dataset_1= dataset_ordered.loc[:38,"Periodo"]

dataset_1

30    2018
29    2017
28    2016
27    2015
26    2014
25    2013
23    2012
22    2011
21    2010
20    2009
19    2008
17    2007
16    2006
15    2005
14    2004
11    2003
13    2002
12    2001
10    2000
8     1999
9     1998
5     1997
1     1996
0     1995
2     1994
3     1993
4     1992
6     1991
7     1990
18    1989
24    1988
31    1987
32    1986
33    1985
36    1984
38    1983
Name: Periodo, dtype: int64

In [13]:
dataset.head()

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


In [14]:
dataset.columns

Index(['Causa de muerte', 'Sexo', 'Edad', 'Periodo', 'Total', 'cause_code',
       'cause_group', 'cause_name', 'media', 'percentil_05', 'percentil_025',
       'percentil_075'],
      dtype='object')

In [15]:
dataset_1997= dataset_ordered[dataset_ordered['Periodo']>=1997]
dataset_1997


Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
30,006 SIDA,Total,Todas las edades,2018,423,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
29,006 SIDA,Total,Todas las edades,2017,442,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
28,006 SIDA,Total,Todas las edades,2016,498,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
27,006 SIDA,Total,Todas las edades,2015,633,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
26,006 SIDA,Total,Todas las edades,2014,700,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
25,006 SIDA,Total,Todas las edades,2013,749,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
23,006 SIDA,Total,Todas las edades,2012,880,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
22,006 SIDA,Total,Todas las edades,2011,953,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
21,006 SIDA,Total,Todas las edades,2010,1020,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
20,006 SIDA,Total,Todas las edades,2009,1079,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


In [16]:
dataset_1996= dataset_ordered[dataset_ordered['Periodo']<=1996]
dataset_1996


Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
6,006 SIDA,Total,Todas las edades,1991,2503,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
7,006 SIDA,Total,Todas las edades,1990,1898,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
18,006 SIDA,Total,Todas las edades,1989,1288,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
24,006 SIDA,Total,Todas las edades,1988,768,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
31,006 SIDA,Total,Todas las edades,1987,409,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


In [17]:
fig = px.bar(dataset, x='Periodo', y='Total')
fig.show()

In [18]:
fig = px.bar(dataset_1997, x='Periodo', y='Total')
fig.show()

In [19]:
fig = px.bar(dataset_1996, x='Periodo', y='Total')
fig.show()

In [20]:
fig = px.line(dataset_ordered, x="Periodo", y="Total", title='Evolución muertes últimos 40 años por SIDA')
fig.show()

In [21]:
fig = px.line(dataset_1996, x="Periodo", y="Total", title='Evolución muertes últimos 40 años por SIDA')
fig.show()

In [22]:
fig = px.line(dataset_1997, x="Periodo", y="Total", title='Evolución muertes últimos 40 años por SIDA')
fig.show()

In [23]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39 entries, 0 to 38
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Causa de muerte  39 non-null     object 
 1   Sexo             39 non-null     object 
 2   Edad             39 non-null     object 
 3   Periodo          39 non-null     int64  
 4   Total            39 non-null     int64  
 5   cause_code       39 non-null     object 
 6   cause_group      39 non-null     object 
 7   cause_name       39 non-null     object 
 8   media            39 non-null     float64
 9   percentil_05     39 non-null     float64
 10  percentil_025    39 non-null     float64
 11  percentil_075    39 non-null     float64
dtypes: float64(4), int64(2), object(6)
memory usage: 3.8+ KB


In [24]:
dataset.describe()

Unnamed: 0,Periodo,Total,media,percentil_05,percentil_025,percentil_075
count,39.0,39.0,39.0,39.0,39.0,39.0
mean,1999.0,1468.871795,1468.872,12.11,4.7,17.45
std,11.401754,1436.369568,1.382076e-12,1.079747e-14,1.799578e-15,7.198313e-15
min,1980.0,0.0,1468.872,12.11,4.7,17.45
25%,1989.5,470.0,1468.872,12.11,4.7,17.45
50%,1999.0,1211.0,1468.872,12.11,4.7,17.45
75%,2008.5,1745.0,1468.872,12.11,4.7,17.45
max,2018.0,5551.0,1468.872,12.11,4.7,17.45


In [25]:
dataset_m = mod.row_filter(deaths, 'Causa de muerte', ["006  SIDA"])


dataset_m= mod.row_filter(dataset, 'Sexo', ["Mujeres"])



dataset_m= mod.row_filter(dataset, 'Edad', ["Todas las edades"])

dataset_m

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name,media,percentil_05,percentil_025,percentil_075
0,006 SIDA,Total,Todas las edades,1995,5551,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
1,006 SIDA,Total,Todas las edades,1996,5432,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
2,006 SIDA,Total,Todas las edades,1994,4770,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
3,006 SIDA,Total,Todas las edades,1993,3980,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
4,006 SIDA,Total,Todas las edades,1992,3256,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
5,006 SIDA,Total,Todas las edades,1997,2844,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
6,006 SIDA,Total,Todas las edades,1991,2503,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
7,006 SIDA,Total,Todas las edades,1990,1898,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
8,006 SIDA,Total,Todas las edades,1999,1802,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45
9,006 SIDA,Total,Todas las edades,1998,1779,6,Single cause,SIDA,1468.871795,12.11,4.7,17.45


In [26]:
dataset_h= mod.row_filter(deaths, 'Causa de muerte', ["006  SIDA"])



dataset_h= mod.row_filter(dataset_h, 'Sexo', ["Hombres"])



dataset_h= mod.row_filter(dataset_h, 'Edad', ["Todas las edades"])

dataset_h

Unnamed: 0,Causa de muerte,Sexo,Edad,Periodo,Total,cause_code,cause_group,cause_name
0,006 SIDA,Hombres,Todas las edades,1995,4509,6,Single cause,SIDA
1,006 SIDA,Hombres,Todas las edades,1996,4355,6,Single cause,SIDA
2,006 SIDA,Hombres,Todas las edades,1994,3850,6,Single cause,SIDA
3,006 SIDA,Hombres,Todas las edades,1993,3219,6,Single cause,SIDA
4,006 SIDA,Hombres,Todas las edades,1992,2706,6,Single cause,SIDA
5,006 SIDA,Hombres,Todas las edades,1997,2336,6,Single cause,SIDA
6,006 SIDA,Hombres,Todas las edades,1991,2115,6,Single cause,SIDA
7,006 SIDA,Hombres,Todas las edades,1990,1576,6,Single cause,SIDA
8,006 SIDA,Hombres,Todas las edades,1999,1488,6,Single cause,SIDA
9,006 SIDA,Hombres,Todas las edades,1998,1476,6,Single cause,SIDA


In [27]:


df_sexo = pd.merge(dataset_h, dataset_m[["Sexo", "Total"]], left_index=True, right_index=True)

total_hombres = df_sexo['Total_x'].sum()
total_mujeres = df_sexo['Total_y'].sum()  

df_sexo

Unnamed: 0,Causa de muerte,Sexo_x,Edad,Periodo,Total_x,cause_code,cause_group,cause_name,Sexo_y,Total_y
0,006 SIDA,Hombres,Todas las edades,1995,4509,6,Single cause,SIDA,Total,5551
1,006 SIDA,Hombres,Todas las edades,1996,4355,6,Single cause,SIDA,Total,5432
2,006 SIDA,Hombres,Todas las edades,1994,3850,6,Single cause,SIDA,Total,4770
3,006 SIDA,Hombres,Todas las edades,1993,3219,6,Single cause,SIDA,Total,3980
4,006 SIDA,Hombres,Todas las edades,1992,2706,6,Single cause,SIDA,Total,3256
5,006 SIDA,Hombres,Todas las edades,1997,2336,6,Single cause,SIDA,Total,2844
6,006 SIDA,Hombres,Todas las edades,1991,2115,6,Single cause,SIDA,Total,2503
7,006 SIDA,Hombres,Todas las edades,1990,1576,6,Single cause,SIDA,Total,1898
8,006 SIDA,Hombres,Todas las edades,1999,1488,6,Single cause,SIDA,Total,1802
9,006 SIDA,Hombres,Todas las edades,1998,1476,6,Single cause,SIDA,Total,1779


In [28]:
 


df_plot = pd.concat([
    df_sexo[['Sexo_x', 'Total_x']].rename(columns={'Sexo_x': 'Sexo', 'Total_x': 'Total'}),
    df_sexo[['Sexo_y', 'Total_y']].rename(columns={'Sexo_y': 'Sexo', 'Total_y': 'Total'})
])

fig = px.bar(df_plot, x='Sexo', y='Total', color='Sexo', 
             labels={'Sexo': 'Género', 'Total': 'Total'},
             title='Total por Género')
fig.show()

In [29]:
df_plot

Unnamed: 0,Sexo,Total
0,Hombres,4509
1,Hombres,4355
2,Hombres,3850
3,Hombres,3219
4,Hombres,2706
...,...,...
34,Total,0
35,Total,0
36,Total,0
37,Total,0


In [30]:
df_plot.info()

<class 'pandas.core.frame.DataFrame'>
Index: 78 entries, 0 to 38
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Sexo    78 non-null     object
 1   Total   78 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.8+ KB


## Lets make some transformations

Eventhough the dataset is pretty clean, the information is completely denormalized as you could see. For that matter a collection of methods (functions) are available in order to generate the tables you might need:

- `row_filter(df, cat_var, cat_values)` => Filter rows by any value or group of values in a categorical variable.

- `nrow_filter(df, cat_var, cat_values)` => The same but backwards. 

- `groupby_sum(df, group_vars, agg_var='Total', sort_var='Total')` => Add deaths by a certain variable.

- `pivot_table(df, col, x_axis, value='Total')`=> Make some pivot tables, you might need them...

__NOTE:__ be aware that the filtering methods can perform a filter at a time. Feel free to perform the filter you need in any way you want or feel confortable with.

In [31]:
# Example 1
'''
dataset = mod.row_filter(deaths, 'Sexo', ['Total'])
dataset = mod.row_filter(dataset, 'Edad', ['Todas las edades'])
dataset.head()
'''

"\ndataset = mod.row_filter(deaths, 'Sexo', ['Total'])\ndataset = mod.row_filter(dataset, 'Edad', ['Todas las edades'])\ndataset.head()\n"

In [32]:
# Example 2
'''
group = ['cause_code','Periodo']
dataset = mod.groupby_sum(deaths, group)
dataset.head()
'''

"\ngroup = ['cause_code','Periodo']\ndataset = mod.groupby_sum(deaths, group)\ndataset.head()\n"

In [33]:
# Example 3
'''
dataset = mod.pivot_table(dataset, 'cause_code', 'Periodo')
dataset.head()
'''

"\ndataset = mod.pivot_table(dataset, 'cause_code', 'Periodo')\ndataset.head()\n"

## ...and finally, show me some insights with Plotly!!!

In [34]:
# Cufflinks histogram
'''
dataset_column.iplot(kind='hist',
                     title='VIZ TITLE',
                     yTitle='AXIS TITLE',
                     xTitle='AXIS TITLE')
'''

"\ndataset_column.iplot(kind='hist',\n                     title='VIZ TITLE',\n                     yTitle='AXIS TITLE',\n                     xTitle='AXIS TITLE')\n"

In [35]:
# Cufflinks bar plot
'''
dataset_bar.iplot(kind='bar',
                  x='VARIABLE',
                  xTitle='AXIS TITLE',
                  yTitle='AXIS TITLE',
                  title='VIZ TITLE')
'''

"\ndataset_bar.iplot(kind='bar',\n                  x='VARIABLE',\n                  xTitle='AXIS TITLE',\n                  yTitle='AXIS TITLE',\n                  title='VIZ TITLE')\n"

In [36]:
# Cufflinks line plot
'''
dataset_line.iplot(kind='line',
                   x='VARIABLE',
                   xTitle='AXIS TITLE',
                   yTitle='AXIS TITLE',
                   title='VIZ TITLE')
'''

"\ndataset_line.iplot(kind='line',\n                   x='VARIABLE',\n                   xTitle='AXIS TITLE',\n                   yTitle='AXIS TITLE',\n                   title='VIZ TITLE')\n"

In [37]:
# Cufflinks scatter plot
'''
dataset_scatter.iplot(x='VARIABLE', 
                      y='VARIABLE', 
                      categories='VARIABLE',
                      xTitle='AXIS TITLE', 
                      yTitle='AXIS TITLE',
                      title='VIZ TITLE')
'''

"\ndataset_scatter.iplot(x='VARIABLE', \n                      y='VARIABLE', \n                      categories='VARIABLE',\n                      xTitle='AXIS TITLE', \n                      yTitle='AXIS TITLE',\n                      title='VIZ TITLE')\n"