# Deadly Visualizations!!!

![Image](../images/viz_types_portada.png)

## Setup

First we need to create a basic setup which includes:

- Importing the libraries.

- Reading the dataset file (source [Instituto Nacional de Estadística](https://www.ine.es/ss/Satellite?L=es_ES&c=Page&cid=1259942408928&p=1259942408928&pagename=ProductosYServicios%2FPYSLayout)).

- Creating a couple of columns and tables for the analysis.

__NOTE:__ some functions have already been created to help you get through the challenge. However, feel free to write any additional code you might need.

In [None]:
# imports

import sys
import re
# Put ../modules at the front of the import path so that `import module`
# below can be resolved (module.py presumably lives there).
sys.path.insert(0, "../modules")

import numpy as np
import pandas as pd

import plotly.express as px
import cufflinks as cf
# Configure cufflinks for offline use before any .iplot() call.
cf.go_offline()

import module as mod     # the helper functions are included in module.py

In [None]:
# Load the deaths dataset: semicolon-separated, '.' used as thousands separator
deaths = pd.read_csv(
    '../data/7947.csv',
    sep=';',
    thousands='.',
)

# Column names, dtypes and non-null counts
deaths.info()

In [None]:
# Preview the first rows of the deaths table (shown as the cell output).
deaths.head()


In [None]:
# Load the population dataset: same CSV conventions as the deaths file
population = pd.read_csv(
    '../data/31304bsc.csv',
    sep=';',
    thousands='.',
)

# Column names, dtypes and non-null counts
population.info()

In [None]:
# Preview the first rows of the population table (shown as the cell output).
population.head()


In [None]:
# Distinct periods present in the deaths table: print how many, then show them
periods = deaths['Periodo'].unique()
print(len(periods))
periods

In [None]:
# Derive helper columns from the raw cause description; later cells rely on them
derived_columns = {
    'cause_code': mod.cause_code,
    'cause_group': mod.cause_types,
    'cause_name': mod.cause_name,
}
for column, extractor in derived_columns.items():
    deaths[column] = deaths['Causa de muerte'].apply(extractor)

deaths.info()

In [None]:
# Show the deaths table including the derived cause_* columns.
deaths

In [None]:
# Let's check the categorical variables
var_list = [
    'Sexo',
    'Edad',
    'Periodo',
    'cause_code',
    'cause_name',
    'cause_group',
]

# Summary produced by the helper module for the listed columns
categories = mod.cat_var(deaths, var_list)
categories

In [None]:
# We also need a causes lookup table (one row per code/name pair) for the analysis
unique_causes = deaths[['cause_code', 'cause_name']].drop_duplicates()
causes_table = unique_causes.sort_values(by='cause_code').reset_index(drop=True)

causes_table

## Let's make some transformations

Even though the dataset is pretty clean, the information is completely denormalized, as you can see. For that reason, a collection of methods (functions) is available to generate the tables you might need:

- `row_filter(df, cat_var, cat_values)` => Filter rows by any value or group of values in a categorical variable.

- `nrow_filter(df, cat_var, cat_values)` => The same but backwards. 

- `groupby_sum(df, group_vars, agg_var='Total', sort_var='Total')` => Add deaths by a certain variable.

- `pivot_table(df, col, x_axis, value='Total')`=> Make some pivot tables, you might need them...

__NOTE:__ be aware that the filtering methods can only apply one filter at a time. Feel free to perform the filtering you need in whatever way you feel comfortable with.

In [None]:
# Example 1: chain two single-variable filters (sex, then age group)

totals_by_sex = mod.row_filter(deaths, 'Sexo', ['Total'])
dataset = mod.row_filter(totals_by_sex, 'Edad', ['Todas las edades'])
dataset.head()


In [None]:
# Example 2: add up deaths grouped by cause code and year

group = [
    'cause_code',
    'Periodo',
]
deaths_by_group = mod.groupby_sum(deaths, group)
dataset = deaths_by_group
dataset.head()


In [None]:
# Example 3: pivot the grouped table from Example 2
# (cause_code is passed as `col`, Periodo as `x_axis`)

pivoted = mod.pivot_table(dataset, 'cause_code', 'Periodo')
dataset = pivoted
dataset.head()


## ...and finally, show me some insights with Plotly!!!

In [None]:
# And some space for free-style Pandas!!! (e.g.: df['column_name'].unique())




In [None]:
# Clean the population dataset down to (year, population) rows.
#
# Only the "1 de enero de <year>" snapshots are kept. The selection is copied
# explicitly so the column assignments below act on an independent frame
# instead of a slice of `population` (avoids SettingWithCopyWarning).
# NOTE(review): Sexo/Edad/Provincias are dropped without filtering on them,
# which assumes the file holds a single row per date - confirm.
date_prefix = '1 de enero de '
# na=False: rows with a missing period are excluded instead of breaking the mask
is_january_first = population['Periodo'].str.startswith('1 de enero de', na=False)
population_year = population.loc[is_january_first].copy()

# Strip the literal prefix (regex=False: plain text, not a pattern) and parse the year
population_year['Periodo'] = pd.to_numeric(
    population_year['Periodo'].str.replace(date_prefix, '', regex=False)
)

population_year = population_year.drop(columns=['Sexo', 'Edad', 'Provincias'])
# Rename to avoid clashing with the deaths 'Total' column when joining later
population_year = population_year.rename(columns={"Total": "Demo"})

# Row counts before / after the cleaning
print(len(population))
print(len(population_year))
population_year.head()


In [None]:
# Restrict deaths to the aggregate rows: all sexes, all ages, single causes only
is_all_sexes = deaths['Sexo'] == 'Total'
is_all_ages = deaths['Edad'] == 'Todas las edades'
is_single_cause = deaths['cause_group'] == 'Single cause'

deaths_tot = deaths[is_all_sexes & is_all_ages & is_single_cause]

# Row counts before / after the filter
print(len(deaths))
print(len(deaths_tot))
deaths_tot.head()


In [None]:
# Check the column dtypes of the cleaned population table.
population_year.info()

In [None]:
# Check the column dtypes of the filtered deaths table.
deaths_tot.info()

In [None]:
# Join deaths with the yearly population and compute the share of deaths.
#
# The join key is spelled out so the merge does not depend on whichever
# column names the two frames happen to share.
deaths_tot = deaths_tot.merge(population_year, on='Periodo')

# Vectorised column arithmetic instead of a row-wise apply(): same values,
# much faster, and it also works when the merged frame is empty.
deaths_tot['Percent'] = deaths_tot['Total'] * 100 / deaths_tot['Demo']
# NOTE(review): this is Percent * 100000, not deaths per 100,000 inhabitants
# (that would be Total / Demo * 100000). Formula kept as in the original - confirm intent.
deaths_tot['PercentK'] = deaths_tot['Total'] * 100 / deaths_tot['Demo'] * 100000
deaths_tot.head()


In [None]:
# Yearly totals: sum every measure over all causes of the same year
measures = ['Total', 'Percent', 'PercentK']
yearly_sums = deaths_tot.groupby(['Periodo'])[measures].sum()
deaths_tot_year = yearly_sums.reset_index()

# Number of years covered
print(len(deaths_tot_year))
deaths_tot_year.head()


In [None]:
# Line chart: evolution of the total number of deaths per year
total_chart = dict(
    kind='line',
    x='Periodo',
    y='Total',
    xTitle='Year',
    yTitle='Total',
    title='Evolution deaths. Total',
    dimensions=(900, 300),
)
deaths_tot_year.iplot(**total_chart)


In [None]:

# Line chart: evolution of deaths relative to the population per year
relative_chart = dict(
    kind='line',
    x='Periodo',
    y='Percent',
    xTitle='Year',
    yTitle='Total / population',
    title='Evolution deaths. Total by population',
    dimensions=(900, 300),
)
deaths_tot_year.iplot(**relative_chart)


In [None]:
# Significant by population


In [None]:
# Preview the joined deaths/population table.
deaths_tot.head()

In [None]:
# Totals per cause over all years, reduced to the 10 causes with most deaths
cause_keys = ['cause_code', 'cause_name']
measures = ['Total', 'Percent', 'PercentK']

deaths_tot_cause = deaths_tot.groupby(cause_keys)[measures].sum().reset_index()
print(len(deaths_tot_cause))

ranked_causes = deaths_tot_cause.sort_values(by=['Total'], ascending=False)
deaths_tot_cause = ranked_causes.head(10)
print(len(deaths_tot_cause))
deaths_tot_cause


In [None]:
# Bar chart: total deaths for each of the 10 most frequent causes
cause_chart = dict(
    kind='bar',
    x='cause_name',
    y='Total',
    xTitle='Cause',
    yTitle='Total',
    title='Total by cause (10 most frequent causes)',
)
deaths_tot_cause.iplot(**cause_chart)


In [None]:
# Rows of the 10 most frequent causes, broken down by sex (all ages)
most_causes = deaths_tot_cause[['cause_code']]
# Joining on the shared cause_code column keeps only the selected causes
deaths_sex = most_causes.merge(deaths)

split_by_sex = deaths_sex['Sexo'] != 'Total'
all_ages = deaths_sex['Edad'] == 'Todas las edades'
deaths_sex = deaths_sex[split_by_sex & all_ages]
deaths_sex

In [None]:
# Total deaths per sex and cause over all years
sex_cause_keys = ['Sexo', 'cause_code', 'cause_name']
summed_by_sex = deaths_sex.groupby(sex_cause_keys)[['Total']].sum()
deaths_tot_sex = summed_by_sex.reset_index()
print(len(deaths_tot_sex))
deaths_tot_sex

In [None]:
# Grouped bar chart: most frequent causes, one bar per sex
axis_labels = {
    'cause_name': 'Cause',
    'Total': 'Total',
    'Sexo': 'Sex',
}
graph = px.bar(
    deaths_tot_sex,
    x='cause_name',
    y='Total',
    color='Sexo',
    title='Mostly causes by sex',
    labels=axis_labels,
)

# Bars side by side, causes ordered by their summed height
graph.update_layout(
    barmode='group',
    xaxis=dict(categoryorder='total descending'),
)

graph.show()

In [None]:
# Keep only the causes whose code contains no '-'.
# NOTE(review): codes with a '-' presumably denote ranges/groups of causes - confirm.
print(len(causes_table))
# Literal match (regex=False). na=True makes rows with a missing code count as
# "has dash", so they are dropped exactly as the former `== False` comparison did.
has_dash = causes_table['cause_code'].str.contains('-', regex=False, na=True)
# Negate the boolean mask with ~ rather than comparing it to False
causes_single = causes_table.loc[~has_dash]
print(len(causes_single))

In [None]:
# Show the first and the last 51 single causes
for preview in (causes_single.head(51), causes_single.tail(51)):
    display(preview)

In [None]:
# Evolution of selected causes: the most frequent cause, two increasing causes
# and some other curious causes
selected_codes = ['059', '018', '046', '090', '098']
selected_causes = pd.DataFrame({'cause_code': selected_codes})

# Joining on the shared cause_code column keeps only the selected causes
deaths_causes_evo = selected_causes.merge(deaths_tot)
print(len(deaths_causes_evo))
deaths_causes_evo.head()

In [None]:
# Line chart: yearly evolution of the selected causes, one line per cause
axis_labels = {
    'Periodo': 'Year',
    'Total': 'Total',
    'cause_name': '',
}
graph = px.line(
    deaths_causes_evo,
    x='Periodo',
    y='Total',
    color='cause_name',
    title='Evolution most frequent causes',
    labels=axis_labels,
)

# Vertical legend
graph.update_layout(legend={'orientation': 'v'})

graph.show()

In [None]:
# Cause 098 (suicide): aggregate rows (all sexes, all ages) with population data
is_cause_098 = deaths_tot['cause_code'] == '098'
deaths_cause_tot_098 = deaths_tot[is_cause_098]
print(len(deaths_cause_tot_098))
deaths_cause_tot_098.head()

In [None]:
# Cause 098 (suicide): detailed rows only, i.e. a specific sex and a specific age group
is_cause_098 = deaths['cause_code'] == '098'
has_specific_sex = deaths['Sexo'] != 'Total'
has_specific_age = deaths['Edad'] != 'Todas las edades'

deaths_cause_098 = deaths[is_cause_098 & has_specific_sex & has_specific_age]
deaths_cause_098.head()

In [None]:
# Line chart: total suicides per year
suicide_chart = dict(
    kind='line',
    x='Periodo',
    y='Total',
    xTitle='Year',
    yTitle='Total',
    title='Total suicide',
)
deaths_cause_tot_098.iplot(**suicide_chart)


In [None]:
# Line chart: suicides relative to the population per year
suicide_rate_chart = dict(
    kind='line',
    x='Periodo',
    y='Percent',
    xTitle='Year',
    yTitle='Total / population',
    title='Total suicide by population',
)
deaths_cause_tot_098.iplot(**suicide_rate_chart)


In [None]:
# Not significant by population


In [None]:
# Pie chart: share of suicides per sex
sex_colours = {
    'Hombres': 'grey',
    'Mujeres': 'gold',
}
graph = px.pie(
    deaths_cause_098,
    values='Total',
    names='Sexo',
    title='Total suicide',
    color='Sexo',
    color_discrete_map=sex_colours,
)

# Hide the text labels on the slices
graph.update_traces(textposition='none')

graph.show()


In [None]:
# Bar chart: suicides per year, coloured by sex
summed_by_sex_year = deaths_cause_098.groupby(['Sexo', 'Periodo'])[['Total']].sum()
deaths_cause_098_sex = summed_by_sex_year.reset_index()

axis_labels = {
    'Periodo': 'Year',
    'Total': 'Total',
    'Sexo': 'Sex',
}
graph = px.bar(
    deaths_cause_098_sex,
    x='Periodo',
    y='Total',
    color='Sexo',
    title='Total suicide',
    labels=axis_labels,
)

graph.show()

In [None]:
# Bar chart: suicides per age group over all years
summed_by_age = deaths_cause_098.groupby(['Edad'])[['Total']].sum()
deaths_cause_098_age = summed_by_age.reset_index()
print(len(deaths_cause_098_age))

axis_labels = {
    'Edad': 'Age',
    'Total': 'Total',
}
graph = px.bar(
    deaths_cause_098_age,
    x='Edad',
    y='Total',
    title='Total suicide',
    labels=axis_labels,
)

# Order the age groups by bar height
graph.update_layout(xaxis=dict(categoryorder='total descending'))

graph.show()


In [None]:
# Age group with the most suicides in each year.
#
# The table used to be displayed in a separate cell placed BEFORE the one that
# computes it, so a top-to-bottom run failed with NameError. Both steps now
# live in this cell: compute first, then display, then plot.

# Suicides per (age group, year)
deaths_cause_098_age_year = deaths_cause_098.groupby(['Edad', 'Periodo'])[['Total']].sum().reset_index()

# For every year keep only the row of the age group with the highest total
top_rows = deaths_cause_098_age_year.groupby('Periodo')['Total'].idxmax()
deaths_cause_098_age_year_max = deaths_cause_098_age_year.loc[top_rows].reset_index(drop=True)

display(deaths_cause_098_age_year_max)

# Bar chart: one bar per year, coloured by the age group with most suicides
graph = px.bar(deaths_cause_098_age_year_max,
               x = 'Periodo',
               y = 'Total',
               color = 'Edad',
               title = 'Age suicide',
               labels={'Periodo':'Year',
                       'Total':'Total'})

graph.show()



### en proceso.....

In [None]:
# Cufflinks histogram




In [None]:
# Cufflinks bar plot
# Template only: the call is wrapped in a string literal, so it is not executed.
# Copy it out and replace the upper-case placeholders to use it.
'''
dataset_bar.iplot(kind='bar',
                  x='VARIABLE',
                  xTitle='AXIS TITLE',
                  yTitle='AXIS TITLE',
                  title='VIZ TITLE')
'''

In [None]:
# Cufflinks line plot
# Template only: the call is wrapped in a string literal, so it is not executed.
# Copy it out and replace the upper-case placeholders to use it.
'''
dataset_line.iplot(kind='line',
                   x='VARIABLE',
                   xTitle='AXIS TITLE',
                   yTitle='AXIS TITLE',
                   title='VIZ TITLE')
'''

In [None]:
# Cufflinks scatter plot
# Template only: the call is wrapped in a string literal, so it is not executed.
# Copy it out and replace the upper-case placeholders to use it.
'''
dataset_scatter.iplot(x='VARIABLE', 
                      y='VARIABLE', 
                      categories='VARIABLE',
                      xTitle='AXIS TITLE', 
                      yTitle='AXIS TITLE',
                      title='VIZ TITLE')
'''