# Family counts per year

By **Franklin Oliveira**

-----
This notebook contains all the code necessary to make the order and family counts-per-year charts from the `repteis` database. Here you'll find some basic data treatment and the code for the charts. 

Database: <font color='blue'>'Compilacao Livros Repteis - 2 a 10 - 2020_04_28.xls'</font>.

In [2]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# Select the Altair renderer. The 'notebook' renderer is kept below, disabled,
# as an alternative; 'default' is the one actually enabled.
# alt.renderers.enable('notebook')
alt.renderers.enable('default')

# Disable Altair's maximum-rows check on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [3]:
# Load the treated database: a semicolon-separated CSV read with the
# 'utf-8-sig' codec.
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


<br>

<font size=5>**Paleta de cores por Ordem**</font>

Abaixo está a imagem usada como inspiração (https://color.adobe.com/create/image)

<img src="./src/paleta_cores.jpeg" width='500px'>

Cores: 

- verde_escuro: #284021
- verde_claro: #88BF11
- amarelo: #D9CB0B
- laranja: #D99311
- laranja_escuro: #BF4417
- marrom_claro: #BF8D7A

In [4]:
# Specialist input: Caudata is an error in the database, so it is left out
# of the palette.
cores_ordem = dict(
    Squamata='#BF4417',
    Testudines='#D9CB0B',
    Crocodylia='#284021',
)

# Parallel lists (same order as the dict) used as the color scale's
# domain and range in the charts.
ordens = [*cores_ordem]
cores = [*cores_ordem.values()]

<br>


## Graphs

---
### Creating chart: counts per order per year

In [5]:
# Count the non-null 'class' entries for every (collection year, order) pair
# and expose that tally as a 'counts' column, ordered by year and then order.
orders = (
    NewTable
    .groupby(['ano_coleta', 'ordem'])['class']
    .count()
    .reset_index()
    .rename(columns={'class': 'counts'})
    .sort_values(['ano_coleta', 'ordem'])
)

In [6]:
# Keep only the rows whose order ('ordem') is not NaN.
orders = orders[orders['ordem'].notna()]

In [7]:
# Bubble chart with one circle per (collection year, order); the circle size
# encodes the number of records. Caudata rows are filtered out because the
# specialist flagged that order as a database error.
g1 = alt.Chart(
    orders[orders['ordem'] != 'Caudata'],
    width=800, height=300,
    title='Number of collected repteis per order each year',
).mark_circle(color='green').encode(
    x=alt.X('ano_coleta', type='ordinal', title='Year'),
    # Rank the orders on the y axis by their largest yearly count. The sort
    # field must be the aggregated 'counts' column; 'count' does not exist in
    # the data, so sorting by it would not produce this ordering.
    y=alt.Y('ordem', type='nominal', title='Order',
            sort=alt.EncodingSortField(field='counts', op='max', order='descending')),
    size=alt.Size('counts', scale=alt.Scale(range=[10, 600])),
    # The color encoding takes precedence over the mark-level color above.
    color=alt.Color('ordem', scale=alt.Scale(domain=ordens, range=cores)),
    tooltip=alt.Tooltip(['ano_coleta', 'counts'])
)

# Font sizes for title, axes and legend.
g1 = g1.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# saving graph
g1.save('./graphs/orders_per_year.html')

g1

### number of reptiles per family per year

In [8]:
# Count the non-null 'class' entries for every (family, collection year)
# pair, expose the tally as 'counts', and cast the year column to int.
teste = (
    NewTable
    .groupby(['familia', 'ano_coleta'])['class']
    .count()
    .reset_index()
    .rename(columns={'class': 'counts'})
    .astype({'ano_coleta': int})
)

<br>

**graph:** family per year

In [10]:
# Bubble chart with one circle per (family, collection year); circle size
# encodes the number of records. Built, configured and saved in one chain.
g1 = (
    alt.Chart(
        teste,
        width=800,
        height=400,
        title='Number of collected animals of each family per year',
    )
    .mark_circle(size=60)
    .encode(
        x=alt.X('ano_coleta', type='ordinal', title='Ano de Coleta'),
        y=alt.Y(
            'familia',
            type='nominal',
            title='Familia',
            sort=alt.EncodingSortField(field='counts', op='count', order='descending'),
        ),
        size=alt.Size('counts', title='Count'),
        tooltip=alt.Tooltip(['familia', 'ano_coleta', 'counts']),
    )
    # Font sizes for title, axes and legend.
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# Write the chart to disk as a standalone HTML file.
g1.save('./graphs/familias_por_ano.html')

# g1  (inline display left disabled)

In [11]:
# Count the non-null 'class' entries for every (family, order, collection
# year) triple, expose the tally as 'counts', and cast the year column to int.
teste = (
    NewTable
    .groupby(['familia', 'ordem', 'ano_coleta'])['class']
    .count()
    .reset_index()
    .rename(columns={'class': 'counts'})
    .astype({'ano_coleta': int})
)

In [13]:
# Bubble chart with one circle per (family, collection year), colored by
# order using the shared palette; circle size encodes the number of records.
g1 = (
    alt.Chart(
        teste,
        width=800,
        height=400,
        title='Number of collected animals of each family per year',
    )
    .mark_circle(size=60)
    .encode(
        x=alt.X('ano_coleta', type='ordinal', title='Collected Year'),
        y=alt.Y(
            'familia',
            type='nominal',
            title='Family',
            sort=alt.EncodingSortField(field='counts', op='count', order='descending'),
        ),
        size=alt.Size('counts', title='Count'),
        color=alt.Color('ordem', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(['familia', 'ano_coleta', 'counts']),
    )
    # Font sizes for title, axes and legend.
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# Write the chart to disk as a standalone HTML file.
g1.save('./graphs/familias_por_ano_c_ordem.html')

# g1  (inline display left disabled)

<br>

### Dynamic version

<font color='red' size='4'>**p.s.:** Still needs more adjustments </font>

In [14]:
# # dynamic version
# slider = alt.binding_range(min=1900, max=2016, step=1)
# select_year = alt.selection_single(name="ano_coleta", fields=['ano_coleta'],
#                                    bind=slider, init={'ano_coleta': 2000})

# # transparent chart in the background (to keep the axes fixed)
# g0 = alt.Chart(teste,
#                width=800, height=400).mark_circle(
#                                                                                 size=60, opacity=0).encode(
#     x= alt.X('familia', type='nominal', title='Familia'),
#     y= alt.Y('ordem', type='nominal', title='Ordem',
#             sort= alt.EncodingSortField(field='counts', op='count', order='descending')),
# )


# g1 = alt.Chart(teste,
#                width=800, height=400, title='Qtde. de animais por família e ordem').mark_circle(
#                                                                                 size=60).encode(
#     x= alt.X('familia', type='nominal', title='Familia'),
#     y= alt.Y('ordem', type='nominal', title='Ordem',
#             sort= alt.EncodingSortField(field='counts', op='count', order='descending')),
#     size= alt.Size('counts', title='Contagem'),
#     color = alt.Color('ordem', scale= alt.Scale(domain=ordens, range=cores)),
#     tooltip=alt.Tooltip(['familia','ordem','ano_coleta','counts','ordem'])
# ).add_selection(
#     select_year
# ).transform_filter(
#     select_year
# )

# g1 = alt.layer(g0, g1)

# # saving graph
# # g1.save('./graphs/familias_por_ano_c_ordem-dinamico.html')

# # g1

<br>

**The end!**

-----