
# Family counts per year <font color='orange'>UNIQUE RECORDS</font>

By **Franklin Oliveira**

<font color='red' size='4'>**NOTE:** the color palette still needs to be updated (see the other notebooks)</font>

-----
This notebook contains all the code needed to build the order-per-year and family-per-year charts from the `poliqueta` database. Here you'll find some basic data treatment and the charts' code.

Database: <font color='blue'>'IBUFRJ27.07.2020 - visualização.xlsx'</font>.

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# Renderer selection: the 'notebook' renderer is kept commented out as an
# alternative; the 'default' renderer is the one actually enabled.
# alt.renderers.enable('notebook')
alt.renderers.enable('default')

# Disable Altair's row limit so charts can be built from the whole table.
# This is the last expression of the cell, so its return value is what the
# cell displays.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Load the merged database: a semicolon-separated CSV decoded as 'utf-8-sig'.
NewTable = pd.read_csv(
    './data/merged_db.csv',
    sep=';',
    encoding='utf-8-sig',
)

<br>

<font size=5>**Color Palette per Order**</font>

These images were used as inspiration (https://color.adobe.com/create/image)

<img src="./src/img1.jpg" width='500px'>

<img src="./src/img2.jpg" width='500px'>

Colors:

    '#8ABFB0',  # azul claro
    '#41A681',  # verde
    '#7ACAAB',  # verde claro
    '#D9C2AD',  # bege
    '#0D0D0D',  # preto
    '#D96236',  # laranja
    '#D94B18',  # laranja escuro
    '#FFB27C',  # cor de pele clara
    '#732C02',  # marrom
    '#86471B',  # mostarda

In [3]:
# Tally of records per order, keeping missing values (dropna=False) so the
# NaN bucket is listed too (242 rows in the output shown below).
NewTable['order'].value_counts(dropna=False)

Phyllodocida        2430
Eunicida            1496
Sabellida            753
Scolecida            698
Amphinomida          464
Spionida             436
Terebellida          387
NaN                  242
Sipuncula              9
Canalipalpata          6
Crassiclitellata       1
Aspidosiphonida        1
Name: order, dtype: int64

In [4]:
# Distinct values of the 'order' column, in the order pandas returns them.
# A missing order (NaN), if present in the column, is kept as one of the entries.
ordens = NewTable['order'].unique()

# Palette: the i-th entry of `ordens` is paired with the i-th color below.
cores = [
    '#8ABFB0',  # light blue
    '#41A681',  # green
    '#7ACAAB',  # light green
    '#D9C2AD',  # beige
    '#0D0D0D',  # black
    '#D96236',  # orange
    '#D94B18',  # dark orange
    '#FFB27C',  # light skin tone
    '#732C02',  # brown
    '#86471B',  # mustard

    # new colors for Canalipalpata and Aspidosiphonida (the pairing follows the
    # data order, i.e. it is arbitrary; pin it later)
    '#592202',
    # NOTE(review): '#D96236' is already used above (orange), so two orders end
    # up sharing one color -- pick a distinct value when the palette is revised.
    '#D96236'
]

# Fail early with a clear message instead of an opaque IndexError while pairing.
if len(ordens) > len(cores):
    raise ValueError(
        '{} orders found but only {} colors are defined in `cores`'.format(
            len(ordens), len(cores))
    )

# order -> color lookup as a plain dict; zip stops once every order has a color.
cores_ordem = dict(zip(ordens, cores))

<br>


## Graphs

---
### Creating chart: counts per order per year

In [5]:
# Keep a single row per (start_year, order) pair, then count the non-null
# 'class' values inside each pair and expose that tally as a 'counts' column.
temp = NewTable.drop_duplicates(subset=['start_year', 'order'])

orders = (
    temp
    .groupby(['start_year', 'order'])['class']
    .count()
    .reset_index(name='counts')
    .sort_values(['start_year', 'order'])  # ordering
)

In [6]:
# Keep only the rows whose 'order' value is present (drops remaining NaN's).
orders = orders[orders['order'].notna()]

In [7]:
# NOTE: because unique records are being counted, this chart ends up binary.
# Heat-map style grid: one rectangle per (sampling year, order) row of `orders`,
# colored by order, with the orders on the y axis ranked by their summed counts.
g1 = alt.Chart(orders, width=800, height=200,
               title='Number of collected polychaetas per order each year').mark_rect().encode(
    x= alt.X('start_year', type='ordinal', title='Sampling Year'),
    y= alt.Y('order', type='nominal', title='Order',
            sort= alt.EncodingSortField(field='counts', op='sum', order='descending')),
    # No `size` channel here: Vega-Lite does not support `size` for rect marks,
    # so encoding it had no effect on the rendered rectangles.
    color = alt.Color('order', scale= alt.Scale(domain=ordens, range=cores)),
#     tooltip= alt.Tooltip(['start_year', 'counts'])
)

# saving graph
# g1.save('./graphs/orders_per_year-UNIQUE.html')

g1

### Number of polychaetes per family per year

In [17]:
# Non-null 'class' values per (family, start_year) pair, exposed as 'counts',
# with the year column cast to int.
teste = (
    NewTable
    .groupby(['family', 'start_year'])['class']
    .count()
    .reset_index(name='counts')
    .astype({'start_year': int})
)

<br>

**graph:** family per year

In [18]:
# Bubble chart: one circle per (family, sampling year) row of `teste`, sized by
# the 'counts' column. The chart is also written to an HTML file.
g1 = alt.Chart(teste,
               width=500, height=500, title='Number of collected polychaeta of each family per year').mark_circle(
                                                                                size=60).encode(
    x= alt.X('start_year', type='ordinal', title='Sampling Year'),
    # Rank families by their total count (op='sum'), matching the other charts
    # in this notebook; op='count' would rank by the number of rows (years) a
    # family appears in rather than by how many animals were collected.
    y= alt.Y('family', type='nominal', title='Family',
            sort= alt.EncodingSortField(field='counts', op='sum', order='descending')),
    size= alt.Size('counts', title='Count', scale=alt.Scale(range=[6,400])),
    tooltip = alt.Tooltip(['family', 'start_year', 'counts'])
)

g1.save('./graphs/familias_por_ano.html')

g1

In [25]:
# Non-null 'class' values per (family, order, start_year) triple, exposed as
# 'counts', with the year column cast to int.
teste = (
    NewTable
    .groupby(['family', 'order', 'start_year'])['class']
    .count()
    .reset_index(name='counts')
    .astype({'start_year': int})
)

In [26]:
# Family-per-year bubble chart built from `teste`: circle size follows 'counts'
# and circle color follows the order, using the `ordens` / `cores` palette.
g1 = (
    alt.Chart(teste)
    .mark_circle(size=60)
    .encode(
        x=alt.X('start_year', type='ordinal', title='Sampling Year'),
        y=alt.Y(
            'family',
            type='nominal',
            title='Family',
            sort=alt.EncodingSortField(field='counts', op='sum', order='descending'),
        ),
        size=alt.Size('counts', title='Count', scale=alt.Scale(range=[6, 400])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(['family', 'start_year', 'counts']),
    )
    .properties(
        width=500,
        height=500,
        title='Number of collected animals of each family per year',
    )
)

# g1.save('./graphs/familias_por_ano_c_ordem.html')

g1

<br>

### Dynamic version

<font color='red' size='4'>**p.s.:** Still needs more adjustments </font>

In [13]:
# This whole cell is disabled (every line is commented out); nothing runs here.
# NOTE(review): the sketch below refers to columns 'familia', 'ordem' and
# 'ano_coleta', while `teste` is built from 'family', 'order' and 'start_year'
# in this notebook -- the names need updating before this can be re-enabled.
# NOTE(review): `selection_single` / `add_selection` look like the pre-v5 Altair
# selection API -- confirm against the installed Altair version.

# # dynamic version
# slider = alt.binding_range(min=1900, max=2016, step=1)
# select_year = alt.selection_single(name="ano_coleta", fields=['ano_coleta'],
#                                    bind=slider, init={'ano_coleta': 2000})

# # transparent chart in the background (to keep the axes fixed)
# g0 = alt.Chart(teste,
#                width=800, height=400).mark_circle(
#                                                                                 size=60, opacity=0).encode(
#     x= alt.X('familia', type='nominal', title='Familia'),
#     y= alt.Y('ordem', type='nominal', title='Ordem',
#             sort= alt.EncodingSortField(field='counts', op='count', order='descending')),
# )


# g1 = alt.Chart(teste,
#                width=800, height=400, title='Qtde. de animais por família e ordem').mark_circle(
#                                                                                 size=60).encode(
#     x= alt.X('familia', type='nominal', title='Familia'),
#     y= alt.Y('ordem', type='nominal', title='Ordem',
#             sort= alt.EncodingSortField(field='counts', op='count', order='descending')),
#     size= alt.Size('counts', title='Contagem'),
#     color = alt.Color('ordem', scale= alt.Scale(domain=ordens, range=cores)),
#     tooltip=alt.Tooltip(['familia','ordem','ano_coleta','counts','ordem'])
# ).add_selection(
#     select_year
# ).transform_filter(
#     select_year
# )

# g1 = alt.layer(g0, g1)

# # saving graph
# # g1.save('./graphs/familias_por_ano_c_ordem-dinamico.html')

# # g1

<br>

**The end!**

-----