# Depth

By **Franklin Oliveira**

-----

This notebook outputs Depth charts for the `poliqueta` collection. Database is in <font color='blue'>'IBUFRJ27.07.2020 - visualização.xlsx'</font> and <font color='blue'>'MNRJP27.07.2020 - visualização.xls'</font>.

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# packages for quick visualization
import seaborn as sns
import matplotlib.pyplot as plt

# package for the main visualizations
import altair as alt

# enabling the renderer for the notebook
# alt.renderers.enable('notebook')
alt.renderers.enable('default')


# disabling altair's row limit
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

Importing the data pre-treated in `1-data_treatment.ipynb`. In this notebook, I only make some minor adjustments for visualization purposes. For a full traceback of the data treatment, please see the `1-data_treatment` notebook.

In [2]:
# Load the merged database from a semicolon-separated, UTF-8 encoded CSV file.
# (`treated_db` was the previous database used when this code was first written.)
NewTable = pd.read_csv('./data/merged_db.csv', sep=';', encoding='utf-8')

<br>

<font size=5>**Color Palette per Order**</font>

These images were used as inspiration (https://color.adobe.com/create/image)

<img src="./src/img1.jpg" width='500px'>

<img src="./src/img2.jpg" width='500px'>

Cores: 

    '#8ABFB0',  # azul claro
    '#41A681',  # verde
    '#7ACAAB',  # verde claro
    '#D9C2AD',  # bege
    '#0D0D0D',  # preto
    '#D96236',  # laranja
    '#D94B18',  # laranja escuro
    '#FFB27C',  # cor de pele clara
    '#732C02',  # marrom
    '#86471B',  # mostarda
    
cores temporárias:

    '#592202',  # marrom escuro
    '#D96236',  # laranja escuro

In [3]:
# Number of records per order; missing values are counted too (242 NaNs)
NewTable['order'].value_counts(dropna=False)

Phyllodocida        2430
Eunicida            1496
Sabellida            753
Scolecida            698
Amphinomida          464
Spionida             436
Terebellida          387
NaN                  242
Sipuncula              9
Canalipalpata          6
Crassiclitellata       1
Aspidosiphonida        1
Name: order, dtype: int64

In [4]:
# Distinct values of the `order` column, in order of first appearance
# (`unique()` keeps a missing value as an entry of its own).
ordens = NewTable['order'].unique()

# Palette, consumed positionally: the i-th order receives the i-th color.
cores = [
    '#8ABFB0',  # light blue
    '#41A681',  # green
    '#7ACAAB',  # light green
    '#D9C2AD',  # beige
    '#0D0D0D',  # black
    '#D96236',  # orange
    '#D94B18',  # dark orange
    '#FFB27C',  # light skin tone
    '#732C02',  # brown
    '#86471B',  # mustard

    # new colors for Canalipalpata and Aspidosiphonida
    # (which order gets which color is arbitrary for now - to be fixed later).
    # NOTE: '#D96236' already appears above, so two orders share that color.
    '#592202',
    '#D96236'
]

# order -> color lookup; raises IndexError if there are more orders than colors
cores_ordem = {ordem: cores[posicao] for posicao, ordem in enumerate(ordens)}

<br>

---

## Graphs

### Total number of records collected per year

- x: start year (taken from the start date)
- y: number of records collected per year

In [5]:
# Number of records per `start_year` value, as a two-column frame
# with columns `year` and `counts`.
#
# The index and the values are named explicitly instead of renaming the
# default output of `value_counts().reset_index()`: those default column
# names changed in pandas 2.0 (from 'index'/<series name> to
# <series name>/'count'), which would leave this frame without a `year`
# column and break the cells below.
teste = (
    NewTable['start_year']
    .value_counts()
    .rename_axis('year')
    .reset_index(name='counts')
)

In [6]:
# Normalize `year` to an integer: keep the text before the first '/' and,
# within that, the text before the first '.' (e.g. '1998.0' -> 1998).
teste = teste.assign(
    year=teste['year']
    .map(lambda bruto: str(bruto).partition('/')[0].partition('.')[0])
    .astype(int)
)

# Different raw values can collapse to the same year, so add them up:
# total number of animals collected per year.
por_ano = teste.groupby('year').sum()
teste = por_ano.reset_index()

In [7]:
# lower / upper bounds for the X axis (year)
min_x, max_x = teste['year'].min(), teste['year'].max()

# NOTE: despite its name, `ln_counts` is a plain copy of `counts`;
# no logarithm is applied here.
teste = teste.assign(ln_counts=teste['counts'])

<font color='red'>**ideia:** fazer crescimento em log (referencias em Bio. fazem isso)</font>

In [8]:
# Bar chart: one bar per sampling year, bar height read from `ln_counts`.
eixo_x = alt.X('year', type='ordinal', title='Sampling Year')
eixo_y = alt.Y('ln_counts', type='quantitative', title='Contagem')

temp = (
    alt.Chart(teste, width=800, title='Collected animals per year')
    .mark_bar()
    .encode(x=eixo_x, y=eixo_y)
)

# export a standalone HTML copy, then display the chart inline
temp.save('./graphs/coletas_por_ano.html')
temp

In [9]:
# Number of cataloged records per (cataloged_year, collection_prefix) pair,
# as a frame with columns `cataloged_year`, `collection_prefix`, `counts`.
#
# `catalog_number` is selected *before* aggregating so only that column is
# counted, instead of counting every column of the table and discarding
# all but one afterwards. The resulting values are the same.
teste = (
    NewTable
    .groupby(['cataloged_year', 'collection_prefix'])['catalog_number']
    .count()
    .reset_index(name='counts')
)

# min and max for the X axis (year)
min_x = teste['cataloged_year'].min()
max_x = teste['cataloged_year'].max()

In [15]:
# Bar chart of cataloged records per year, colored by collection.
codificacao = dict(
    x=alt.X('cataloged_year', type='ordinal', title='Cataloged Year'),
    y=alt.Y('counts', type='quantitative', title='Contagem'),
    color=alt.Color('collection_prefix', title='Coleção'),
)

base = alt.Chart(teste, width=800, title='Cataloged animals per year')
temp = base.mark_bar().encode(**codificacao)

temp.save('./graphs/catalogacoes_por_ano.html')
# Faceted variant (one row per collection), currently disabled:
# temp.facet(row='collection_prefix').resolve_axis('independent').save(
#         './graphs/catalogacoes_por_ano-facetado.html')

temp

-----

## Depth per family

In [16]:
# Columns needed for the depth-per-family chart and its tooltip
colunas = ['min_depth','family','order', 'start_year', 'qualifier', 'catalog_number',
           'genus', 'species', 'collector_full_name', 'country','state','locality']

# subset -> sort -> drop records without a depth -> make sure depth is a float
teste = (
    NewTable[colunas]
    .sort_values(['min_depth','family'])
    .dropna(subset=['min_depth'])
    .astype({'min_depth': float})
)

# extremes of the depth values, used for the Y scale
profundidades = teste['min_depth']
max_y, min_y = profundidades.max(), profundidades.min()

In [17]:
# Fields listed in the hover tooltip of each point
campos_tooltip = ['catalog_number', 'family','genus','species',
                  'qualifier', 'start_year','collector_full_name',
                  'country', 'state', 'locality', 'min_depth']

# Scatter of depth per family, colored by order.
# Families are sorted on the X axis by their maximum `min_depth` (ascending);
# the Y domain is passed as [max, min].
temp = (
    alt.Chart(teste, title='Depth per family', width=800)
    .mark_circle()
    .encode(
        x=alt.X('family', type='nominal', title='Family',
                sort=alt.EncodingSortField('min_depth', op='max', order='ascending')),
        y=alt.Y('min_depth', type='quantitative', title='Depth (in meters)',
                scale=alt.Scale(domain=[max_y, min_y])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(campos_tooltip),
    )
)

# export the plain chart and a variant faceted into one row per order
temp.save('./graphs/depth/depth_per_family.html')
facetado = temp.facet(row='order')
facetado.save('./graphs/depth/depth_per_family-facetado.html')

temp

<br>

## Depth per genus

In [18]:
# Columns needed for the depth-per-genus charts and their tooltips
colunas = ['min_depth','family','order', 'start_year', 'qualifier', 'catalog_number',
           'genus', 'species',  'collector_full_name',
           'country', 'state', 'locality']

teste = (
    NewTable[colunas]
    # make sure depth is a floating point number and genus names are capitalized
    .assign(
        min_depth=lambda tabela: tabela['min_depth'].astype(float),
        genus=lambda tabela: tabela['genus'].str.capitalize(),
    )
    # sorting
    .sort_values(['min_depth','genus'])
    # drop records without a depth
    .dropna(subset=['min_depth'])
)

# extremes for the Y axis
profundidades = teste['min_depth']
max_y, min_y = profundidades.max(), profundidades.min()

In [19]:
# Scatter of depth per genus, colored by order.
# Genera are sorted on the X axis by their maximum `min_depth` (ascending).
# Original note: OUTLIER - null order.
campos_tooltip = ['catalog_number', 'family', 'genus','species',
                  'qualifier', 'start_year','collector_full_name',
                  'country', 'state', 'locality', 'min_depth']

eixo_x = alt.X('genus', type='nominal', title='Genus',
               sort=alt.EncodingSortField('min_depth', op="max", order="ascending"))
eixo_y = alt.Y('min_depth', type='quantitative', title='Depth (in meters)',
               scale=alt.Scale(domain=[max_y, min_y]))
cor = alt.Color('order', scale=alt.Scale(domain=ordens, range=cores))

temp = (
    alt.Chart(teste, title='Depth per genus', width=1500)
    .mark_circle()
    .encode(x=eixo_x, y=eixo_y, color=cor, tooltip=alt.Tooltip(campos_tooltip))
)

temp.save('./graphs/depth/genus/depth-per-genus.html')
temp

<br>

## Separating per Min Depth

<font color='red' size=4>Separando grupos de "maior profundidade"</font>


In [20]:
# Distinct genus names, in order of first appearance (kept for inspection)
genus = teste['genus'].unique()

# Deepest recorded `min_depth` of each genus, as a frame with columns
# `genus` and `max_depth`.
#
# A single groupby replaces the previous Python loop, which filtered the
# whole table once per genus. `sort=False` keeps the genera in order of
# first appearance, like the loop did.
# Rows with a missing genus are left out by groupby; the loop produced a
# (NaN, NaN) row for them, which never passed either threshold comparison
# in the next cell, so the resulting groups are unchanged.
d = (
    teste
    .groupby('genus', sort=False)['min_depth']
    .max()
    .reset_index(name='max_depth')
)

In [21]:
# cutoff between the two groups, in the same unit as `max_depth`
# (division between tides, as suggested by the Museum staff)
threshold = 200

profundidade_maxima = d['max_depth']

# deeper group: genera whose deepest record is strictly beyond the threshold
grupo1 = d.loc[profundidade_maxima > threshold, 'genus']

# shallower group: genera whose deepest record is at or within the threshold
grupo2 = d.loc[profundidade_maxima <= threshold, 'genus']

#### grupo de maior profundidade

In [22]:
# Depth per genus, restricted to the genera listed in `grupo1`.
# Genera are sorted on the X axis by their maximum `min_depth` (ascending);
# the Y domain runs from `max_y` to 0.
dados_grupo = teste[teste['genus'].isin(grupo1)]

campos_tooltip = ['catalog_number', 'family', 'genus','species',
                  'qualifier', 'start_year','collector_full_name',
                  'country', 'state', 'locality', 'min_depth']

temp = (
    alt.Chart(dados_grupo, width=800, title='Depth per genus')
    .mark_circle(color=cores[0])
    .encode(
        x=alt.X('genus', type='nominal', title='Genus',
                sort=alt.EncodingSortField('min_depth', op='max', order="ascending")),
        y=alt.Y('min_depth', type='quantitative', title='Depth (in meters)',
                scale=alt.Scale(domain=[max_y, 0])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(campos_tooltip),
    )
)

temp.save('./graphs/depth/genus/depth_per_genus-maior-profundidade.html')
temp

#### grupo de menor profundidade

menos pontos (generos menos representados)

In [28]:
# Depth per genus, restricted to the genera listed in `grupo2`.
# Genera are sorted on the X axis by their maximum `min_depth` (ascending);
# the Y domain runs from `threshold` to 0.
dados_grupo = teste[teste['genus'].isin(grupo2)]

campos_tooltip = ['catalog_number', 'family', 'genus','species',
                  'qualifier', 'start_year','collector_full_name',
                  'country', 'state', 'locality', 'min_depth']

temp = (
    alt.Chart(dados_grupo, width=800, title='Depth per genus')
    .mark_circle(color=cores[0])
    .encode(
        x=alt.X('genus', type='nominal', title='Genus',
                sort=alt.EncodingSortField('min_depth', op='max', order="ascending")),
        y=alt.Y('min_depth', type='quantitative', title='Depth (in meters)',
                scale=alt.Scale(domain=[threshold, 0])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(campos_tooltip),
    )
)

temp.save('./graphs/depth/genus/depth_per_genus-menor-profundidade.html')
temp

<br>

**That's it!**

-----