# Type charts

By **Franklin Oliveira**

-----
This notebook contains all the code necessary to make the "type" charts from the `repteis` database. Here you'll find some basic data treatment and the code for the charts.

Database: <font color='blue'>'Compilacao Livros Repteis - 2 a 10 - 2020_04_28.xls'</font>.

In [2]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# pacotes para visualização rápida
import seaborn as sns
import matplotlib.pyplot as plt

# pacote para visualização principal
import altair as alt

# enabling the renderer for the notebook
# alt.renderers.enable('notebook')
alt.renderers.enable('default')


# disabling the row limit
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [None]:
# Load the treated database: a semicolon-separated CSV read with the
# 'utf-8-sig' codec.
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
)

<br>

<font size=5>**Paleta de cores por Ordem**</font>

Abaixo está a imagem usada como inspiração (https://color.adobe.com/create/image)

<img src="./src/paleta_cores.jpeg" width='500px'>

Cores: 

- verde_escuro: #284021
- verde_claro: #88BF11
- amarelo: #D9CB0B
- laranja: #D99311
- laranja_escuro: #BF4417
- marrom_claro: #BF8D7A

In [4]:
# NOTE: 'caudata' is an error in the database. It must be removed.
# Colour assigned to each order.
cores_ordem = dict(
    Squamata='#BF4417',
    Testudines='#D9CB0B',
    Crocodylia='#284021',
)

# Parallel lists (same order as the dictionary) used as scale domain / range.
ordens = [*cores_ordem]
cores = [cores_ordem[nome_ordem] for nome_ordem in ordens]

<br>


## Graphs

---

### Types (*per year*) per order

x: collection year (`ano_coleta`), y: type (`notas_taxonomicas`), color: order (`ordem`), size: counts

In [31]:
# The type designation of each record is stored in `notas_taxonomicas`;
# show how many records carry each designation.
NewTable.loc[:, 'notas_taxonomicas'].value_counts()

Parátipo     227
Holótipo      28
Cótipo         2
Lectótipo      1
Topótipo       1
Name: notas_taxonomicas, dtype: int64

In [28]:
# Working copy restricted to the columns listed below.
colunas_tipos = [
    'altitude', 'familia', 'ordem', 'ano_coleta', 'qualificador_atual', 'numero_catalogo',
    'genero_atual', 'especie_atual', 'subespecie_atual', 'notas_taxonomicas',
]
teste = NewTable[colunas_tipos].copy()

# One row per (type, collection year, order); `counts` is the number of
# non-null `familia` entries in each group.
temp = (
    teste
    .groupby(['notas_taxonomicas', 'ano_coleta', 'ordem'])['familia']
    .count()
    .reset_index(name='counts')
)

# p.s.: Cótipo and Topótipo are not types, so both are left out
temp = temp[~temp['notas_taxonomicas'].isin(['Cótipo', 'Topótipo'])]

243 info. de tipos

### Gráf. de Tipos

In [29]:
# Bubble chart of type records per collection year: one row per type
# designation, bubbles coloured by order and sized by the number of records.
tipo = alt.Chart(temp, height=150, title='Types per year').mark_circle().encode(
    x = alt.X('ano_coleta:O', title='collected year'),
    # `temp` has no 'tipo' column (its columns are notas_taxonomicas, ano_coleta,
    # ordem and counts), so the rows are ranked by their total number of records
    # (sum of `counts`), most frequent type first.
    y = alt.Y('notas_taxonomicas:N', title= 'type',
              sort=alt.EncodingSortField(field='counts', op='sum', order='descending')),
    color= alt.Color('ordem', scale=alt.Scale(domain=ordens, range=cores), title='order'),
    size= alt.Size('counts', scale=alt.Scale()),
    tooltip= [alt.Tooltip('notas_taxonomicas', title='type'),
              alt.Tooltip('ano_coleta', title='collected year'),
              alt.Tooltip('counts', title='counts')]
)

# NOTE(review): assumes the ./graphs/tipo/ directory already exists — confirm
tipo.save('./graphs/tipo/tipos_por_ano.html')

tipo

<br>

<font size='6'>Para a base de repteis acaba aqui, por enquanto </font>

<br>

#### defining some parameters

# Styling parameters for the type charts. `colors`, `types` and `opacities`
# are parallel lists: entry i of each one refers to the same type status.
colors = ['#d62728', '#f58518', '#d95f02',
          '#d62729', '#f58519', '#d95f03',
          '#4daf4a', '#8c6d31',
          '#79706e', '#bab0ac', '#d8b5a5'
         ]

types = ['Holotipo', 'Alotipo', 'Neotipo',
          'Sintipo', 'Lectotipo', 'Paralectotipo',
          'Paratipo', 'Topotipo',
          'Tipo', 'Co-tipo', 'Material tipo'
        ]

# one opacity per entry of `types` (the list used to carry one extra value)
opacities= [1, 1, 1,
           0.4,0.4,0.4,
           1,1,
           1,1,1]

# dictionary with type:color; a plain dict, so an unknown type raises KeyError
type_color = dict(zip(types, colors))

<br>

### ordering per holotype year (year in which the holotype was discovered)

# NOTE(review): `teste1` is not defined in the visible part of this notebook — confirm where it comes from.
# Number of non-null `class` entries per (determiner, type status, holotype year).
counts = (
    teste1
    .groupby(['determiner_first_and_last_name', 'type_status','holotipo_year'])['class']
    .count()
    .reset_index(name='counts')
)

# Attach the group count to the rows of `teste1` by joining on the grouping keys.
teste1 = teste1.merge(counts, on=['determiner_first_and_last_name', 'type_status','holotipo_year'])

# subsample: rows where determiner name, type status and start year are all present
temp = teste1.dropna(
    subset=['determiner_first_and_last_name', 'type_status', 'start_year']
).copy()

# in-place sort of `teste1`; `temp` was copied before it, so `temp` keeps its row order
teste1.sort_values(['holotipo_year','determiner_first_and_last_name', 'counts'], inplace=True)

# names ordering for y axis: unique determiner names in order of appearance in `temp`
names_ordering = list(temp['determiner_first_and_last_name'].dropna().unique())

# holotype rows only, restricted to those with a non-empty determiner name
is_holotype = teste1['type_status'] == 'Holotipo'
has_determiner = teste1['determiner_first_and_last_name'].notna()
holotipo = teste1[is_holotype & has_determiner].copy()

# ordered by holotype year
holotipo = holotipo.sort_values('holotipo_year')

# determiner names in holotype-year order (one entry per row, repeats kept)
names_ordering = list(holotipo['determiner_first_and_last_name'].values)

# Non-null entries of the `count` column per (order, start year).
# NOTE(review): this needs a column literally named 'count' in `holotipo`; the
# merge on `teste1` adds 'counts' — confirm that 'count' exists.
temp = holotipo.groupby(['order','start_year'])['count'].count().reset_index()

# Bubble chart: start year on x, order on y, bubble size given by `count`.
# Rows whose order is the string 'Nan' are left out.
g1 = alt.Chart(
    temp[temp['order'] != 'Nan'],
    width=800,
    height=400,
    title='Number of holotypes of each order per year',
).mark_circle(size=60, color='red').encode(
    x=alt.X('start_year:O', title='Year'),
    y=alt.Y(
        'order:N',
        title='Order',
        sort=alt.EncodingSortField(field='count', op='max', order='descending'),
    ),
    size=alt.Size('count'),
)

# saving graph (disabled)
# g1.save(f'./types/counts_per_year/holotypes_per_order.svg')
# g1.save(f'./types/counts_per_year/holotypes_per_order.png')
# g1.save(f'./types/counts_per_year/holotypes_per_order.html')

g1

### separating and grouping types

# every type status present in the data (this rebinds `types`)
types = teste1['type_status'].unique()

# statuses other than Holotipo / Alotipo / Paratipo, kept in order of appearance
main_types = ['Holotipo', 'Alotipo', 'Paratipo']
group_types = [status for status in types if status not in main_types]

# Same chart as the holotype one, built for each of the three main types.
# `g1` is rebound on every iteration and the save calls are disabled, so only
# the chart of the last type is left in `g1` after the loop.
for t in ['Holotipo', 'Alotipo', 'Paratipo']:
    # rows of the current type that have a determiner name
    temp1 = teste1[teste1['type_status'] == t].copy()
    temp = temp1.dropna(subset=['determiner_first_and_last_name']).copy()

    # ordered by holotype year
    temp = temp.sort_values('holotipo_year')

    # determiner names in holotype-year order
    names_ordering = list(temp['determiner_first_and_last_name'].values)

    # non-null `count` entries per (order, start year)
    temp = temp.groupby(['order','start_year'])['count'].count().reset_index()

    ### Chart: start year on x, order on y, bubble size given by `count`,
    ### drawn in the colour registered for the current type
    g1 = alt.Chart(
        temp[temp['order'] != 'Nan'],
        width=800,
        height=400,
        title=f'Number of {t} of each order per year',
    ).mark_circle(size=60, color=f'{type_color[t]}').encode(
        x=alt.X('start_year:O', title='Year'),
        y=alt.Y(
            'order:N',
            title='Order',
            sort=alt.EncodingSortField(field='count', op='max', order='descending'),
        ),
        size=alt.Size('count'),
        # alternative encodings (disabled):
        # color= alt.Color('holotipo_year', type='quantitative', scale= alt.Scale(scheme='reds')),
        # opacity= alt.Opacity(scale= alt.Scale(domain=types,range=opacities), type='quantitative')
        # size=alt.Size('depth')
    )

    # saving graph (disabled)
#     g1.save(f'./types/counts_per_year/{t}_per_order.svg')
#     g1.save(f'./types/counts_per_year/{t}_per_order.png')
#     g1.save(f'./types/counts_per_year/{t}_per_order.html')

# g1

<br>

#### graph of other types

# distinct orders present in the data
t = teste1['order'].unique()

# rows of the remaining type statuses (`group_types`), leaving out rows whose
# order is the string 'Nan'
has_order = teste1['order'] != 'Nan'
is_other_type = teste1['type_status'].isin(group_types)
temp = teste1[has_order & is_other_type]

# non-null `count` entries per (order, start year, type status)
temp = temp.groupby(['order','start_year','type_status'])['count'].count().reset_index()

# NOTE(review): rebinds `colors` to a list holding one empty string — looks unfinished; confirm intent
colors = ['']

<font color='red' size='5'>**p.s.:** temporary adjustment!!!! </font>

I'm grouping `Material tipo`, `Topotipo` and `Co-tipo` under `Tipo` (i.e. replacing each of them with `Tipo`).

# Fold the listed statuses into 'Tipo', one substring replacement at a time
# (same order as before: 'Material tipo', 'Topotipo', 'Co-tipo').
for status_alias in ('Material tipo', 'Topotipo', 'Co-tipo'):
    temp['type_status'] = temp['type_status'].str.replace(status_alias, 'Tipo')

**graph**

# making graph for other types (group_types)
# type statuses present in `temp`, in order of appearance; used as colour domain
other_types = list(temp['type_status'].unique())

g1 = alt.Chart(
    temp,
    width=800,
    height=400,
    title='Number of types of each order per year',
).mark_circle(size=60).encode(
    x=alt.X('start_year:O', title='Year'),
    y=alt.Y(
        'order:N',
        title='Order',
        sort=alt.EncodingSortField(field='count', op='max', order='descending'),
    ),
    # each status gets the colour registered for it in `type_color`
    color=alt.Color(
        'type_status',
        title='type',
        scale=alt.Scale(
            domain=other_types,
            range=[type_color[status] for status in other_types],
        ),
    ),
    size=alt.Size('count:Q', scale=alt.Scale(type='bin-ordinal')),
)

# saving graph (disabled)
# g1.save(f'./types/counts_per_year/other-types_per_order.svg')
# g1.save(f'./types/counts_per_year/other-types_per_order.png')
# g1.save(f'./types/counts_per_year/other-types_per_order.html')

g1

<br>

## New proposal

Compare: the year in which the holotype was discovered vs. the year in which it was first cataloged in the Museum.

# Scatter of holotypes: start year (x) against holotype year (y), coloured by order.
# p.s.: there are overlapping points (it's affecting opacity)
g1 = alt.Chart(holotipo, title='Holotypes discovered and determined years',
               width=600, height=500).mark_circle(size=60).encode(
    # The axis titles were swapped: `holotipo_year` is the year in which the
    # holotype was discovered, so it carries the 'Discovered Year' title and
    # `start_year` carries 'Start Year'.
    x= alt.X('start_year', type='ordinal', title='Start Year'),
    y= alt.Y('holotipo_year', type='ordinal', title='Discovered Year',
            sort= alt.EncodingSortField(field='holotipo_year', order='descending')),
    # tooltip labels match the axis titles
    tooltip= [alt.Tooltip('holotipo_year', title='Discovered Year'),
              alt.Tooltip('start_year', title='Start Year')],
    color= alt.Color('order', type='nominal', scale= alt.Scale(scheme='reds')),
#     opacity= alt.Opacity(scale= alt.Scale(domain=types,range=opacities), type='quantitative')
#     size=alt.Size('depth')
)

# saving graph (disabled)
# g1.save(f'./types/holotipo_discoreved_vs_start_year.svg')
# g1.save(f'./types/holotipo_discoreved_vs_start_year.png')
# g1.save(f'./types/holotipo_discoreved_vs_start_year.html')

g1

<br>

**The end!**

-----