# Time-Spatial Exploration

By **Franklin Oliveira**

-----
This notebook contains all code necessary to make charts from `carcinos` database with focus on time and space exploration. Here you'll find some basic data treatment and charts' code. 

Database: <font color='blue'>'Planilha geral Atualizada FINAL 5_GERAL_sendo trabalhada no Google drive.xlsx'</font>
    

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# enabling notebook renderer
# alt.renderers.enable('notebook')
# alt.renderers.enable('default')

# Disable Altair's row limit so that charts built from large data frames
# are not rejected for exceeding the default maximum number of rows.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Load the treated database: a semicolon-separated CSV read with the
# 'utf-8-sig' codec.
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

## Filtering

For now, we consider only specimens of the order Decapoda (thoroughly revised by the Museum's crew).

In [3]:
# Keep only the rows whose order is Decapoda; the copy makes later column
# assignments on `decapoda` independent of `NewTable`.
is_decapoda = NewTable['order'].eq('Decapoda')
decapoda = NewTable.loc[is_decapoda].copy()

<br>

<font size=5>**Color palette**</font>

Colors (per infraorder): 

- <font color='#e26d67'><b>Ascacidae</b></font>
- <font color='#007961'><b>Anomura</b></font>
- <font color='#7a2c39'><b>Achelata</b></font>
- <font color='#b67262'><b>Axiidea</b></font>
- <font color='#ee4454'><b>Brachyura</b></font>
- <font color='#3330b7'><b>Caridea</b></font>
- <font color='#58b5e1'><b>Gebiidea</b></font>
- <font color='#b8e450'><b>Stenopodídea</b></font>
- <font color='#a0a3fd'><b>Astacidae</b></font>
- <font color='#deae9e'><b>Polychelida</b></font>
- <font color='#d867be'><b>Grapsidae</b></font>
- <font color='#fece5f'><b>Xanthoidea</b></font>

In [5]:
# Importing the customized color palette. The name is imported explicitly
# (instead of `import *`) so it is clear where `cores_familia_naive`, the
# family -> color mapping used by every chart below, comes from.
from src.MNViz_colors import cores_familia_naive

In [6]:
# p.s.: Caudata is an error and should be removed
# cores_ordem = {
#     'Squamata': '#BF4417',
#     'Testudines': '#D9CB0B', 
#     'Crocodylia': '#284021'
# }

# ordens = list(cores_ordem.keys())
# cores = list(cores_ordem.values())

<br>

## Graph: Exploring the time-spatial evolution of the database

In [7]:
# Corrects some typos: continent names that were typed without accents are
# mapped to their accented spelling.
continent_typos = {
    'America do Sul':'América do Sul',
    'America do Norte':'América do Norte',
    'America Central':'América Central',
    'Africa':'África',
    'Asia':'Ásia'
}

# Series.replace swaps the values that exactly match a key of the mapping and
# leaves every other value (missing values included) untouched, so no
# per-element lambda is needed.
decapoda['continent'] = decapoda['continent'].replace(continent_typos)

In [8]:
# Sanity check: number of records per continent, to confirm that only the
# accented spellings remain in the column.
decapoda['continent'].value_counts()

América do Sul      8144
Desconhecido         106
Europa                24
América do Norte      21
Oceania                9
América Central        8
África                 6
Ásia                   2
Name: continent, dtype: int64

### Chart 1: Evolution per time and continent

In [9]:
# Quick check: which infraorder values are recorded for the family Xanthidae?
decapoda[decapoda['family'] == 'Xanthidae']['infraorder'].unique()

array(['Brachyura', 'Xanthoidea'], dtype=object)

In [10]:
# Grouping by year, continent and family.
# The aggregation uses `decapoda` (not the full `NewTable`) so that this chart
# covers the same Decapoda-only subset as the other charts in this notebook.
# `counts` is the number of non-null `class` entries in each group.
teste = (
    decapoda
    .groupby(['start_year', 'continent', 'family'])['class']
    .count()
    .reset_index()
    .rename(columns={'class': 'counts'})
)

In [11]:
# Bubble chart: one circle per (year, continent, family) row of `teste`;
# the circle size encodes `counts` and the color encodes the family.
g1 = alt.Chart(teste, title='Evolução temporal da coleção por continente', height=300, width=2000).mark_circle().encode(
    x= alt.X('start_year', type="ordinal", title='Ano de Coleta'),
    # Continents are ordered by their total number of records. `teste` is
    # already aggregated, so the total is the SUM of `counts` (op="count" would
    # only count the aggregated rows); this matches the sort used in Chart 2.
    y= alt.Y('continent', title='Continente', 
             sort=alt.EncodingSortField('counts', op="sum", order='descending')), 
    size=alt.Size('counts', scale= alt.Scale(range=[30,1200]),
                  legend= alt.Legend(orient='bottom')), 
    order= alt.Order('counts', sort='descending'),  # smaller points in front
#     color= alt.Color('ordem', scale=alt.Scale(domain=ordens, range=cores)),  # old palette per order
    # fixed family -> color mapping taken from the customized palette
    color= alt.Color('family:N',title= 'Família', 
                    legend= alt.Legend(columns=10, symbolLimit= 102, orient='bottom'),
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    tooltip= alt.Tooltip(['continent','start_year','family','counts'])
)

# adjusting font sizes of title, axes and legends
g1 = g1.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g1.save('./graphs/time-spacial/evolucao_por_continente-FAMILIA.html')

# g1

<br>

### Chart 2: Evolution per time and country

In [12]:
# Grouping by year, country and family; `counts` is the number of non-null
# `class` entries in each group.
group_keys = ['start_year', 'country', 'family']
counts_by_country = decapoda.groupby(group_keys)['class'].count()
teste1 = counts_by_country.reset_index().rename(columns={'class': 'counts'})

In [13]:
# Brazil is left out here (it is detailed by state in a later chart).
other_countries = teste1[teste1['country'] != 'Brasil']

# fixed family -> color mapping taken from the customized palette
family_scale = alt.Scale(domain=list(cores_familia_naive.keys()),
                         range=list(cores_familia_naive.values()))

# encodings, built one at a time
year_axis = alt.X('start_year', type="ordinal", title='Ano de Coleta')
country_axis = alt.Y('country', title='País',
                     sort=alt.EncodingSortField('counts', op="sum", order='descending'))
bubble_size = alt.Size('counts', scale=alt.Scale(range=[20, 250]),
                       legend=alt.Legend(orient='right', columns=6))
draw_order = alt.Order('counts', sort='descending')  # smaller points in front
family_color = alt.Color('family:N', title='Familia',
                         legend=alt.Legend(columns=4, symbolLimit=102, orient='right'),
                         scale=family_scale)
hover = alt.Tooltip(['country', 'start_year', 'family', 'counts'])

g1 = alt.Chart(other_countries, title='Animais coletados por ano em outros países', width=800)
g1 = g1.mark_circle().encode(
    x=year_axis,
    y=country_axis,
    size=bubble_size,
    order=draw_order,
    color=family_color,
    tooltip=hover,
)

# adjusting font sizes of title, axes and legends
font_sizes = dict(labelFontSize=12, titleFontSize=12)
g1 = g1.configure_title(fontSize=16).configure_axis(**font_sizes).configure_legend(**font_sizes)

# g1.save('./graphs/time-spacial/evolucao_por_pais.html')

# g1

<br>

### Chart 3: Evolution per time and Brazilian states

In [19]:
# The region column is cast to string before it is used as a grouping key.
decapoda['braz_region'] = decapoda['braz_region'].astype(str)

# Records collected in Brazil, grouped by year, state, region and family;
# `counts` is the number of non-null `class` entries in each group.
brazil_only = decapoda[decapoda['country'] == 'Brasil']
teste2 = (
    brazil_only
    .groupby(['start_year', 'state', 'braz_region', 'family'])
    .count()['class']
    .reset_index()
    .rename(columns={'class': 'counts'})
)

In [21]:
# Axis label in the form "<state>, <region>".
teste2['state_and_region'] = teste2['state'].str.cat(teste2['braz_region'], sep=', ')

#### creating column with brazilian regions

In [41]:
# regioes = {
#     'Rio de Janeiro':'SE',
#     'São Paulo':'SE',
#     'Espírito Santo': 'SE',
#     'Pernambuco':'NE',
#     'Santa Catarina':'S',
#     'Amazonas':'N',
#     'Goiás':'CO',
#     'Roraima':'N',
#     'Pará':'N',
#     'Mato Grosso':'CO',
#     'Acre': 'N',
#     'Bahia': 'NE',
#     'Minas Gerais': 'SE',
#     'Mato Grosso do Sul': 'CO',
#     'Paraná': 'S',
#     'Rondônia': 'N',
#     'Ceará': 'NE',
#     'Maranhão': 'N',
#     'Rio Grande do Sul': 'S',
#     'Paraíba': 'NE',
#     'Distrito Federal': 'CO',
#     'Alagoas': 'NE',
#     'Amapá':'N',
#     'Piauí': 'NE',
#     'Brasília': 'CO',
#     'Tocantins': 'N',
#     'Rio Grande do Norte': 'NE',
#     'Sergipe': 'NE',
#     'Minas Gerais/Goiás/Distrito Federal': 'CO',
#     'Santa Catarina-Rio Grande do Sul': 'S'
# }

# # criando coluna com as regiões
# teste2['regiao'] = teste2['estado_ou_provincia'].apply(lambda x: regioes[str(x)])

# # coluna com estado, regiao
# teste2['regiao_e_estado'] = teste2['estado_ou_provincia'] + ', ' + teste2['regiao']

# Ordering for the y axis: by region and, inside each region, by the total
# number of records (both descending).
# `counts` is selected BEFORE summing so that the aggregation does not run over
# the other columns of `teste2` (e.g. `start_year`, which holds strings).
sorting = (
    teste2
    .groupby(['braz_region', 'state_and_region'])['counts']
    .sum()
    .reset_index()
    .rename(columns={'counts': 'soma'})
)
sorting = sorting.sort_values(['braz_region','soma'], ascending=False)['state_and_region'].unique()

In [60]:
# fixed family -> color mapping taken from the customized palette
family_scale = alt.Scale(domain=list(cores_familia_naive.keys()),
                         range=list(cores_familia_naive.values()))

# encodings, built one at a time
year_axis = alt.X('start_year', type="ordinal", title='Ano de Coleta')
# rows follow the precomputed `sorting` order (region, then total records)
state_axis = alt.Y('state_and_region', type="nominal", title='Estado, Região',
                   sort=sorting, axis=alt.Axis(labels=True))
# y2 = alt.Y2('braz_region', title='Região')
bubble_size = alt.Size('counts', scale=alt.Scale(range=[30, 1200], zero=False),
                       legend=alt.Legend(columns=5, orient='right'))
draw_order = alt.Order('counts', sort='descending')  # smaller points in front
family_color = alt.Color('family:N', title='Familia',
                         legend=alt.Legend(columns=3, symbolLimit=102, direction='vertical'),
                         scale=family_scale)
hover = alt.Tooltip(['state_and_region', 'start_year', 'family', 'counts'])

g1 = alt.Chart(teste2, title='Animais coletados por ano nos Estados Brasileiros')
g1 = g1.mark_circle(opacity=0.8).encode(
    x=year_axis,
    y=state_axis,
    size=bubble_size,
    order=draw_order,
    color=family_color,
    tooltip=hover,
)

# NOTE: this marks the background of whichever regions are selected, but it distorts the colors
# rule = alt.Chart(teste2[teste2['regiao'].isin(['SE','NE','CO'])], 
#                  title='Animais coletados por ano nos Estados Brasileiros').mark_rule(
#                     opacity=0.1, size=19, color='#dedede', filled=False, fill='green').encode(
#     y= alt.Y('regiao_e_estado', type="nominal",title='Estado, Região',
#             sort= sorting, axis=alt.Axis(labels=True))
# )

# adjusting font sizes of title, axes and legends
font_sizes = dict(labelFontSize=12, titleFontSize=12)
g1 = g1.configure_title(fontSize=16).configure_axis(**font_sizes).configure_legend(**font_sizes)

# g1.save('./graphs/time-spacial/evolucao_por_estados_brasileiros.html')

g1

<br>

## Lat Long

In [61]:
# def regiao(est):
#     if est not in regioes.keys():
#         return np.NAN
#     else:
#         return regioes[est]
    
# NewTable['regiao'] = NewTable['estado_ou_provincia'].apply(regiao)

In [62]:
from vega_datasets import data

# Base map: the 'countries' feature of the world-110m TopoJSON file, drawn as
# white shapes with gray borders in the natural-earth projection.
source = alt.topo_feature(data.world_110m.url, 'countries')

country_shapes = alt.Chart(source).mark_geoshape(fill='white', stroke='gray')
world = country_shapes.project('naturalEarth1')

# world

In [69]:
# Inspecting the distinct collection years. As the output shows, the values
# are strings and include non-year entries such as 'None', '31364' and '20'.
decapoda['start_year'].unique()

array(['1969', nan, '1955', '1943', '1975', '1950', '1911', '1953',
       '1903', '1944', '1959', '1956', '1973', '1970', '1974', '1979',
       '1949', '1978', '1964', '1960', '1904', '1968', '1963', '1967',
       '1951', '1993', '1945', '1957', '1901', '1954', '1972', '1942',
       '1962', '1985', '1977', '1952', '1940', '1984', '1976', '1941',
       '1965', '1935', '1966', '1913', '1933', '1980', '1961', '1971',
       '1946', '1999', '1916', '1958', '1932', '1981', '1947', '1936',
       '2014', '1983', '1982', '1987', '1986', '1905', '1927', '1948',
       '1988', '1908', '1914', '1917', '1918', '1939', '1920', '1902',
       '1995', '1991', '1909', '1989', '1990', '1938', '1931', '1898',
       '1924', '1915', '1992', '1926', '2000', '1994', '2008', '1996',
       '1925', '1997', '1998', '2002', '1871', '2003', '2001', '1937',
       '2004', '2006', '2005', '2007', '2009', 'None', '2011', '2010',
       '2012', '2013', '2016', '2015', '2017', '1906', '31364', '20',
       '20

In [91]:
# dynamic version
# NOTE: the year slider is built here but is not attached to any layer yet
# (the add_selection/transform_filter calls further down are commented out).
# NOTE(review): `start_year` appears to hold strings while `init` uses an
# integer; confirm the types match before enabling the slider.
slider = alt.binding_range(min=1850, max=2020, step=1)
select_year = alt.selection_single(name=' ', fields=['start_year'],
                                   bind=slider, init={'start_year': 1916})

# The frame is deliberately NOT called `data`: that name is the
# `vega_datasets.data` loader imported earlier in this notebook, and rebinding
# it would break any later `data.world_110m` lookup.
# map_points = NewTable[(NewTable['long'] < 10) & (NewTable['long'] >-150)].copy()
map_points = decapoda.copy()

# background layer: every record as a light gray point
back = alt.Chart(map_points).mark_point(filled=True,opacity=0.7, color='lightgray').encode(
    longitude = alt.X('long', type='quantitative', title='Longitude'),
    latitude = alt.Y('lat', type='quantitative', title='Latitude'),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month', 'infraorder', 'family'])
)

# foreground layer: the same points colored by family
front = alt.Chart(map_points).mark_point(filled=True).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color= alt.Color('family:N', title='Familia', 
                    legend= alt.Legend(columns=4, symbolLimit=102), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month', 'infraorder', 'family'])
)
# .add_selection(
#     select_year
# ).transform_filter(
#     select_year
# )

# horizontal rule built from the records whose latitude is exactly 0
# (currently not added to the layered chart below)
equador = alt.Chart(map_points[map_points['lat'] == 0], width=800, height= 400).mark_rule(color='black').encode(
    y = alt.Y('lat:Q', axis=alt.Axis(grid=False))
)


g = alt.layer(world, back, front).properties(
    width= 800,
    height= 500
)  # the equator rule was left out

# adjusting font sizes
g = g.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g.save('./graphs/time-spacial/lat_long-worldwide.html')

# g

<br>

<font color='red' size='5'>Lat long (layout 2 estático)</font>

In [96]:
# Inspecting the distinct type statuses. `decapoda` is used directly because
# `db` is only created in a later cell, so referencing it here would fail
# when the notebook is run top to bottom on a fresh kernel.
decapoda['type_status'].unique()

array([nan, 'Parátipo', 'Holótipo', 'Síntipo', 'Lectótipo',
       'Paralectótipo', 'Material tipo', 'Alótipo', 'Topótipo', 'Neótipo'],
      dtype=object)

In [110]:
# https://nextjournal.com/sdanisch/cartographic-visualization

db = decapoda.copy()

# parsing into string to make legend selector work
db['type_status'] = db['type_status'].astype(str)

# selectors: clicking a legend entry filters the points by family / type status
select_order = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

tipos = db['type_status'].unique()  # temporary list! adjust later!

# TODO: adjust colors

from vega_datasets import data

# point layer: color encodes the family, shape encodes the type status
front = alt.Chart(db).mark_point(filled=True).encode(
    longitude = alt.X('long', type='quantitative', title='Longitude'),
    latitude = alt.Y('lat', type='quantitative', title='Latitude'),
    color= alt.Color('family:N', title='Familia', 
                    legend= alt.Legend(columns=3, symbolLimit=102, direction='vertical', orient='right'), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    shape = alt.Shape('type_status:N', title='Type', scale= alt.Scale(domain=tipos),
                      legend= alt.Legend(columns=4, direction='vertical')),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month','infraorder','family', 'type_status'])
).add_selection(select_order, select_type).transform_filter(select_order).transform_filter(select_type)

mapa = alt.layer(
     # use the sphere of the Earth as the base layer
    alt.Chart({'sphere': True}).mark_geoshape(
        fill='#f2f2f2'
    ),
    # add a graticule for geographic reference lines
    alt.Chart({'graticule': True}).mark_geoshape(
        stroke='#ffffff', strokeWidth=1
    ),
    # and then the countries of the world
    # ('white' is the named color; the previous '#white' is not a valid color value)
    alt.Chart(alt.topo_feature(data.world_110m.url, 'countries')).mark_geoshape(
        fill='white', stroke='black', strokeWidth=0.5
    ),
    front
).properties(
    width=800,
    height=600
)

mapa = mapa.project(
    type='naturalEarth1', scale=140
).configure_view(stroke=None)

# adjusting font sizes of title, axes and legends
mapa = mapa.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# mapa.save('./graphs/time-spacial/lat_long-worldwide-static-grid.html')

# mapa

In [120]:
db = decapoda.copy()  # selecting database

# parsing into string to make legend selector work (same treatment applied in
# the grid version of this map)
db['type_status'] = db['type_status'].astype(str)

# selectors: clicking a legend entry filters the points by family / type status
select_order = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

# domain of the shape scale, computed here so the cell does not depend on a
# `tipos` variable left over from another cell
tipos = db['type_status'].unique()

# point layer: color encodes the family, shape encodes the type status
teste = alt.Chart(db).mark_point(filled=True).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color= alt.Color('family:N', title='Familia', 
                    legend= alt.Legend(columns=3, symbolLimit=102), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    shape = alt.Shape('type_status:N', title='Type', scale= alt.Scale(domain=tipos), 
                     legend= alt.Legend(columns=4)),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month', 'genus', 'family', 'type_status'])
).project(type='naturalEarth1').add_selection(select_order, 
                            select_type).transform_filter(select_order).transform_filter(select_type)

# points drawn over the `world` base map; font sizes adjusted afterwards
temp = (world + teste).properties(width=800, height=500).configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static.html')
# temp

<br>

### Latin America

In [121]:
db = decapoda.copy()  # selecting database

# parsing into string to make legend selector work; without this, `tipos`
# below would contain a float NaN for the records that have no type status
db['type_status'] = db['type_status'].astype(str)

# selectors: clicking a legend entry filters the points by family / type status
select_order = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

tipos = db['type_status'].unique()  # temporary list! adjust later!

# map
from vega_datasets import data

source = alt.topo_feature(data.world_110m.url, 'countries')

# The base map and the point layer must share the exact same projection,
# so its parameters are defined once and reused by both layers.
latam_projection = dict(type='naturalEarth1', translate=[680, 150], scale=450)

latinAmerica = alt.Chart(source).mark_geoshape(
    fill='white',
    stroke='gray'
).project(**latam_projection)

# points: color encodes the family, shape encodes the type status
teste = alt.Chart(db).mark_point(filled=True).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color= alt.Color('family:N', title='Familia', 
                    legend= alt.Legend(columns=3, symbolLimit=102), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    shape = alt.Shape('type_status:N', title='Type', scale= alt.Scale(domain=tipos), 
                     legend= alt.Legend(columns=4)),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month', 'genus', 'family', 'type_status'])
).project(**latam_projection).add_selection(select_order,
                                select_type).transform_filter(select_order).transform_filter(select_type)


# configuring the chart (size and font sizes)
temp = (latinAmerica + teste).properties(width=600, height=600).configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# temp.save('./graphs/time-spacial/lat_long-latinAmerica-static.html')
# temp

**Fim!**