# Time-Spatial Exploration

By **Franklin Oliveira**

-----
This notebook contains all the code needed to build charts from the `carcinos` database, with a focus on exploring it across time and space. It includes some basic data treatment and the code for each chart.

Database: <font color='blue'>'Planilha geral Atualizada FINAL 5_GERAL_sendo trabalhada no Google drive.xlsx'</font>

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# enabling notebook renderer
# alt.renderers.enable('notebook')
# alt.renderers.enable('default')

# disabling rows limit
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Read the treated database: semicolon-separated, decoded as 'utf-8-sig'.
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

## Filtering

At least for now, we'll consider only specimens of the order Decapoda (thoroughly revised by the Museum's crew).

In [3]:
# Keep only the rows whose `order` is 'Decapoda'; .copy() yields an independent
# frame, so the column assignments made later do not touch NewTable.
decapoda = NewTable.loc[NewTable['order'].eq('Decapoda')].copy()

<br>

<font size=5>**Color palette**</font>

Colors (per infraorder): 

- <font color='#e26d67'><b>Ascacidae</b></font>
- <font color='#007961'><b>Anomura</b></font>
- <font color='#7a2c39'><b>Achelata</b></font>
- <font color='#b67262'><b>Axiidea</b></font>
- <font color='#ee4454'><b>Brachyura</b></font>
- <font color='#3330b7'><b>Caridea</b></font>
- <font color='#58b5e1'><b>Gebiidea</b></font>
- <font color='#b8e450'><b>Stenopodídea</b></font>
- <font color='#a0a3fd'><b>Astacidae</b></font>
- <font color='#deae9e'><b>Polychelida</b></font>
- <font color='#d867be'><b>Grapsidae</b></font>
- <font color='#fece5f'><b>Xanthoidea</b></font>

In [4]:
# importing customized color palettes
from src.MNViz_colors import *

<br>

## Graph: Exploring the time-spatial evolution of the database

In [5]:
# Unaccented continent spellings -> accented spelling to normalise them to
continent_typos = {
    'America do Sul': 'América do Sul',
    'America do Norte': 'América do Norte',
    'America Central': 'América Central',
    'Africa': 'África',
    'Asia': 'Ásia',
}

# Values that are not a key of the mapping are kept unchanged
decapoda['continent'] = decapoda['continent'].map(
    lambda name: continent_typos.get(name, name)
)

In [6]:
# Sanity check: number of Decapoda records per continent after normalising the spellings
decapoda['continent'].value_counts()

América do Sul      8144
Desconhecido         106
Europa                24
América do Norte      21
Oceania                9
América Central        8
África                 6
Ásia                   2
Name: continent, dtype: int64

### Chart 1: Evolution per time and continent

In [7]:
# NOTE: some families are assigned to more than one infraorder in the data (data-quality ERROR), e.g.:
# decapoda[decapoda['family'] == 'Xanthidae']['infraorder'].unique()

In [8]:
# Number of records (non-null `class` values) per sampling year, continent and family.
# The filtered `decapoda` frame is used instead of the raw `NewTable`, so that this
# chart is restricted to Decapoda and sees the normalised continent spellings,
# exactly like the groupings behind Charts 2 and 3.
teste = (
    decapoda
    .groupby(['start_year', 'continent', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

In [9]:
# Legend-bound multi-selection: the chart is filtered by the families picked in the legend
select_family = alt.selection_multi(fields=['family'], bind='legend')

# data behind the chart
db = teste

# explicit domains for the encoded fields
x_labels = db.sort_values('start_year')['start_year'].unique()
y_labels = db['continent'].unique()
counts = db['counts'].unique()
counts = list(range(min(counts), max(counts), 100))  # steps of 100 between smallest and largest count

# one colour per family, taken from the customised palette
family_scale = alt.Scale(domain=list(cores_familia_naive.keys()),
                         range=list(cores_familia_naive.values()))

g1 = (
    alt.Chart(db, title='Temporal evolution per continent', height=300, width=2000)
    .mark_circle()
    .encode(
        x=alt.X('start_year', type="ordinal", title='Sampling Year',
                scale=alt.Scale(domain=x_labels)),
        y=alt.Y('continent', title='Continent',
                sort=alt.EncodingSortField('counts', op="count", order='descending'),
                scale=alt.Scale(domain=y_labels)),
        size=alt.Size('counts', title='Counts',
                      scale=alt.Scale(domain=counts, range=[20,120]),
                      legend=alt.Legend(orient='bottom', direction='horizontal')),
        order=alt.Order('counts', sort='descending'),  # smaller points in front
        color=alt.Color('family:N', title='Family',
                        legend=alt.Legend(columns=10, symbolLimit=102, orient='bottom'),
                        scale=family_scale),
        tooltip=alt.Tooltip(['continent','start_year','family','counts']),
    )
    .add_selection(select_family)
    .transform_filter(select_family)
    # font sizes for title, axes and legends
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# g1.save('./graphs/time-spacial/temporal_evolution_per_continent-wFamilies.html')
# g1

<br>

### Chart 2: Evolution per time and country

In [28]:
# Number of Decapoda records (non-null `class` values) per sampling year, country and family
teste1 = (
    decapoda
    .groupby(['start_year', 'country', 'family'])['class']
    .count()
    .rename('counts')
    .reset_index()
)

In [30]:
# Legend-bound multi-selection on family
select_family = alt.selection_multi(fields=['family'], bind='legend')

# every country except Brazil
db = teste1[teste1['country'] != 'Brasil']

# explicit domains for the encoded fields
x_labels = db.sort_values('start_year')['start_year'].unique()
y_labels = db.sort_values(['start_year', 'counts'])['country'].unique()
counts = db['counts'].unique()

# encoding channels, collected first and handed to .encode() in one go
encodings = dict(
    x=alt.X('start_year', type="ordinal", title='Sampling Year',
            scale=alt.Scale(domain=x_labels)),
    y=alt.Y('country', title='Country',
            scale=alt.Scale(domain=y_labels),
            sort=alt.EncodingSortField('counts', op="sum", order='descending')),
    size=alt.Size('counts', title='Counts',
                  scale=alt.Scale(domain=counts, range=[20,120]),
                  legend=alt.Legend(orient='right', direction='horizontal')),
    order=alt.Order('counts', sort='descending'),  # smaller points in front
    color=alt.Color('family:N', title='Family',
                    legend=alt.Legend(columns=3, symbolLimit=102, orient='right'),
                    scale=alt.Scale(domain=list(cores_familia_naive.keys()),
                                    range=list(cores_familia_naive.values()))),
    tooltip=alt.Tooltip(['country','start_year','family','counts']),
)

g1 = alt.Chart(db, title='Collected specimens per year in other countries',
               height=500, width=800).mark_circle().encode(**encodings)
g1 = g1.add_selection(select_family).transform_filter(select_family)

# font sizes for title, axes and legends
g1 = g1.configure_title(fontSize=16)
g1 = g1.configure_axis(labelFontSize=12, titleFontSize=12)
g1 = g1.configure_legend(labelFontSize=12, titleFontSize=12)

# g1.save('./graphs/time-spacial/temporal_evolution_per_country.html')
g1

<br>

### Chart 3: Evolution per time and Brazilian states

In [12]:
# Region as string, so it can later be concatenated with the state name
decapoda['braz_region'] = decapoda['braz_region'].astype(str)

# Records collected in Brazil, counted (non-null `class` values) per sampling year,
# state, region and family
brazil_only = decapoda.loc[decapoda['country'] == 'Brasil']
teste2 = (
    brazil_only
    .groupby(['start_year', 'state', 'braz_region', 'family'])['class']
    .count()
    .rename('counts')
    .reset_index()
)

In [22]:
# Label used on the y axis of Chart 3: "<state>, <region>"
teste2['state_and_region'] = teste2['state'] + ', ' + teste2['braz_region']

# Total count per state (keeping its region). Only `counts` is aggregated, instead of
# summing every column of the frame and selecting `counts` afterwards.
sorting = (
    teste2
    .groupby(['state_and_region', 'braz_region'])['counts']
    .sum()
    .reset_index(name='soma')
)

# Region order: order of first appearance once the states are sorted by total
# (descending), i.e. the region of the state with the largest total comes first.
regioes = sorting.sort_values('soma', ascending=False)['braz_region'].unique()

# Within each region (in the order above) states are sorted by total, descending.
# All per-region pieces are concatenated in a single call rather than growing a
# DataFrame inside the loop.
df = pd.concat(
    [sorting[sorting['braz_region'] == reg].sort_values('soma', ascending=False)
     for reg in regioes]
)

# Final y-axis order (array of "<state>, <region>" labels)
sorting = df['state_and_region'].unique()

# p.s.: this approach was not producing the desired outcome
# sorting = sorting.sort_values(['braz_region','soma'], ascending=False)['state_and_region'].unique()

In [27]:
# Legend-bound multi-selection on family
select_family = alt.selection_multi(fields=['family'], bind='legend')

# Brazil-only counts per year, state/region and family
db = teste2

# explicit domains for the encoded fields
x_labels = db.sort_values('start_year')['start_year'].unique()
y_labels = sorting
counts = db['counts'].unique()
counts = list(range(min(counts), max(counts), 20))  # steps of 20 between smallest and largest count

# encoding channels
x_year = alt.X('start_year', type="ordinal", title='Sampling Year',
               scale=alt.Scale(domain=x_labels))
y_state = alt.Y('state_and_region', type="nominal", title='State, Region',
                scale=alt.Scale(domain=y_labels),
                sort=sorting, axis=alt.Axis(labels=True))
size_counts = alt.Size('counts', title='Counts',
                       scale=alt.Scale(domain=counts, range=[30,130], zero=False),
                       legend=alt.Legend(orient='bottom', direction='horizontal'))
draw_order = alt.Order('counts', sort='descending')  # smaller points in front
color_family = alt.Color('family:N', title='Family',
                         legend=alt.Legend(columns=11, symbolLimit=102,
                                           direction='horizontal', orient='bottom'),
                         scale=alt.Scale(domain=list(cores_familia_naive.keys()),
                                         range=list(cores_familia_naive.values())))
hover = alt.Tooltip(['state_and_region','start_year','family','counts'])

g1 = alt.Chart(
    db, title='Total of collected decapoda per year in Brazilian States', width=1600
).mark_circle(opacity=0.8).encode(
    x=x_year,
    y=y_state,
    size=size_counts,
    order=draw_order,
    color=color_family,
    tooltip=hover,
).add_selection(select_family).transform_filter(select_family)

# font sizes for title, axes and legends
g1 = (
    g1.configure_title(fontSize=16)
      .configure_axis(labelFontSize=12, titleFontSize=12)
      .configure_legend(labelFontSize=12, titleFontSize=12)
)

# g1.save('./graphs/time-spacial/temporal_evolution_per_brazilian_states.html')
# g1

<br>

## Lat Long

In [15]:
# def regiao(est):
#     if est not in regioes.keys():
#         return np.NAN
#     else:
#         return regioes[est]
    
# NewTable['regiao'] = NewTable['estado_ou_provincia'].apply(regiao)

In [16]:
from vega_datasets import data

# 'countries' feature of the world_110m TopoJSON file, referenced by URL
source = alt.topo_feature(data.world_110m.url, 'countries')

# base map: white countries with gray borders, naturalEarth1 projection
world = (
    alt.Chart(source)
    .mark_geoshape(fill='white', stroke='gray')
    .project('naturalEarth1')
)

# world

In [17]:
# dynamic version
# slider = alt.binding_range(min=1850, max=2020, step=1)
# select_year = alt.selection_single(name=' ', fields=['start_year'],
#                                    bind=slider, init={'start_year': 1916})
# NOTE: defined here but not attached to any layer in this cell
select_family = alt.selection_multi(fields=['family'], bind='legend')

# The working copy is called `db` (as in the other cells) rather than `data`, so that
# the `data` name imported from vega_datasets is not rebound to a DataFrame.
# db = NewTable[(NewTable['long'] < 10) & (NewTable['long'] >-150)].copy()
db = decapoda.copy()

# fields shown on hover, shared by both point layers
tooltip_fields = ['lat','long','catalog_number', 'country','braz_region','state',
                  'locality', 'environment',
                  'start_year','start_month', 'infraorder', 'family','genus', 'type_status']

# background layer: every record as a light-gray point
back = alt.Chart(db).mark_point(filled=True,opacity=0.7, color='lightgray').encode(
    longitude = alt.X('long', type='quantitative', title='Longitude'),
    latitude = alt.Y('lat', type='quantitative', title='Latitude'),
    tooltip = alt.Tooltip(tooltip_fields)
)

# foreground layer: the same points coloured by family
front = alt.Chart(db).mark_point(filled=True).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color= alt.Color('family:N', title='Family', 
                    legend= alt.Legend(columns=3, symbolLimit=102), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    tooltip = alt.Tooltip(tooltip_fields)
)

# rule built from the records with lat == 0 (equator); currently not added to the layered chart
equador = alt.Chart(db[db['lat'] == 0], width=800, height= 400).mark_rule(color='black').encode(
    y = alt.Y('lat:Q', axis=alt.Axis(grid=False))
)


g = alt.layer(world, back, front).properties(
    width= 800,
    height= 500
)  # the equator layer was removed

# adjusting font sizes
g = g.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g.save('./graphs/time-spacial/lat_long-worldwide.html')
# g

<br>

<font color='red' size='5'>Lat long (layout 2)</font>

In [18]:
# https://nextjournal.com/sdanisch/cartographic-visualization

db = decapoda.copy()

# parsing into string to make legend selector work
db['type_status'] = db['type_status'].astype(str)

# selectors (both bound to the legend)
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

tipos = db['type_status'].unique()  # temporary list! adjust later!

# TODO: adjust colors

# (re-)import so that `data` refers to the vega_datasets loader in this cell
from vega_datasets import data

# specimen points: colour = family, shape = type status, filtered by both legend selections
front = alt.Chart(db).mark_point(filled=True).encode(
    longitude = alt.X('long', type='quantitative', title='Longitude'),
    latitude = alt.Y('lat', type='quantitative', title='Latitude'),
    color= alt.Color('family:N', title='Family', 
                    legend= alt.Legend(columns=3, symbolLimit=102, direction='vertical', orient='right'), 
                    scale= alt.Scale(domain= list(cores_familia_naive.keys()), 
                                     range= list(cores_familia_naive.values()))),
    shape = alt.Shape('type_status:N', title='Type', scale= alt.Scale(domain=tipos),
                      legend= alt.Legend(columns=4, direction='vertical')),
    tooltip = alt.Tooltip(['lat','long','country','braz_region','state',
                           'start_year','start_month','infraorder','family', 'type_status'])
).add_selection(select_family, select_type).transform_filter(select_family).transform_filter(select_type)

mapa = alt.layer(
     # use the sphere of the Earth as the base layer
    alt.Chart({'sphere': True}).mark_geoshape(
        fill='#f2f2f2'
    ),
    # add a graticule for geographic reference lines
    alt.Chart({'graticule': True}).mark_geoshape(
        stroke='#ffffff', strokeWidth=1
    ),
    # and then the countries of the world
    # ('white' is the CSS colour name; the previous '#white' is not a valid colour string)
    alt.Chart(alt.topo_feature(data.world_110m.url, 'countries')).mark_geoshape(
        fill='white', stroke='black', strokeWidth=0.5
    ),
    front
).properties(
    width=800,
    height=600
)

# one projection for all layers; no border around the view
mapa = mapa.project(
    type='naturalEarth1', scale=140
).configure_view(stroke=None)

# font sizes for title, axes and legends
mapa = mapa.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# mapa.save('./graphs/time-spacial/lat_long-worldwide-static-grid.html')
# mapa

In [19]:
# working copy; type_status as string so the legend selector works
db = decapoda.copy()
db['type_status'] = db['type_status'].astype(str)

# selectors (both bound to the legend)
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

# encoding channels for the specimen points
# NOTE: `tipos` (shape domain) is the array computed in the previous cell
point_encodings = dict(
    longitude=alt.X('long:Q', title='Longitude'),
    latitude=alt.Y('lat:Q', title='Latitude'),
    color=alt.Color('family:N', title='Family',
                    legend=alt.Legend(columns=3, symbolLimit=102),
                    scale=alt.Scale(domain=list(cores_familia_naive.keys()),
                                    range=list(cores_familia_naive.values()))),
    shape=alt.Shape('type_status:N', title='Type', scale=alt.Scale(domain=tipos),
                    legend=alt.Legend(columns=4)),
    tooltip=alt.Tooltip(['lat','long','catalog_number', 'country','braz_region','state',
                         'locality', 'environment',
                         'start_year','start_month', 'infraorder', 'family','genus', 'type_status']),
)

teste = (
    alt.Chart(db)
    .mark_point(filled=True)
    .encode(**point_encodings)
    .project(type='naturalEarth1')
    .add_selection(select_family, select_type)
    .transform_filter(select_family)
    .transform_filter(select_type)
)

# points layered over the base world map, plus font sizes
temp = (
    (world + teste)
    .properties(width=800, height=500)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static.html')
# temp

<br>

### Latin America

In [20]:
# working copy; type_status as string so the legend selector works
db = decapoda.copy()
db['type_status'] = db['type_status'].astype(str)

# selectors (both bound to the legend); note that `select_order` selects on the `family` field
select_order = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type_status'], bind='legend')

tipos = db['type_status'].unique()  # temporary list! adjust later!

# map
from vega_datasets import data

source = alt.topo_feature(data.world_110m.url, 'countries')

# projection settings shared by the base map and the point layer
projection = dict(type='naturalEarth1', translate=[680, 150], scale=450)

latinAmerica = (
    alt.Chart(source)
    .mark_geoshape(fill='white', stroke='gray')
    .project(**projection)
)

# points
teste = (
    alt.Chart(db)
    .mark_point(filled=True)
    .encode(
        longitude=alt.X('long:Q', title='Longitude'),
        latitude=alt.Y('lat:Q', title='Latitude'),
        color=alt.Color('family:N', title='Family',
                        legend=alt.Legend(columns=3, symbolLimit=102),
                        scale=alt.Scale(domain=list(cores_familia_naive.keys()),
                                        range=list(cores_familia_naive.values()))),
        shape=alt.Shape('type_status:N', title='Tipos', scale=alt.Scale(domain=tipos),
                        legend=alt.Legend(columns=4)),
        tooltip=alt.Tooltip(['lat','long','catalog_number', 'country','braz_region','state',
                             'locality', 'environment',
                             'start_year','start_month', 'infraorder', 'family','genus', 'type_status']),
    )
    .project(**projection)
    .add_selection(select_order, select_type)
    .transform_filter(select_order)
    .transform_filter(select_type)
)

# configuring the chart: size and font sizes
temp = (
    (latinAmerica + teste)
    .properties(width=600, height=600)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# temp.save('./graphs/time-spacial/lat_long-latinAmerica-static.html')
# temp

**Fim!**