# Time-Spatial Exploration

By **Franklin Oliveira**

-----
This notebook contains all the code needed to build charts from the `poliqueta` database, with a focus on exploring the data across time and space. It includes some basic data treatment followed by the code for each chart.

Database: <font color='blue'>'IBUFRJ27.07.2020 - visualização.xlsx'</font>
    

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# Select Altair's 'default' renderer. The call for the 'notebook' renderer
# is kept below, commented out, as the alternative.
# alt.renderers.enable('notebook')
alt.renderers.enable('default')

# Disable Altair's row limit so the charts can embed the full table
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Load the merged database: a semicolon-separated file decoded as
# 'utf-8-sig', read with low_memory=False.
NewTable = pd.read_csv(
    './data/merged_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

In [3]:
# Normalize the text columns used by the charts below.
NewTable['genus'] = NewTable['genus'].str.capitalize()
NewTable['family'] = NewTable['family'].astype(str)
NewTable['type'] = NewTable['type'].astype(str)

# Label missing families as 'NaN'. astype(str) renders real missing values as
# 'nan', while the file may also carry the literal text 'Nan'; comparing only
# against 'Nan' left the former untouched, so both spellings are mapped here.
NewTable['family'] = NewTable['family'].replace({'Nan': 'NaN', 'nan': 'NaN'})

<br>

<font size=5>**Color Palette**</font>


In [4]:
# importing customized color palettes
from src.MNViz_colors import *

<br>

## Graph: Exploring the time-spatial evolution of the database

### Chart 1: Evolution per time and continent

In [5]:
# Records per sampling year, continent and family.
# count() only tallies rows whose 'class' value is not missing.
teste = (
    NewTable
    .groupby(['start_year', 'continent', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

In [6]:
# Legend-bound selector: clicking a family in the legend filters the chart
select_family = alt.selection_multi(fields=['family'], bind='legend')

# database
db = teste

# auxiliary variables for the encoding fields
x_labels = db.sort_values('start_year')['start_year'].unique()

# Continents ordered from the most to the fewest records. Only 'counts' is
# summed, so the remaining columns are not aggregated needlessly.
temp = db.groupby('continent')['counts'].sum().reset_index().sort_values('counts')
y_labels = temp['continent'].unique()[::-1]

# Size-scale domain as a plain [min, max] pair, matching the two-value range
# below. The previous range(min, max, 20) produced a many-stop domain for a
# two-value range and an empty domain when every count was equal.
counts = [int(db['counts'].min()), int(db['counts'].max())]

# The chart is built from `db`, the same frame the helper variables come from
g1 = alt.Chart(db, title='Temporal evolution per continent', height=200, width=800).mark_circle().encode(
    x= alt.X('start_year', type="ordinal", title='Sampling Year',
             scale= alt.Scale(domain= x_labels)),
    y= alt.Y('continent', title='Continent',
             sort=alt.EncodingSortField('counts', op="sum", order='descending'),
             scale= alt.Scale(domain= y_labels)),
    size=alt.Size('counts', title='Counts',
                  scale= alt.Scale(domain= counts, range=[20,120]),
                  legend= alt.Legend(orient='bottom', direction='horizontal')),
    order= alt.Order('counts', sort='descending'),  # smaller points in front
    # one color per family, taken from the customized palette
    color= alt.Color('family:N',title= 'Family',
                    legend= alt.Legend(columns=5, symbolLimit= 102, orient='bottom', direction='horizontal'),
                    scale= alt.Scale(domain= list(cores_familia.keys()),
                                     range= list(cores_familia.values()))),
    tooltip= alt.Tooltip(['continent','start_year','family','counts'])
).add_selection(select_family).transform_filter(select_family)

# font sizes for title, axes and legends
g1 = g1.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g1.save('./graphs/time-spacial/temporal_evolution_per_continent-wFamilies.html')
# g1

<br>

### Chart 2: Evolution per time and country

In [7]:
# Records per sampling year, country and family.
# count() only tallies rows whose 'class' value is not missing.
teste1 = (
    NewTable
    .groupby(['start_year', 'country', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

In [8]:
# Legend-bound selector: clicking a family in the legend filters the chart
select_family = alt.selection_multi(fields=['family'], bind='legend')

# Filtering database (removing Brazil). Both spellings are excluded: the
# Brazilian-states chart selects the country as 'Brazil', so comparing only
# against 'Brasil' would leave the Brazilian records in this chart.
db = teste1[~teste1['country'].isin(['Brazil', 'Brasil'])]

# auxiliary variables for the encoding fields
x_labels = db.sort_values('start_year')['start_year'].unique()
y_labels = db['country'].unique()
temp = db['counts'].unique()
counts = [temp.min(), temp.max()]  # [min, max] domain of the size scale

g1 = alt.Chart(db, title='Collected specimens per year in other countries', height=500,
              width=800).mark_circle().encode(
    x= alt.X('start_year', type="ordinal", title='Sampling Year',
            scale= alt.Scale(domain=x_labels)),
    y= alt.Y('country', title='Country',
             scale= alt.Scale(domain=y_labels),
             sort=alt.EncodingSortField('counts', op="sum", order='descending')),
    size=alt.Size('counts', title='Counts',
                  scale= alt.Scale(domain=counts, range=[20,250]),
                  legend= alt.Legend(orient='right', direction='horizontal')),
    order= alt.Order('counts', sort='descending'),  # smaller points in front
    # one color per family, taken from the customized palette
    color= alt.Color('family:N', title='Family',
                    legend= alt.Legend(columns=3, symbolLimit=102, orient='right'),
                    scale= alt.Scale(domain= list(cores_familia.keys()),
                                     range= list(cores_familia.values()))),
    tooltip= alt.Tooltip(['country','start_year','family','counts'])
).add_selection(select_family).transform_filter(select_family)

# font sizes for title, axes and legends
g1 = g1.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g1.save('./graphs/time-spacial/temporal_evolution_per_country.html')
# g1

<br>

### Chart 3: Evolution per time and Brazilian states

In [9]:
# Brazilian records only, counted per sampling year, state and family.
# count() only tallies rows whose 'class' value is not missing.
teste2 = (
    NewTable.loc[NewTable['country'] == 'Brazil']
    .groupby(['start_year', 'state', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

#### Creating a column with the Brazilian regions

In [10]:
# State name -> Brazilian region code
# (N = North, NE = Northeast, CO = Center-West, SE = Southeast, S = South).
# The last two keys are multi-state labels handled as single entries.
regioes = {
    'Rio de Janeiro':'SE',
    'São Paulo':'SE',
    'Espírito Santo': 'SE',
    'Pernambuco':'NE',
    'Santa Catarina':'S',
    'Amazonas':'N',
    'Goiás':'CO',
    'Roraima':'N',
    'Pará':'N',
    'Mato Grosso':'CO',
    'Acre': 'N',
    'Bahia': 'NE',
    'Minas Gerais': 'SE',
    'Mato Grosso do Sul': 'CO',
    'Paraná': 'S',
    'Rondônia': 'N',
    'Ceará': 'NE',
    'Maranhão': 'NE',  # Maranhão belongs to the Northeast (was wrongly 'N')
    'Rio Grande do Sul': 'S',
    'Paraíba': 'NE',
    'Distrito Federal': 'CO',
    'Alagoas': 'NE',
    'Amapá':'N',
    'Piauí': 'NE',
    'Brasília': 'CO',
    'Tocantins': 'N',
    'Rio Grande do Norte': 'NE',
    'Sergipe': 'NE',
    'Minas Gerais/Goiás/Distrito Federal': 'CO',
    'Santa Catarina-Rio Grande do Sul': 'S'
}

# Region of each state. The lookup is strict on purpose: a state missing from
# `regioes` raises KeyError instead of being silently dropped from the chart.
teste2['regiao'] = teste2['state'].apply(lambda x: regioes[str(x)])

# "state, region" label used on the y axis
teste2['regiao_e_estado'] = teste2['state'] + ', ' + teste2['regiao']

# Axis order: by region, then by total count (both descending). Only 'counts'
# is summed, so the other columns are not aggregated needlessly.
sorting = (
    teste2.groupby(['regiao_e_estado', 'regiao'])['counts'].sum()
    .reset_index()
    .rename(columns={'counts': 'soma'})
)
sorting = sorting.sort_values(['regiao','soma'], ascending=False)['regiao_e_estado'].unique()

In [11]:
# Legend-bound selector: clicking a family in the legend filters the chart
select_family = alt.selection_multi(fields=['family'], bind='legend')

# database (Brazilian records aggregated per year, state and family)
db = teste2

# auxiliary variables for the encoding fields
x_labels = db.sort_values('start_year')['start_year'].unique()
y_labels = sorting

# Size-scale domain as a plain [min, max] pair, matching the two-value range
# below. The previous range(min, max, 20) produced a many-stop domain for a
# two-value range and an empty domain when every count was equal.
counts = [int(db['counts'].min()), int(db['counts'].max())]

# Title names "specimens": the earlier "decapoda" wording did not match the
# database this notebook charts.
g1 = alt.Chart(db, title='Total of collected specimens per year in Brazilian States',
               width=800 ).mark_circle(opacity=0.8).encode(
    x= alt.X('start_year', type="ordinal", title='Sampling Year',
            scale= alt.Scale(domain= x_labels)),
    y= alt.Y('regiao_e_estado', type="nominal",title='State, Region',
             scale= alt.Scale(domain= y_labels),
             sort= sorting, axis=alt.Axis(labels=True)),
    size=alt.Size('counts', title='Counts',
                  scale= alt.Scale(domain= counts, range=[30,130], zero=False),
                  legend= alt.Legend(orient='bottom', direction='horizontal')),
    order= alt.Order('counts', sort='descending'),  # smaller points in front
    # one color per family, taken from the customized palette
    color= alt.Color('family:N', title='Family',
                    legend= alt.Legend(columns=5, symbolLimit=102, direction='horizontal', orient='bottom'),
                    scale= alt.Scale(domain= list(cores_familia.keys()),
                                     range= list(cores_familia.values()))),
    tooltip= alt.Tooltip(['regiao_e_estado','start_year','family','counts'])
).add_selection(select_family).transform_filter(select_family)

# font sizes for title, axes and legends
g1 = g1.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# g1.save('./graphs/time-spacial/temporal_evolution_per_brazilian_states.html')
# g1

<br>

## Lat Long

In [12]:
def regiao(est):
    """Return the region code mapped to state ``est``.

    Looks ``est`` up in the module-level ``regioes`` mapping.

    Parameters
    ----------
    est : hashable
        State name as stored in the database (missing values are accepted).

    Returns
    -------
    The value mapped to ``est`` in ``regioes``, or ``np.nan`` when ``est`` is
    not one of its keys.
    """
    # np.nan rather than the np.NAN alias, which NumPy 2.0 removed
    return regioes.get(est, np.nan)
    
# region of every record, looked up element by element from its state
NewTable['regiao'] = NewTable['state'].map(regiao)

In [13]:
from vega_datasets import data

# 'countries' feature of the world_110m TopoJSON file, used as map outlines
source = alt.topo_feature(url=data.world_110m.url, feature='countries')

# Base map: white countries with gray borders on a naturalEarth1 projection
world = (
    alt.Chart(source)
    .mark_geoshape(stroke='gray', fill='white')
    .project(type='naturalEarth1')
)

# world

In [14]:
# Dynamic version: a slider bound to 'start_year' picks the highlighted year
slider = alt.binding_range(min=1905, max=2015, step=1)
select_year = alt.selection_single(
    name=' ',
    fields=['start_year'],
    bind=slider,
    init={'start_year': 1905},
)

# data = NewTable[(NewTable['long'] < 10) & (NewTable['long'] >-150)].copy()
# Copy of the table with a constant 'equador' column (0) for the rule below
data = NewTable.assign(equador=0)

# Fields shown in the tooltip of both point layers
tooltip_fields = ['lat', 'long', 'country', 'regiao', 'state', 'locality', 'start_year',
                  'collector_full_name', 'family', 'genus', 'species', 'order', 'catalog_number']

# Background layer: every record, drawn in light gray
back = alt.Chart(data).mark_circle(color='lightgray').encode(
    longitude=alt.X('long:Q', title='Longitude'),
    latitude=alt.Y('lat:Q', title='Latitude'),
    tooltip=alt.Tooltip(tooltip_fields),
)

# Foreground layer: records of the selected year, colored by family
front = alt.Chart(data).mark_circle().encode(
    longitude=alt.X('long:Q', title='Longitude'),
    latitude=alt.Y('lat:Q', title='Latitude'),
    color=alt.Color(
        'family:N',
        title='Family',
        scale=alt.Scale(domain=list(cores_familia.keys()),
                        range=list(cores_familia.values())),
        legend=alt.Legend(columns=2, symbolLimit=42),
    ),
    tooltip=alt.Tooltip(tooltip_fields),
)
front = front.add_selection(select_year).transform_filter(select_year)

# Rule on the 'equador' field; it is defined here but not added to the layers
equador = alt.Chart(data, width=700, height=400).mark_rule(color='black', size=1).encode(
    y=alt.Y('equador', axis=alt.Axis(grid=False))
)

# Base map + all records + selected year, then font sizes
g = alt.layer(world, back, front).properties(width=800, height=500)
g = g.configure_title(fontSize=16)
g = g.configure_axis(labelFontSize=12, titleFontSize=12)
g = g.configure_legend(labelFontSize=12, titleFontSize=12)

# g.save('./graphs/time-spacial/lat_long-animated.html')
# g

<br>

<font color='red' size='5'>Lat long (layout 2)</font>

In [15]:
# https://nextjournal.com/sdanisch/cartographic-visualization

db = NewTable.copy()

# parsing into string to make legend selector work
db['type'] = db['type'].astype(str)

# legend-bound selectors (family and type)
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type'], bind='legend')

tipos = db['type'].unique()  # temporary list! adjust later!

# TODO: adjust colors

from vega_datasets import data

# Point layer: one mark per record, colored by family and shaped by type
front = alt.Chart(db).mark_point(filled=True, size=80).encode(
    longitude = alt.X('long', type='quantitative', title='Longitude'),
    latitude = alt.Y('lat', type='quantitative', title='Latitude'),
    color= alt.Color('family:N', title='Family',
                    legend= alt.Legend(columns=2, symbolLimit=102, direction='vertical', orient='right'),
                    scale= alt.Scale(domain= list(cores_familia.keys()),
                                     range= list(cores_familia.values()))),
     shape= alt.Shape('type:N', title='Types',
                     legend= alt.Legend(columns=4),
                     scale= alt.Scale(domain=['Holotype', 'Neotype','Paratype', 'nan'],
                                      range=['triangle', 'square', 'cross','circle'])),
   tooltip = alt.Tooltip(['lat','long','country','regiao','state',
                           'start_year','start_month','order','family', 'type'])
).add_selection(select_family, select_type).transform_filter(select_family).transform_filter(select_type)

mapa = alt.layer(
     # use the sphere of the Earth as the base layer
    alt.Chart({'sphere': True}).mark_geoshape(
        fill='#f2f2f2'
    ),
    # add a graticule for geographic reference lines
    alt.Chart({'graticule': True}).mark_geoshape(
        stroke='#ffffff', strokeWidth=1
    ),
    # and then the countries of the world
    # ('white' is the named color; the previous '#white' is not a valid color)
    alt.Chart(alt.topo_feature(data.world_110m.url, 'countries')).mark_geoshape(
        fill='white', stroke='black', strokeWidth=0.5
    ),
    front
).properties(
    width=800,
    height=600
)

# shared projection for every layer; no border around the view
mapa = mapa.project(
    type='naturalEarth1', scale=140
).configure_view(stroke=None)

# font sizes for title, axes and legends
mapa = mapa.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# mapa.save('./graphs/time-spacial/lat_long-worldwide-static-grid.html')
# mapa

In [16]:
# Working copy of the database; 'type' is kept as text for the legend selector
db = NewTable.assign(type=NewTable['type'].astype(str))
# db = db[(db['lat'] > 90) & (db['long'] > 180)]

# Legend-bound selectors (family and type)
select_family = alt.selection_multi(bind='legend', fields=['family'])
select_type = alt.selection_multi(bind='legend', fields=['type'])

# Point layer: one mark per record, colored by family and shaped by type
teste = alt.Chart(db).mark_point(filled=True, size=80).encode(
    longitude=alt.X('long', type='quantitative', title='Longitude'),
    latitude=alt.Y('lat', type='quantitative', title='Latitude'),
    color=alt.Color(
        'family:N',
        title='Family',
        legend=alt.Legend(columns=2, symbolLimit=102),
        scale=alt.Scale(domain=list(cores_familia.keys()),
                        range=list(cores_familia.values())),
    ),
    shape=alt.Shape(
        'type:N',
        title='Types',
        legend=alt.Legend(columns=4),
        scale=alt.Scale(domain=['Holotype', 'Neotype', 'Paratype', 'nan'],
                        range=['triangle', 'square', 'cross', 'circle']),
    ),
    tooltip=alt.Tooltip(['lat', 'long', 'catalog_number', 'country', 'regiao', 'state',
                         'locality', 'start_year', 'start_month', 'order', 'family',
                         'genus', 'type']),
)
teste = teste.project(type='naturalEarth1')
teste = teste.add_selection(select_family, select_type)
teste = teste.transform_filter(select_family).transform_filter(select_type)

# Base world map with the points on top, then font sizes
temp = alt.layer(world, teste).properties(width=800, height=500)
temp = temp.configure_title(fontSize=16)
temp = temp.configure_axis(labelFontSize=12, titleFontSize=12)
temp = temp.configure_legend(labelFontSize=12, titleFontSize=12)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static.html')
# temp

### Latin America

In [17]:
# Working copy of the database; 'type' is kept as text for the legend selector
db = NewTable.assign(type=NewTable['type'].astype(str))

# Legend-bound selectors (select_order is bound to the 'family' field)
select_order = alt.selection_multi(bind='legend', fields=['family'])
select_type = alt.selection_multi(bind='legend', fields=['type'])

tipos = db['type'].unique()  # temporary list! adjust later!

# map
from vega_datasets import data

source = alt.topo_feature(url=data.world_110m.url, feature='countries')

# Base map with an explicit translate and scale on the projection
latinAmerica = (
    alt.Chart(source)
    .mark_geoshape(stroke='gray', fill='white')
    .project(type='naturalEarth1', translate=[680, 150], scale=450)
)

# points: one mark per record, colored by family and shaped by type
teste = alt.Chart(db).mark_point(filled=True, size=80).encode(
    longitude=alt.X('long', type='quantitative', title='Longitude'),
    latitude=alt.Y('lat', type='quantitative', title='Latitude'),
    color=alt.Color(
        'family:N',
        title='Family',
        legend=alt.Legend(columns=2, symbolLimit=102),
        scale=alt.Scale(domain=list(cores_familia.keys()),
                        range=list(cores_familia.values())),
    ),
    shape=alt.Shape(
        'type:N',
        title='Types',
        legend=alt.Legend(columns=4),
        scale=alt.Scale(domain=['Holotype', 'Neotype', 'Paratype', 'nan'],
                        range=['triangle', 'square', 'cross', 'circle']),
    ),
    tooltip=alt.Tooltip(['lat', 'long', 'catalog_number', 'country', 'regiao', 'state',
                         'locality', 'start_year', 'start_month', 'order', 'family',
                         'genus', 'type']),
)
# same projection settings as the base map
teste = teste.project(type='naturalEarth1', translate=[680, 150], scale=450)
teste = teste.add_selection(select_order, select_type)
teste = teste.transform_filter(select_order).transform_filter(select_type)

# chart configuration: layer the points over the map, then font sizes
temp = alt.layer(latinAmerica, teste).properties(width=600, height=600)
temp = temp.configure_title(fontSize=16)
temp = temp.configure_axis(labelFontSize=12, titleFontSize=12)
temp = temp.configure_legend(labelFontSize=12, titleFontSize=12)

# temp.save('./graphs/time-spacial/lat_long-latinAmerica-static.html')
# temp

## Lat long version for types

In [18]:
from vega_datasets import data

# 'countries' feature of the world_110m TopoJSON file, used as map outlines
source = alt.topo_feature(url=data.world_110m.url, feature='countries')

# Base map with an explicit translate and scale on the projection
latinAmerica = (
    alt.Chart(source)
    .mark_geoshape(stroke='gray', fill='white')
    .project(type='naturalEarth1', translate=[680, 150], scale=450)
)

# latinAmerica

In [19]:
# Keep only records that carry a type. 'type' was cast to str when the table
# was cleaned, so missing values are the text 'nan' and isna() alone removes
# nothing; both real NaN and its text form are dropped here.
has_type = NewTable['type'].notna() & (NewTable['type'].astype(str).str.lower() != 'nan')
data = NewTable[has_type].copy()

# legend-bound selectors (family and type)
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type'], bind='legend')

# Point layer: one mark per typed record, colored by family, shaped by type
teste = alt.Chart(data).mark_point(filled=True, size=80).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color = alt.Color('family:N', title='Family',
                      scale = alt.Scale(range=list(cores_familia.values()), domain=list(cores_familia.keys())),
                      legend=alt.Legend(columns=2, symbolLimit=42)),
    tooltip = alt.Tooltip(['lat','long','country','regiao','state', 'locality','start_year','collector_full_name',
                           'family', 'genus','species','order', 'catalog_number']),
    shape= alt.Shape('type:N', title='Type',
                     legend= alt.Legend(orient='right', direction='horizontal'),
                     scale= alt.Scale(domain=['Holotype', 'Neotype','Paratype'],
                                      range=['triangle', 'square', 'circle'])),
# scale=450 matches the latinAmerica base map; without it the points are
# projected at a different scale from the country outlines
).project(type='naturalEarth1', translate=[680, 150], scale=450).add_selection(select_family, select_type
                                            ).transform_filter(select_family).transform_filter(select_type)

temp = (latinAmerica + teste).properties(width=600, height=600)

# font adjustments
temp = temp.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static-latinAmerica-type.html')
# temp

-----

**That's it!**