# Spatio-Temporal Exploration

By **Franklin Oliveira**

-----
This notebook contains all the code needed to build charts from the `poliqueta` database, focusing on how the collection evolved over time and space. It includes some basic data preparation followed by the code for each chart.

Database: <font color='blue'>'IBUFRJ27.07.2020 - visualização.xlsx'</font>
    

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# Renderer selection: the 'notebook' renderer is kept commented out for
# reference; the 'default' renderer is the one actually enabled.
# alt.renderers.enable('notebook')
alt.renderers.enable('default')

# Remove Altair's maximum-rows limit so the whole table can be passed to a chart.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Load the merged database: semicolon-separated CSV, UTF-8 with an optional BOM.
NewTable = pd.read_csv(
    './data/merged_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

In [3]:
# Genus names: first letter upper-case, the rest lower-case.
NewTable['genus'] = NewTable['genus'].str.capitalize()

# Normalise the placeholder string 'Nan' to 'NaN' (the key used by the
# family colour palette); every other value is left untouched.
NewTable['family'] = NewTable['family'].mask(NewTable['family'] == 'Nan', 'NaN')

<br>

<font size=5>**Color Palette per Order**</font>

These images were used as inspiration (https://color.adobe.com/create/image)


<div class='row' style='padding-top:20px;'>
    <div class='col-md-6'>
        <img src="./src/img1.jpg" width='400px'>
    </div>
    <div class='col-md-6'>
        <img src="./src/img2.jpg" width='400px'>
    </div>
</div>

<br>

A partir das imagens acima, selecionamos cores (centróides) para criar a paleta de cores. Foram elas: 
<ul>
    <li style='color:#3CA67F'><b> #3CA67F </b># verde</li>
    <li style='color:#7A9FBF'><b> #7A9FBF </b># azul</li>
    <li style='color:#D94814'><b> #D94814 </b># laranja</li>
    <li style='color:#D96236'><b> #D96236 </b># laranja 2</li>
    <li style='color:#F2B999'><b> #F2B999 </b># 'cor de pele'</li>
    <li style='color:#A66C4B'><b> #A66C4B </b># marrom 1</li>
    <li style='color:#732C02'><b> #732C02 </b># marrom 2</li>
</ul>

A partir das cores "centróides", utilizamos a ferramenta Color Crafter para selecionar diferentes "shades" e auxiliar para categorização em diferentes grupos sugeridos pela equipe de Poliquetas do Museu Nacional. 

<ul>
    <li style='color:#3CA67F'><b> Verde: </b> ['#daffef', '#bbebd3', '#9adabc', '#77c8a5', '#57b791', '#3ca67f', '#2a9670', '#238762', '#257a56']</li>
    <li style='color:#7A9FBF'><b> Azul: </b> ['#e7e5df', '#ccd2d8', '#b2c0d0', '#96afc8', '#7a9fbf', '#5d90b6', '#3c81ae', '#0673a4', '#00669a']</li>
    <li style='color:#D94814'><b> laranja: </b> ['#ffbd84', '#ffaa74', '#ff9760', '#ff814b', '#fc6b36', '#eb5824', '#d94814', '#c83b03', '#b73000']</li>
    <li style='color:#D96236'><b> laranja 2: </b> ['#ffeba9', '#ffd391', '#ffbb7b', '#fda468', '#f18e56', '#e57846', '#d96236', '#cc4d28', '#bf381b']</li>
    <li style='color:#F2B999'><b> cor de pele: </b> ['#ffe9c3', '#fbd0ad', '#f2b999', '#e8a287', '#dd8c76', '#d27666', '#c76158', '#bb4d4b', '#ae393e']</li>
    <li style='color:#A66C4B'><b> marrom 1: </b> ['#d9c6af', '#ccad96', '#c1977c', '#b48061', '#a66c4b', '#975b39', '#874c2c', '#774124', '#683720']</li>
    <li style='color:#732C02'><b> marrom 2: </b> ['#eebd93', '#dfa47a', '#d28d60', '#c37746', '#b4622f', '#a3501d', '#92420e', '#823606', '#732c02']</li>
</ul>



**Colors  (antigas):** 

<ul>
    <li style='color:#41A681'><b> #41A681 </b># verde1</li>
    <li style='color:#3CA67F'><b> #3CA67F </b># verde2</li>
    <li style='color:#7ACAAB'><b> #7ACAAB </b># verde claro</li>
    <li style='color:#78a1a1'><b> #78a1a1 </b># azul</li>
    <li style='color:#8ABFB0'><b> #8ABFB0 </b># azul claro</li>
    <li style='color:#FFB27C'><b> #FFB27C </b># cor de pele clara</li>
    <li style='color:#F29877'><b> #F29877 </b># cor de pele</li>
    <li style='color:#ed845e'><b> #ed845e </b># laranja claro1</li>
    <li style='color:#D96236'><b> #D96236 </b># laranja claro2</li>
    <li style='color:#D95323'><b> #D95323 </b># laranja 1</li>
    <li style='color:#D94B18'><b> #D94B18 </b># laranja 2</li>
    <li style='color:#D9C2AD'><b> #D9C2AD </b># bege</li>
    <li style='color:#A66C4B'><b> #A66C4B </b># marrom claro</li>
    <li style='color:#86471B'><b> #86471B </b># marrom1</li>
    <li style='color:#732C02'><b> #732C02 </b># marrom2</li>
    <li style='color:#592202'><b> #592202 </b># marrom escuro1</li>
    <li style='color:#3D1806'><b> #3D1806 </b># marrom escuro2</li>
    <li style='color:#0D0D0D'><b> #0D0D0D </b># preto</li>
</ul>



In [4]:
# The 'order' column has 220 NaNs; uncomment the line below to inspect the
# per-order counts, missing values included.
# NewTable['order'].value_counts(dropna=False)

In [5]:
# Colours are assigned following the spreadsheet
# (2020.10.01 - IB e MN - Cores visualização.xlsx).
# Distinct orders and families present in the table:
ordens, familias = (NewTable[coluna].unique() for coluna in ('order', 'family'))

# Colour per taxonomic order. Grouping is done by families: the order that
# holds a given group of families takes that group's colour.
cores_ordem = {
    'Spionida':'#41A681',   # green
    'Sabellida':'#7ACAAB',  # light green
    'Canalipalpata':'#78a1a1',  # blue
    'Amphinomida':'#8ABFB0',  # light blue
    'Eunicida':'#A66C4B', # light brown
    'Phyllodocida':'#732C02', # brown 2
    'Terebellida':'#ed845e', # light orange 1
    'Scolecida':'#D94B18', # orange 2
    # Missing order. `np.nan` is used because the `np.NAN` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    np.nan:'#0D0D0D',  # black

    # orders not mentioned in the spreadsheet:
    'Sipuncula':'#D9C2AD', # beige
    'Crassiclitellata':'#FFB27C', # light skin tone
    'Aspidosiphonida':'#F29877',  # skin tone

}

# Colour palette per family. Insertion order matters: the charts use the keys
# as the colour-scale domain and the values as its range.
cores_familia = dict([
    # greens
    ('Magelonidae', '#238762'),        # dark green
    ('Oweniidae', '#3CA67F'),          # green (centroid)
    ('Chaetopteridae', '#77c8a5'),     # green
    ('Amphinomidae', '#bbebd3'),       # light green
    # blues
    ('Lumbrineridae', '#e7e5df'),      # light blue 1
    ('Dorvilleidae', '#b2c0d0'),       # light blue 2
    ('Oenonidae', '#7A9FBF'),          # blue (centroid)
    ('Eunicidae', '#3c81ae'),          # blue
    ('Onuphidae', '#00669a'),          # dark blue
    # oranges
    ('Syllidae', '#ffbd84'),
    ('Typhloscolecidae', '#ffaa74'),
    ('Aphroditidae', '#ff9760'),
    ('Acoetidae', '#ff814b'),
    ('Chrysopetalidae', '#fc6b36'),
    ('Eulepethidae', '#eb5824'),
    ('Lopadorrhynchidae', '#d94814'),  # orange (centroid)
    ('Polynoidae', '#c83b03'),
    ('Nereididae', '#b73000'),
    # oranges 2
    ('Nephtyidae', '#f18e56'),
    ('Glyceridae', '#D96236'),         # orange 2 (centroid)
    ('Tomopteridae', '#bf381b'),
    # skin tones
    ('Serpulidae', '#fbd0ad'),
    ('Sabellidae', '#f2b999'),         # skin tone (centroid)
    ('Sabellariidae', '#e8a287'),
    ('Spionidae', '#d27666'),
    # browns 1
    ('Ampharetidae', '#b48061'),
    ('Pectinariidae', '#a66c4b'),      # brown 1 (centroid)
    ('Trichobranchidae', '#975b39'),
    ('Terebellidae', '#874c2c'),
    ('Cirratulidae', '#774124'),
    ('Flabelligeridae', '#683720'),
    # browns 2
    ('Sternaspidae', '#eebd93'),
    ('Orbiniidae', '#dfa47a'),
    ('Opheliidae', '#d28d60'),
    ('Capitellidae', '#c37746'),
    ('Arenicolidae', '#b4622f'),
    ('Cossuridae', '#a3501d'),
    ('Scalibregmatidae', '#92420e'),
    ('Paraonidae', '#823606'),
    ('Maldanidae', '#732c02'),         # brown 2 (centroid)
    # missing family
    ('NaN', '#0D0D0D'),                # black
])

<br>

## Graph: Exploring the spatio-temporal evolution of the database

In [6]:
# Number of records per continent, most frequent first.
NewTable['continent'].value_counts()

South America      3637
Antarctica           74
Central America      52
Europe               11
Asia                  3
North America         2
Oceania               2
Africa                1
Name: continent, dtype: int64

### Chart 1: Evolution per time and continent

In [7]:
# Group by year, continent and family; 'counts' is the number of non-null
# 'class' values in each group.
teste = (
    NewTable
    .groupby(['start_year', 'continent', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

<font color='red' size='4'>**dica:** jogar legenda para a parte de baixo</font>

In [8]:
# Bubble chart: sampling year (x) against continent (y). Bubble size encodes
# the record count and colour encodes the family.
g1 = (
    alt.Chart(teste, title='Temporal evolution per continent')
    .mark_circle()
    .encode(
        x=alt.X('start_year:O', title='Sampling Year'),
        y=alt.Y(
            'continent',
            title=None,
            # continents ordered by their total count, largest first
            sort=alt.EncodingSortField(field='counts', op='sum', order='descending'),
        ),
        size=alt.Size('counts', title='Counts', scale=alt.Scale(range=[30, 800])),
        order=alt.Order('counts', sort='descending'),  # smaller points in front
        color=alt.Color(
            'family',
            title='Family',
            scale=alt.Scale(
                domain=list(cores_familia),
                range=list(cores_familia.values()),
            ),
            legend=alt.Legend(columns=5, symbolLimit=42),
        ),
        tooltip=alt.Tooltip(['continent', 'start_year', 'family', 'counts']),
    )
    .properties(width=800, height=200)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12, orient='bottom')
)

# g1.save('./graphs/time-spacial/evolucao_por_continente.html')

# g1

<br>

### Chart 2: Evolution per time and country

In [9]:
# Group by year, country and family; 'counts' is the number of non-null
# 'class' values in each group.
teste1 = (
    NewTable
    .groupby(['start_year', 'country', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

In [10]:
# Brazil is left out here (it is detailed by state further below).
g1 = (
    alt.Chart(
        teste1.loc[teste1['country'] != 'Brazil'],
        title='Animals collected in other countries',
        width=400,
    )
    .mark_circle()
    .encode(
        x=alt.X('start_year:O', title='Sampling Year'),
        y=alt.Y(
            'country',
            title='Country',
            # countries ordered by their total count, largest first
            sort=alt.EncodingSortField(field='counts', op='sum', order='descending'),
        ),
        size=alt.Size('counts', scale=alt.Scale(range=[20, 250])),
        order=alt.Order('counts', sort='descending'),  # smaller points in front
        color=alt.Color(
            'family',
            scale=alt.Scale(
                domain=list(cores_familia),
                range=list(cores_familia.values()),
            ),
            legend=alt.Legend(columns=2, symbolLimit=42),
        ),
        tooltip=alt.Tooltip(['country', 'start_year', 'family', 'counts']),
    )
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# g1.save('./graphs/time-spacial/evolucao_por_pais.html')
# g1

<br>

### Chart 3: Evolution per time and Brazilian states

In [11]:
# Brazilian records only, grouped by year, state and family; 'counts' is the
# number of non-null 'class' values in each group.
teste2 = (
    NewTable.loc[NewTable['country'] == 'Brazil']
    .groupby(['start_year', 'state', 'family'])['class']
    .count()
    .reset_index(name='counts')
)

#### creating column with brazilian regions

In [12]:
# Brazilian state (as spelled in the 'state' column) -> macro-region code:
# N = North, NE = Northeast, CO = Central-West, SE = Southeast, S = South.
# The last two keys are composite labels, mapped here to a single region.
regioes = {
    'Rio de Janeiro':'SE',
    'São Paulo':'SE',
    'Espírito Santo': 'SE',
    'Pernambuco':'NE',
    'Santa Catarina':'S',
    'Amazonas':'N',
    'Goiás':'CO',
    'Roraima':'N',
    'Pará':'N',
    'Mato Grosso':'CO',
    'Acre': 'N',
    'Bahia': 'NE',
    'Minas Gerais': 'SE',
    'Mato Grosso do Sul': 'CO',
    'Paraná': 'S',
    'Rondônia': 'N',
    'Ceará': 'NE',
    'Maranhão': 'NE',  # fixed: Maranhão belongs to the Northeast, not the North
    'Rio Grande do Sul': 'S',
    'Paraíba': 'NE',
    'Distrito Federal': 'CO',
    'Alagoas': 'NE',
    'Amapá':'N',
    'Piauí': 'NE',
    'Brasília': 'CO',
    'Tocantins': 'N',
    'Rio Grande do Norte': 'NE',
    'Sergipe': 'NE',
    'Minas Gerais/Goiás/Distrito Federal': 'CO',
    'Santa Catarina-Rio Grande do Sul': 'S'
}

# Fail loudly (still a KeyError) when a state has no region, but list every
# missing state at once instead of stopping at the first one.
_estados_sem_regiao = sorted(set(teste2['state'].astype(str)) - set(regioes))
if _estados_sem_regiao:
    raise KeyError(f"states missing from `regioes`: {_estados_sem_regiao}")

# column with the region of each state
teste2['regiao'] = teste2['state'].astype(str).map(regioes)

# column with "state, region" (used as the y-axis label)
teste2['regiao_e_estado'] = teste2['state'] + ', ' + teste2['regiao']

# y-axis order: region code descending, then total count descending.
# Only 'counts' is summed; summing every column would also try to add up the
# string columns.
sorting = (
    teste2
    .groupby(['regiao_e_estado', 'regiao'])['counts']
    .sum()
    .reset_index(name='soma')
    .sort_values(['regiao', 'soma'], ascending=False)['regiao_e_estado']
    .unique()
)

In [13]:
# Bubble chart: sampling year (x) against "state, region" (y), ordered by the
# precomputed `sorting` array. Bubble size encodes the record count and colour
# encodes the family.
# The former `y2` encoding on 'regiao' was removed: `y2` is not supported for
# an un-binned circle mark, so Vega-Lite dropped it with a warning. The region
# is still shown in the y label and in the tooltip.
g1 = (
    alt.Chart(teste2, title='Animals collected on brazilian states per year')
    .mark_circle(opacity=0.8)
    .encode(
        x=alt.X('start_year', type='ordinal', title='Sampling Year'),
        y=alt.Y(
            'regiao_e_estado',
            type='nominal',
            title='State, Region',
            sort=sorting,
            axis=alt.Axis(labels=True),
        ),
        size=alt.Size(
            'counts',
            title='Counts',
            scale=alt.Scale(range=[30, 800], zero=False),
        ),
        order=alt.Order('counts', sort='descending'),  # smaller points in front
        color=alt.Color(
            'family',
            title='Family',
            scale=alt.Scale(
                domain=list(cores_familia.keys()),
                range=list(cores_familia.values()),
            ),
            legend=alt.Legend(columns=5, symbolLimit=42),
        ),
        tooltip=alt.Tooltip(['regiao', 'state', 'start_year', 'family', 'counts']),
    )
)

g1 = g1.properties(
    height=350,
    width=800
).configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12,
    orient='bottom'
)

# g1.save('./graphs/time-spacial/evolucao_por_estados_brasileiros.html')
# g1

<br>

## Lat Long

In [14]:
def regiao(est, mapping=None):
    """Return the region code for a state name, or NaN when it is unknown.

    Parameters
    ----------
    est : hashable
        State name to look up.
    mapping : dict, optional
        State -> region lookup table. Defaults to the notebook-level
        `regioes` dictionary.

    Returns
    -------
    The region code stored in the mapping, or ``np.nan`` when `est` is not a
    key of the mapping.
    """
    if mapping is None:
        mapping = regioes
    # np.nan rather than np.NAN: the upper-case alias was removed in NumPy 2.0.
    return mapping.get(est, np.nan)
    
# Region of every record, looked up from its state (NaN when the state is unmapped).
NewTable['regiao'] = NewTable['state'].map(regiao)

In [15]:
from vega_datasets import data

# Country outlines from the vega_datasets 110m world TopoJSON.
source = alt.topo_feature(data.world_110m.url, feature='countries')

# Basemap: white countries with gray borders in the Natural Earth projection.
world = (
    alt.Chart(source)
    .mark_geoshape(fill='white', stroke='gray')
    .project(type='naturalEarth1')
)

# world

In [16]:
# Dynamic version: a slider selects the sampling year shown in colour.
slider = alt.binding_range(min=1905, max=2015, step=1)
select_year = alt.selection_single(
    name=' ',
    fields=['start_year'],
    bind=slider,
    init={'start_year': 1905},
)

# data = NewTable[(NewTable['long'] < 10) & (NewTable['long'] >-150)].copy()
# NOTE: this rebinds `data`, shadowing the `data` module imported from
# vega_datasets in an earlier cell.
data = NewTable.copy()
data['equador'] = 0

# Background layer: every record as a light-gray dot.
back = (
    alt.Chart(data)
    .mark_circle(color='lightgray')
    .encode(
        longitude=alt.X('long', type='quantitative', title='Longitude'),
        latitude=alt.Y('lat', type='quantitative', title='Latitude'),
        tooltip=alt.Tooltip([
            'lat', 'long', 'country', 'regiao', 'state', 'locality', 'start_year',
            'collector_full_name', 'family', 'genus', 'species', 'order',
            'catalog_number',
        ]),
        # shape = alt.Shape('type:N', title='Type')
    )
)

# Foreground layer: records of the selected year, coloured by family.
front = (
    alt.Chart(data)
    .mark_circle()
    .encode(
        longitude=alt.X('long', type='quantitative', title='Longitude'),
        latitude=alt.Y('lat', type='quantitative', title='Latitude'),
        color=alt.Color(
            'family:N',
            title='Family',
            scale=alt.Scale(
                range=list(cores_familia.values()),
                domain=list(cores_familia),
            ),
            legend=alt.Legend(columns=2, symbolLimit=42),
        ),
        tooltip=alt.Tooltip([
            'lat', 'long', 'country', 'regiao', 'state', 'locality', 'start_year',
            'collector_full_name', 'family', 'genus', 'species', 'order',
            'catalog_number',
        ]),
        # shape = alt.Shape('type:N', title='Type')
    )
    .add_selection(select_year)
    .transform_filter(select_year)
)

# Horizontal rule at y = 0 (the constant 'equador' column).
# It is defined here but not included in the layered chart below.
equador = (
    alt.Chart(data, width=700, height=400)
    .mark_rule(color='black', size=1)
    .encode(y=alt.Y('equador', axis=alt.Axis(grid=False)))
)


g = (
    alt.layer(world, back, front)
    .properties(width=800, height=500)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# g.save('./graphs/time-spacial/lat_long.html')

# g

<br>

<font color='red' size='5'>ajustes para versão estática - seção temporária</font>

In [17]:
# Layered world map following
# https://nextjournal.com/sdanisch/cartographic-visualization

# TODO: adjust colours

# Re-imported because an earlier cell rebinds the name `data` to a DataFrame.
from vega_datasets import data

mapa = alt.layer(
     # use the sphere of the Earth as the base layer
    alt.Chart({'sphere': True}).mark_geoshape(
        fill='#f2f2f2'
    ),
    # add a graticule for geographic reference lines
    alt.Chart({'graticule': True}).mark_geoshape(
        stroke='#ffffff', strokeWidth=1
    ),
    # and then the countries of the world
    # (fill was '#white', which is not a valid colour; the named colour is 'white')
    alt.Chart(alt.topo_feature(data.world_110m.url, 'countries')).mark_geoshape(
        fill='white', stroke='black', strokeWidth=0.5
    ),
    # record layers built in the slider-map cell
    back, front
).properties(
    width=800,
    height=500
)

# one projection for every layer; no border around the view
mapa = mapa.project(
    type='naturalEarth1', scale=140
).configure_view(stroke=None)

# font adjustments
mapa = mapa.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# mapa.save('./graphs/time-spacial/lat_long-worldwide-grid.html')

# mapa

In [27]:
# Static world map of every record: colour = family, shape = type status.
# Both selections are bound to the legend and filter the plotted points.
data = NewTable.copy()
# give missing type status an explicit 'NaN' label so it can be a shape category
data['type'] = data['type'].map(lambda t: 'NaN' if str(t) == 'nan' else t)

select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type'], bind='legend')

teste = (
    alt.Chart(data)
    .mark_point(filled=True)
    .encode(
        longitude=alt.X('long:Q', title='Longitude'),
        latitude=alt.Y('lat:Q', title='Latitude'),
        color=alt.Color(
            'family:N',
            title='Family',
            scale=alt.Scale(
                range=list(cores_familia.values()),
                domain=list(cores_familia),
            ),
            legend=alt.Legend(columns=2, symbolLimit=42),
        ),
        tooltip=alt.Tooltip([
            'lat', 'long', 'country', 'regiao', 'state', 'locality', 'start_year',
            'collector_full_name', 'family', 'genus', 'species', 'order', 'type',
            'catalog_number',
        ]),
        shape=alt.Shape(
            'type:N',
            title='Type',
            scale=alt.Scale(
                domain=['Holotype', 'Neotype', 'Paratype', 'NaN'],
                range=['triangle', 'square', 'cross', 'circle'],
            ),
        ),
    )
    .project(type='naturalEarth1')
    .add_selection(select_family, select_type)
    .transform_filter(select_family)
    .transform_filter(select_type)
)

# font adjustments
temp = (
    alt.layer(world, teste)
    .properties(width=800, height=500)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static.html')

# temp

### Latin America

In [236]:
from vega_datasets import data

# Country outlines from the vega_datasets 110m world TopoJSON.
source = alt.topo_feature(data.world_110m.url, 'countries')

# Basemap with an explicit translate/scale on the Natural Earth projection.
latinAmerica = alt.Chart(source).mark_geoshape(
    fill='white',
    stroke='gray'
).project('naturalEarth1', translate=[680, 150], scale=450)

# latinAmerica

# legend-bound selections: clicking a legend entry filters the points
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type'], bind='legend')

# NOTE: this rebinds `data`, shadowing the vega_datasets module used above.
data = NewTable.copy()
# give missing type status an explicit 'NaN' label so it can be a shape category
data['type'] = data['type'].apply(lambda x:'NaN' if str(x) == 'nan' else x)

teste = alt.Chart(data).mark_point(filled=True, size=50).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color = alt.Color('family:N', title='Family',
                      scale = alt.Scale(range=list(cores_familia.values()), domain=list(cores_familia.keys())),
                      legend=alt.Legend(columns=2, symbolLimit=42)),
    tooltip = alt.Tooltip(['lat','long','country','regiao','state', 'locality','start_year','collector_full_name',
                           'family', 'genus','species','order', 'type', 'catalog_number']),
    shape= alt.Shape('type:N', title='Type',
                    scale= alt.Scale(domain=['Holotype', 'Neotype','Paratype', 'NaN'],
                                     range=['triangle', 'square', 'cross', 'circle'])),
#     opacity = alt.condition(select_family, alt.value(1), alt.value(0))
# The points use exactly the same projection parameters as the basemap.
# `scale=450` was missing here, so the two layers declared different
# projections and were not guaranteed to line up.
).project(type='naturalEarth1', translate=[680, 150], scale=450).add_selection(select_family,
                                    select_type).transform_filter(select_family).transform_filter(select_type)

temp = (latinAmerica + teste).properties(width=600, height=600)

# font adjustments
temp = temp.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static-latinAmerica.html')

# temp

## Lat long version for types

In [237]:
from vega_datasets import data

# Country outlines from the vega_datasets 110m world TopoJSON.
source = alt.topo_feature(data.world_110m.url, feature='countries')

# Basemap: white countries with gray borders, Natural Earth projection with an
# explicit translate and scale.
latinAmerica = (
    alt.Chart(source)
    .mark_geoshape(fill='white', stroke='gray')
    .project(type='naturalEarth1', translate=[680, 150], scale=450)
)

# latinAmerica

In [238]:
# Only records that have a type status.
data = NewTable[~NewTable['type'].isna()].copy()
# relabels the literal string 'nan' as 'NaN' (true NaNs were dropped above)
data['type'] = data['type'].apply(lambda x:'NaN' if str(x) == 'nan' else x)
#
# legend-bound selections: clicking a legend entry filters the points
select_family = alt.selection_multi(fields=['family'], bind='legend')
select_type = alt.selection_multi(fields=['type'], bind='legend')

teste = alt.Chart(data).mark_point(filled=True, size=80).encode(
    longitude = alt.X('long:Q', title='Longitude'),
    latitude = alt.Y('lat:Q', title='Latitude'),
    color = alt.Color('family:N', title='Family',
                      scale = alt.Scale(range=list(cores_familia.values()), domain=list(cores_familia.keys())),
                      legend=alt.Legend(columns=2, symbolLimit=42)),
    tooltip = alt.Tooltip(['lat','long','country','regiao','state', 'locality','start_year','collector_full_name',
                           'family', 'genus','species','order', 'catalog_number']),
    shape= alt.Shape('type:N', title='Type',
                    scale= alt.Scale(domain=['Holotype', 'Neotype','Paratype'],
                                     range=['triangle', 'square', 'circle'])),

# The points use exactly the same projection parameters as the `latinAmerica`
# basemap. `scale=450` was missing here, so the two layers declared different
# projections and were not guaranteed to line up.
).project(type='naturalEarth1', translate=[680, 150], scale=450).add_selection(select_family, select_type
                                            ).transform_filter(select_family).transform_filter(select_type)

temp = (latinAmerica + teste).properties(width=600, height=600)

# font adjustments
temp = temp.configure_title(fontSize=16).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)

# temp.save('./graphs/time-spacial/lat_long-worldwide-static-latinAmerica-type.html')

# temp

-----

**That's it!**