# Spatiotemporal Exploration

By **Franklin Oliveira**

-----
This notebook contains all the code needed to build charts from the `poliqueta` database, with a focus on exploring it over time and space. It covers some basic data treatment followed by the code for each chart.

Database: <font color='blue'>'IBUFRJ27.07.2020 - visualização.xlsx'</font>
    

In [3]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# Select the Altair renderer. 'default' is the one in use; the 'notebook'
# renderer was tried before and is left disabled:
# alt.renderers.enable('notebook')
alt.renderers.enable('default')

# Disable Altair's row limit. This call is the last expression of the cell,
# so its return value is what the cell displays.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [4]:
# Load the treated database (semicolon-separated, 'utf-8-sig' encoded).
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

<br>

<font size=5>**Color Palette per Order**</font>

These images were used as inspiration (https://color.adobe.com/create/image)

<img src="./src/img1.jpg" width='500px'>

<img src="./src/img2.jpg" width='500px'>

Colors: 

    '#8ABFB0',  # light blue
    '#41A681',  # green
    '#7ACAAB',  # light green
    '#D9C2AD',  # beige
    '#0D0D0D',  # black
    '#D96236',  # orange
    '#D94B18',  # dark orange
    '#FFB27C',  # light peach
    '#732C02',  # brown
    '#86471B',  # mustard

In [5]:
# Records per order, with missing values counted as their own row
# (220 NaNs in the run captured below).
contagem_ordens = NewTable['order'].value_counts(dropna=False)
contagem_ordens

Phyllodocida        1691
Eunicida             937
Sabellida            561
Terebellida          269
Spionida             226
NaN                  220
Scolecida            183
Amphinomida          134
Sipuncula              9
Crassiclitellata       1
Name: order, dtype: int64

In [6]:
# Distinct values of the 'order' column, in order of first appearance.
# NaN (records without an order) is one of them, so it also takes a colour.
ordens = NewTable['order'].unique()

# Colour palette used for the orders in every chart of this notebook.
cores = [
    '#8ABFB0',  # light blue
    '#41A681',  # green
    '#7ACAAB',  # light green
    '#D9C2AD',  # beige
    '#0D0D0D',  # black
    '#D96236',  # orange
    '#D94B18',  # dark orange
    '#FFB27C',  # light peach
    '#732C02',  # brown
    '#86471B',  # mustard
]

# order -> colour lookup. The palette index wraps around (modulo) so that a
# database with more distinct orders than palette entries reuses colours
# instead of raising IndexError; with up to len(cores) orders the mapping is
# exactly the positional one.
cores_ordem = {
    ordem: cores[posicao % len(cores)]
    for posicao, ordem in enumerate(ordens)
}

<br>

## Graph: Exploring the spatiotemporal evolution of the database

In [7]:
# Records per continent (missing continents are not counted).
contagem_continentes = NewTable['continent'].value_counts()
contagem_continentes

South America      3637
Antarctica           74
Central America      52
Europe               11
Asia                  3
Oceania               2
North America         2
Africa                1
Name: continent, dtype: int64

### Chart 1: Evolution over time by continent

In [8]:
# Group by start year, continent and order; 'counts' is the number of
# non-null 'class' entries in each group.
teste = (
    NewTable
    .groupby(['start_year', 'continent', 'order'])['class']
    .count()
    .reset_index(name='counts')
)

In [9]:
# Bubble chart: one circle per (start year, continent, order), sized by the
# number of records and coloured by order. Continents are sorted by their
# total count, largest first.
g1 = (
    alt.Chart(teste, title='Temporal evolution per continent')
    .mark_circle()
    .encode(
        x=alt.X('start_year', type="ordinal", title='Start Year'),
        y=alt.Y(
            'continent',
            title='Continent',
            sort=alt.EncodingSortField('counts', op="sum", order='descending'),
        ),
        size=alt.Size('counts', scale=alt.Scale(range=[30, 1200])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(['continent', 'start_year', 'order', 'counts']),
    )
)

# Write a standalone HTML copy of the chart.
g1.save('./graphs/time-spacial/evolucao_por_continente.html')

# Last expression: render the chart as the cell output.
g1

<br>

### Chart 2: Evolution over time by country

In [10]:
# Group by start year, country and order; 'counts' is the number of
# non-null 'class' entries in each group.
teste1 = (
    NewTable
    .groupby(['start_year', 'country', 'order'])['class']
    .count()
    .reset_index(name='counts')
)

In [11]:
# Brazil is left out of this chart (it is broken down by state further on).
fora_do_brasil = teste1[teste1['country'] != 'Brazil']

g1 = (
    alt.Chart(fora_do_brasil, title='Animals collected in other countries', width=400)
    .mark_circle()
    .encode(
        x=alt.X('start_year', type="ordinal", title='Start Year'),
        y=alt.Y(
            'country',
            title='Country',
            sort=alt.EncodingSortField('counts', op="sum", order='descending'),
        ),
        size=alt.Size('counts', scale=alt.Scale(range=[20, 250])),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(['country', 'start_year', 'order', 'counts']),
    )
)

# Write a standalone HTML copy of the chart.
g1.save('./graphs/time-spacial/evolucao_por_pais.html')

# Last expression: render the chart as the cell output.
g1

<br>

### Chart 3: Evolution over time by Brazilian state

In [12]:
# Keep only records collected in Brazil, then group by start year, state and
# order; 'counts' is the number of non-null 'class' entries in each group.
teste2 = (
    NewTable[NewTable['country'] == 'Brazil']
    .groupby(['start_year', 'state', 'order'])['class']
    .count()
    .reset_index(name='counts')
)

#### Creating a column with Brazilian regions

In [13]:
# State name (as it appears in the 'state' column) -> region abbreviation:
# N = North, NE = Northeast, CO = Central-West, SE = Southeast, S = South.
# The last two keys are compound labels present in the data.
regioes = {
    'Rio de Janeiro':'SE',
    'São Paulo':'SE',
    'Espírito Santo': 'SE',
    'Pernambuco':'NE',
    'Santa Catarina':'S',
    'Amazonas':'N',
    'Goiás':'CO',
    'Roraima':'N',
    'Pará':'N',
    'Mato Grosso':'CO',
    'Acre': 'N',
    'Bahia': 'NE',
    'Minas Gerais': 'SE',
    'Mato Grosso do Sul': 'CO',
    'Paraná': 'S',
    'Rondônia': 'N',
    'Ceará': 'NE',
    'Maranhão': 'NE',  # fixed: Maranhão belongs to the Northeast, not the North
    'Rio Grande do Sul': 'S',
    'Paraíba': 'NE',
    'Distrito Federal': 'CO',
    'Alagoas': 'NE',
    'Amapá':'N',
    'Piauí': 'NE',
    'Brasília': 'CO',
    'Tocantins': 'N',
    'Rio Grande do Norte': 'NE',
    'Sergipe': 'NE',
    'Minas Gerais/Goiás/Distrito Federal': 'CO',
    'Santa Catarina-Rio Grande do Sul': 'S'
}

# Region of each state. The lookup is strict on purpose: a state that is
# missing from `regioes` raises KeyError instead of being silently dropped.
teste2['regiao'] = teste2['state'].apply(lambda x: regioes[str(x)])

# Axis label in the form "<state>, <region>".
teste2['regiao_e_estado'] = teste2['state'] + ', ' + teste2['regiao']

# Axis order: by region, then by total count within the region (both
# descending). Only 'counts' is summed; the previous version summed every
# column of the frame (strings included) before picking 'counts'.
sorting = (
    teste2
    .groupby(['regiao_e_estado', 'regiao'])['counts']
    .sum()
    .reset_index(name='soma')
)
sorting = sorting.sort_values(['regiao','soma'], ascending=False)['regiao_e_estado'].unique()

In [14]:
# Bubble chart for Brazil: one circle per (start year, state, order), sized
# by record count and coloured by order. Rows follow the precomputed
# `sorting` order (by region, then by total count).
g1 = (
    alt.Chart(teste2, title='Animals collected on brazilian states per year')
    .mark_circle(opacity=0.8)
    .encode(
        x=alt.X('start_year', type="ordinal", title='Start Year'),
        y=alt.Y(
            'regiao_e_estado',
            type="nominal",
            title='State, Region',
            sort=sorting,
            axis=alt.Axis(labels=True),
        ),
        # NOTE(review): y2 is normally used with ranged marks; confirm it has
        # any effect on a circle mark before relying on it.
        y2=alt.Y2('regiao', title='Região'),
        size=alt.Size('counts', scale=alt.Scale(range=[30, 1200], zero=False)),
        color=alt.Color('order', scale=alt.Scale(domain=ordens, range=cores)),
        tooltip=alt.Tooltip(['regiao', 'state', 'start_year', 'order', 'counts']),
    )
)

# HTML export is currently disabled:
# g1.save('./graphs/time-spacial/evolucao_por_estados_brasileiros.html')

# Last expression: render the chart as the cell output.
g1

<br>

## Lat Long

In [15]:
def regiao(est):
    """Return the region abbreviation registered for a state name.

    Parameters
    ----------
    est : hashable
        Value looked up as a key of the module-level ``regioes`` dict.

    Returns
    -------
    str or float
        ``regioes[est]`` when ``est`` is a key of ``regioes``; otherwise
        ``np.nan``.
    """
    # np.nan rather than np.NAN: the upper-case alias was removed in NumPy 2.0.
    return regioes.get(est, np.nan)
    
# Region of each record, derived from its state through `regiao`.
NewTable['regiao'] = NewTable['state'].map(regiao)

In [17]:
from vega_datasets import data

# Country outlines taken from the `world_110m` TopoJSON ('countries' feature).
source = alt.topo_feature(data.world_110m.url, 'countries')

# Base map: white countries with light gray borders, equirectangular projection.
world = (
    alt.Chart(source)
    .mark_geoshape(fill='white', stroke='lightgray')
    .project('equirectangular')
    .properties(width=600, height=300)
)

# Last expression: render the map as the cell output.
world

In [22]:
# Dynamic version: a year slider selects which records are drawn in colour on
# top of the full set of records, which is always shown in light gray.
slider = alt.binding_range(min=1905, max=2015, step=1)
select_year = alt.selection_single(name=' ', fields=['start_year'],
                                   bind=slider, init={'start_year': 1905})

# Work on a copy under its own name. This frame used to be called `data`,
# which rebound the `vega_datasets.data` loader imported for the base map and
# made the world-map cell fail on `data.world_110m` when re-run afterwards.
# A longitude window (-150 < long < 10) was used here before and is disabled.
pontos = NewTable.copy()
pontos['equador'] = 0  # constant column read by the `equador` rule below


def _lat_long_encodings():
    """Return fresh x / y / tooltip channels shared by both point layers."""
    return dict(
        x=alt.X('long', type='quantitative', title='Longitude', axis=alt.Axis(grid=False),
                scale=alt.Scale(domain=[-180, 180])),
        y=alt.Y('lat', type='quantitative', title='Latitude', axis=alt.Axis(grid=False),
                scale=alt.Scale(domain=[-90, 90])),
        tooltip=alt.Tooltip(['lat', 'long', 'country', 'regiao', 'state', 'start_year', 'order']),
    )


# Background layer: every record, in gray; zoom/pan is bound to the y axis only.
back = alt.Chart(pontos).mark_circle(opacity=0.5, color='lightgray').encode(
    **_lat_long_encodings()
).interactive(bind_x=False)

# Foreground layer: records of the year chosen on the slider, coloured by order.
front = alt.Chart(pontos).mark_circle().encode(
    color=alt.Color('order', title='Order', scale=alt.Scale(range=cores, domain=ordens)),
    **_lat_long_encodings()
).add_selection(
    select_year
).transform_filter(
    select_year
)

# Horizontal rule at y = 0.
# NOTE(review): this chart is built but is not part of the layered chart
# below - confirm whether it was meant to be added to `alt.layer(...)`.
equador = alt.Chart(pontos, width=700, height=400).mark_rule(color='black', size=1).encode(
    y=alt.Y('equador', axis=alt.Axis(grid=False))
)


g = alt.layer(world, back, front)

# Write a standalone HTML copy of the chart.
g.save('./graphs/time-spacial/lat_long.html')

# Last expression: render the layered chart as the cell output.
g

-----

**That's it!**