# Cumulative counts per geographical region

By **Franklin Oliveira**

-----
This notebook contains all the code needed to build charts from the `repteis` database, focusing on the collection's cumulative spatial increments. Here you'll find some basic data treatment and the code for the charts.

Database: <font color='blue'>'Compilacao Livros Repteis - 2 a 10 - 2020_04_28.xls'</font>.
    

In [1]:
import datetime
import numpy as np
import pandas as pd

from collections import defaultdict

# quick visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Waffle Charts
# from pywaffle import Waffle 
# docs: https://pywaffle.readthedocs.io/en/latest/examples/block_shape_distance_location_and_direction.html

# visualization
import altair as alt

# enabling notebook renderer
# alt.renderers.enable('notebook')
# alt.renderers.enable('default')

# Turn off Altair's maximum-rows check so the full data frames can be passed to the charts
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

## Importing data...

In [2]:
# Load the already-treated database: ';'-separated CSV, UTF-8 with BOM
NewTable = pd.read_csv(
    './data/treated_db.csv',
    sep=';',
    encoding='utf-8-sig',
    low_memory=False,
)

<br>

<font size=5>**Color palette per Order**</font>

The image below served as inspiration (https://color.adobe.com/create/image)

<img src="./src/paleta_cores.jpeg" width='500px'>

Cores: 

- verde_escuro: #284021
- verde_claro: #88BF11
- amarelo: #D9CB0B
- laranja: #D99311
- laranja_escuro: #BF4417
- marrom_claro: #BF8D7A

In [3]:
# Color assigned to each order.
# p.s.: Caudata is an error and should be removed, so it has no entry here.
cores_ordem = dict([
    ('Squamata', '#BF4417'),
    ('Testudines', '#D9CB0B'),
    ('Crocodylia', '#284021'),
])

# parallel lists of order names and their colors, in dict order
ordens = [*cores_ordem]
cores = [*cores_ordem.values()]

Color palettes per continent: <br>
**Paleta 1:**
["#d963cf", "#10b651", "#21638f", "#bce091", "#8d102b", "#52e9e6", "#5e4028"]

<ul>
    <li style="color:#d963cf"><b>cor 1</b></li>
    <li style="color:#10b651"><b>cor 2</b></li>
    <li style="color:#21638f"><b>cor 3</b></li>
    <li style="color:#bce091"><b>cor 4</b></li>
    <li style="color:#8d102b"><b>cor 5</b></li>
    <li style="color:#52e9e6"><b>cor 6</b></li>
    <li style="color:#5e4028"><b>cor 7</b></li>
</ul>

**Paleta 2:**
["#208eb7", "#c5df72", "#382010", "#6fd8c5", "#1a6587", "#fdb5ac", "#7b0905"]

<ul>
    <li style="color:#208eb7"><b>cor 1</b></li>
    <li style="color:#c5df72"><b>cor 2</b></li>
    <li style="color:#382010"><b>cor 3</b></li>
    <li style="color:#6fd8c5"><b>cor 4</b></li>
    <li style="color:#1a6587"><b>cor 5</b></li>
    <li style="color:#fdb5ac"><b>cor 6</b></li>
    <li style="color:#7b0905"><b>cor 7</b></li>
</ul>

**Paleta 3:**
["#d963cf", "#80c6b8", "#bf3e15", "#b7d165", "#3a6566", "#efaa79", "#803e4c"]

<ul>
    <li style="color:#d963cf"><b>cor 1</b></li>
    <li style="color:#80c6b8"><b>cor 2</b></li>
    <li style="color:#bf3e15"><b>cor 3</b></li>
    <li style="color:#b7d165"><b>cor 4</b></li>
    <li style="color:#3a6566"><b>cor 5</b></li>
    <li style="color:#efaa79"><b>cor 6</b></li>
    <li style="color:#803e4c"><b>cor 7</b></li>
</ul>

In [4]:
# Colors per continent, taken from "Paleta 1"
cores_continente = dict([
    ("#N/D", "#5e4028"),
    ("América do Sul", "#10b651"),
    ("América Central", "#bce091"),
    ("América do Norte", "#21638f"),
    ("Ásia", "#d963cf"),
    ("África", "#52e9e6"),
    ("Europa", "#8d102b"),
])

# Alternative colors per continent, taken from "Paleta 3"
cores_continente3 = dict([
    ("#N/D", "#d963cf"),
    ("América do Sul", "#3a6566"),
    ("América Central", "#b7d165"),
    ("América do Norte", "#80c6b8"),
    ("Ásia", "#803e4c"),
    ("África", "#efaa79"),
    ("Europa", "#bf3e15"),
])

In [5]:
# Colors per COUNTRY: each country gets a shade of its CONTINENT color.
# Shade ramps the values below were picked from:
#   South America:
#     ['#bbffd4', '#94efc6', '#57d5c9', '#00b8cc', '#0096c9', '#0071ba', '#004da4', '#002e8b', '#00237a']
#     plus '#a6d2eb', '#9ebdcb'
#   Central America:
#     ['#e3ff63', '#caf94f', '#b2e439', '#9acf1c', '#81ba00', '#69a600', '#519200', '#3a7e00', '#256b00']
#   Asia:
#     ['#f0c0d7', '#e5aec6', '#d89bb2', '#cb879c', '#bc7386', '#ad6274', '#9e5466', '#8f485a', '#803e4c']
#   Africa:
#     ['#ffceb2', '#ffba94', '#efaa79', '#d39a5f', '#ba8a47', '#a57b34', '#956c25', '#895d1a', '#815010']
#   Europe:
#     ['#ffcea9', '#ffb996', '#ffa583', '#ff916f', '#ff7d5c', '#eb6949', '#d25638', '#ba4327', '#a52e17']

# South American countries only; also unpacked into `cores_pais` below so the
# shades are written down once
cores_AS = {
    'Brasil': '#00237a',
    'Uruguai': '#002e8b',
    'Colômbia': '#004da4',
    'Peru': '#0071ba',
    'Paraguai': '#0096c9',
    'Argentina': '#00b8cc',
    'Guiana Francesa': '#57d5c9',
    'Venezuela': '#94efc6',
    'Guiana': '#9ebdcb',
    'Chile': '#bbffd4',
    'Equador': '#a6d2eb',
}

# every country found in the data (insertion order defines the legend order)
cores_pais = {
    '#N/D': '#5e4028',
    'nan': '#000000',  # black
    # South America
    **cores_AS,
    # Central America
    'Guatemala': '#e3ff63',
    'Panamá': '#caf94f',
    'Porto Rico': '#b2e439',
    'Costa Rica': '#9acf1c',
    'México': '#256b00',
    'Nicarágua': '#81ba00',
    'Honduras': '#69a600',
    'Cuba': '#519200',
    'República Dominicana': '#3a7e00',
    # North America
    'Estados Unidos': '#80c6b8',
    # Asia
    'Israel': '#803e4c',
    'Indonésia': '#9e5466',
    'Índia': '#bc7386',
    'Filipinas': '#d89bb2',
    # Africa
    'África do Sul': '#ba8a47',
    'Egito': '#efaa79',
    # Europe
    'Bósnia e Herzegovina': '#ffb996',
    'Romênia': '#ff916f',
    'Alemanha': '#eb6949',
    # NOTE(review): 'Kingdom' looks like a truncated country name — confirm
    # against the values actually present in the `pais` column
    'Kingdom': '#ba4327',
}

<br>

Colors per Brazilian Region:

**Paleta 1:**
["#52ef99", "#944557", "#75d5e1", "#ee4454", "#b1bf81"]

<ul>
    <li style='color:#52ef99'><b>cor 1</b></li>
    <li style='color:#944557'><b>cor 2</b></li>
    <li style='color:#75d5e1'><b>cor 3</b></li>
    <li style='color:#ee4454'><b>cor 4</b></li>
    <li style='color:#b1bf81'><b>cor 5</b></li>
</ul>

**Paleta 2:**
["#48bf8e", "#cc4c3e", "#7fa69d", "#bb8377", "#2a6866"]

<ul>
    <li style='color:#48bf8e'><b>cor 1</b></li>
    <li style='color:#cc4c3e'><b>cor 2</b></li>
    <li style='color:#7fa69d'><b>cor 3</b></li>
    <li style='color:#bb8377'><b>cor 4</b></li>
    <li style='color:#2a6866'><b>cor 5</b></li>
</ul>

In [6]:
# Colors per Brazilian region, taken from "Paleta 2"
cores_regioes = dict([
    ('SE', '#bb8377'),
    ('NE', '#cc4c3e'),  # heat
    ('S', '#48bf8e'),   # cold
    ('N', '#2a6866'),   # forest
    ('CO', '#7fa69d'),
])

# Shade ramps the state colors below were picked from, one per region:
# SE: ['#f6ccd0', '#eababc', '#dba9a6', '#cb968e', '#bb8377', '#ab7364', '#9c6556', '#8d594b', '#7d4f3f']
# NE: ['#ffc7aa', '#ffb499', '#ffa389', '#ff917a', '#ff7f68', '#f36e5a', '#df5d4b', '#cc4c3e', '#b83b2f']
# S: ['#9bffff', '#72f7fd', '#3be5f4', '#00d3ea', '#00c2e0', '#00b2d6', '#00a2cc', '#0093c1', '#0084b5']
# N: ['#c5e1cf', '#afd2c1', '#97c2b3', '#7db2a6', '#63a098', '#4c908a', '#3d827d', '#337570', '#2a6866']
# CO: ['#fffdfd', '#efeef0', '#d8dee3', '#c1ccd7', '#a8bac9', '#94aabb', '#869bad', '#7b8d9f', '#738093']

# Colors per state: the per-region groups are flattened, in order, into one dict
cores_estados = {
    estado: cor
    for grupo in (
        # SE
        {
            'Rio de Janeiro': '#8d594b',
            'São Paulo': '#ab7364',
            'Espírito Santo': '#cb968e',
            'Minas Gerais': '#eababc',
        },
        # NE
        {
            'Pernambuco': '#b83b2f',
            'Bahia': '#cc4c3e',
            'Ceará': '#df5d4b',
            'Paraíba': '#f36e5a',
            'Alagoas': '#ff7f68',
            'Piauí': '#ff917a',
            'Rio Grande do Norte': '#ffa389',
            'Sergipe': '#ffb499',
        },
        # S
        {
            'Paraná': '#0084b5',
            'Santa Catarina': '#00a2cc',
            'Rio Grande do Sul': '#00c2e0',
            'Santa Catarina-Rio Grande do Sul': '#3be5f4',  # error in the database
        },
        # N
        # NOTE(review): Maranhão is officially a Northeast (NE) state but is
        # colored with the N ramp here — confirm this is intended
        {
            'Amazonas': '#2a6866',
            'Roraima': '#337570',
            'Pará': '#3d827d',
            'Acre': '#4c908a',
            'Rondônia': '#63a098',
            'Maranhão': '#7db2a6',
            'Amapá': '#97c2b3',
            'Tocantins': '#afd2c1',
        },
        # CO
        {
            'Goiás': '#7b8d9f',
            'Mato Grosso': '#869bad',
            'Mato Grosso do Sul': '#94aabb',
            'Distrito Federal': '#a8bac9',
            'Brasília': '#c1ccd7',
            'Minas Gerais/Goiás/Distrito Federal': '#d8dee3',  # error in the database
        },
    )
    for estado, cor in grupo.items()
}

<br>

## Counting per continent

In [7]:
# Normalize the misspelled continent name ("Améica do Sul"); every other value is left as is
NewTable['continente'] = NewTable['continente'].replace({'Améica do Sul': 'América do Sul'})

In [8]:
# Make every country value a string (missing values become the text 'nan')
NewTable['pais'] = NewTable['pais'].map(str)

In [9]:
# Sanity check: number of records per continent (displayed as the cell output)
NewTable['continente'].value_counts()

América do Sul      21927
América Central       135
América do Norte      103
Ásia                   54
#N/D                   25
África                 13
Europa                  9
Name: continente, dtype: int64

In [31]:
# Records (non-null `class` values) per collection year and continent,
# ordered by continent and then by year
teste = (
    NewTable.groupby(['ano_coleta', 'continente'])['class']
    .count()
    .reset_index(name='counts')
    .sort_values(['continente', 'ano_coleta'])
)

In [32]:
# Running total of `counts` within each continent.
# groupby().cumsum() returns a Series on the frame's own index, so every value
# lands on its own row no matter how the frame is ordered (the previous loop
# assigned by position and was only correct for a frame sorted by continent).
# Rows accumulate in their current order, i.e. by year within each continent.
teste['cumulative_sum'] = teste.groupby('continente')['counts'].cumsum()

### Chart: all continents

In [33]:
# Legend-bound multi-selection: clicking legend entries picks the continents to show
select_continent = alt.selection_multi(fields=['continente'], bind='legend')

# Cumulative line chart, one line per continent
g1 = (
    alt.Chart(teste, title='Evolução temporal da coleção por continente', width=600)
    .mark_line(point=True)
    .encode(
        x=alt.X('ano_coleta', type="ordinal", title='Ano de Coleta'),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
        ),
        color=alt.Color(
            'continente:N',
            title='Continente',
            scale=alt.Scale(
                domain=list(cores_continente.keys()),
                range=list(cores_continente.values()),
            ),
        ),
        tooltip=alt.Tooltip(['continente', 'ano_coleta', 'counts', 'cumulative_sum']),
        # continents outside the selection are drawn fully transparent
        opacity=alt.condition(select_continent, alt.value(1), alt.value(0)),
    )
    .add_selection(select_continent)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_continente.html')

g1

### Chart: all continents (ex. South America)

In [34]:
# Legend-bound multi-selection: clicking legend entries picks the continents to show
select_continent = alt.selection_multi(fields=['continente'], bind='legend')

# Color scale restricted to the continents other than South America
continents_exSA = [nome for nome in teste['continente'].unique() if nome != 'América do Sul']
colors_exSA = [cores_continente[nome] for nome in continents_exSA]

# Same cumulative chart as before, without the South America line
g1 = (
    alt.Chart(
        teste[teste['continente'] != 'América do Sul'],
        title='Evolução temporal da coleção por continente (ex. América do Sul)',
        width=600,
    )
    .mark_line(point=True)
    .encode(
        x=alt.X('ano_coleta', type="ordinal", title='Ano de Coleta'),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            scale=alt.Scale(domain=[0, 70]),  # fixed y-axis
        ),
        color=alt.Color(
            'continente:N',
            title='Continente',
            scale=alt.Scale(domain=continents_exSA, range=colors_exSA),
        ),
        tooltip=alt.Tooltip(['continente', 'ano_coleta', 'counts', 'cumulative_sum']),
        # continents outside the selection are drawn fully transparent
        opacity=alt.condition(select_continent, alt.value(1), alt.value(0)),
    )
    .add_selection(select_continent)
    .configure_point(size=50)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_OUTROS_continentes.html')

g1

<br>

## Counting per Country

In [35]:
# Records (non-null `class` values) per collection year, continent and country,
# ordered by country and then by year
teste = (
    NewTable.groupby(['ano_coleta', 'continente', 'pais'])['class']
    .count()
    .reset_index(name='counts')
    .sort_values(['pais', 'ano_coleta'])
)

In [36]:
# Running total of `counts` within each country.
# groupby().cumsum() returns a Series on the frame's own index, so every value
# lands on its own row no matter how the frame is ordered (the previous loop
# assigned by position and was only correct for a frame sorted by country).
# Rows accumulate in their current order, i.e. by year within each country.
teste['cumulative_sum'] = teste.groupby('pais')['counts'].cumsum()

### Chart: all countries

In [42]:
# Legend-bound multi-selection: clicking legend entries picks the countries to show
select_country = alt.selection_multi(fields=['pais'], bind='legend')

# Cumulative line chart, one line per country; rows outside the selection are filtered out
g1 = (
    alt.Chart(teste, title='Evolução temporal da coleção por país', width=600)
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis: every year in the frame, whatever the selection
            scale=alt.Scale(domain=sorted(teste['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            scale=alt.Scale(domain=[0, 20000]),  # fixed y-axis
        ),
        color=alt.Color(
            'pais:N',
            title='País',
            legend=alt.Legend(columns=2, symbolLimit=42),
            scale=alt.Scale(
                domain=list(cores_pais.keys()),
                range=list(cores_pais.values()),
            ),
        ),
        tooltip=alt.Tooltip(['pais', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_country)
    .transform_filter(select_country)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_país.html')

g1

### Chart: all countries (ex. Brasil)

In [47]:
# Legend-bound multi-selection: clicking legend entries picks the countries to show
select_country = alt.selection_multi(fields=['pais'], bind='legend')

# Cumulative line chart per country, leaving Brasil out
g1 = (
    alt.Chart(
        teste[teste['pais'] != 'Brasil'],
        title='Evolução temporal da coleção por país (ex. Brasil)',
        width=600,
    )
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis: every year in the unfiltered frame
            scale=alt.Scale(domain=sorted(teste['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            scale=alt.Scale(domain=[0, 120]),  # fixed y-axis
        ),
        color=alt.Color(
            'pais:N',
            title='País',
            legend=alt.Legend(columns=2, symbolLimit=42),
            scale=alt.Scale(
                domain=list(cores_pais.keys()),
                range=list(cores_pais.values()),
            ),
        ),
        tooltip=alt.Tooltip(['pais', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_country)
    .transform_filter(select_country)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_país-exBrasil.html')

g1

<br>

### Chart: only South America countries

In [51]:
# Legend-bound multi-selection: clicking legend entries picks the countries to show
select_country = alt.selection_multi(fields=['pais'], bind='legend')

# Cumulative line chart restricted to South American countries
g1 = (
    alt.Chart(
        teste[teste['continente'] == 'América do Sul'],
        title='Evolução temporal da coleção por país da América do Sul',
        width=600,
    )
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis: every year in the unfiltered frame
            scale=alt.Scale(domain=sorted(teste['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            scale=alt.Scale(domain=[0, 20000]),  # fixed y-axis
        ),
        color=alt.Color(
            'pais:N',
            title='País',
            legend=alt.Legend(columns=2),
            scale=alt.Scale(
                domain=list(cores_AS.keys()),
                range=list(cores_AS.values()),
            ),
        ),
        # include the plotted quantity (cumulative_sum) in the tooltip, as the
        # other cumulative charts in this notebook do
        tooltip=alt.Tooltip(['pais', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_country)
    .transform_filter(select_country)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_país-AmericaDoSul.html')

g1

### Chart: South America countries (ex. Brasil)

In [55]:
# Color scale for the South American countries other than Brasil
countries_AS_exBr = [c for c in cores_AS.keys() if c != 'Brasil']
cores_AS_exBr = [cores_AS[c] for c in countries_AS_exBr]

# Legend-bound multi-selection: clicking legend entries picks the countries to show
select_country = alt.selection_multi(fields=['pais'], bind='legend')

# Cumulative line chart for South American countries, leaving Brasil out
g1 = (
    alt.Chart(
        teste[(teste['continente'] == 'América do Sul') & (teste['pais'] != 'Brasil')],
        title='Evolução temporal da coleção por país da América do Sul (ex. Brasil)',
        width=600,
    )
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis: every year in the unfiltered frame
            scale=alt.Scale(domain=sorted(teste['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            scale=alt.Scale(domain=[0, 120]),  # fixed y-axis
        ),
        color=alt.Color(
            'pais:N',
            title='País',
            legend=alt.Legend(columns=2),
            scale=alt.Scale(domain=countries_AS_exBr, range=cores_AS_exBr),
        ),
        # include the plotted quantity (cumulative_sum) in the tooltip, as the
        # other cumulative charts in this notebook do
        tooltip=alt.Tooltip(['pais', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_country)
    .transform_filter(select_country)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_país-AmericaDoSul-exBrasil.html')

g1

<br>

## Counting per Brazilian State

In [56]:
# Brazilian records only: non-null `class` values per collection year, state and order
teste2 = (
    NewTable[NewTable['pais'] == 'Brasil']
    .groupby(['ano_coleta', 'estado_ou_provincia', 'ordem'])['class']
    .count()
    .reset_index(name='counts')
)

### Creating a column with Brazilian regions

In [57]:
# State (as spelled in `estado_ou_provincia`) -> region code.
# The two multi-state keys are malformed values that appear in the database;
# each is assigned to a single region.
regioes = {
    # Southeast
    'Rio de Janeiro': 'SE',
    'São Paulo': 'SE',
    'Espírito Santo': 'SE',
    'Minas Gerais': 'SE',
    # Northeast
    'Pernambuco': 'NE',
    'Bahia': 'NE',
    'Ceará': 'NE',
    'Paraíba': 'NE',
    'Alagoas': 'NE',
    'Piauí': 'NE',
    'Rio Grande do Norte': 'NE',
    'Sergipe': 'NE',
    # Maranhão belongs to the Northeast in Brazil's official regional division
    # (it was previously mapped to 'N', which put its records in the wrong region)
    'Maranhão': 'NE',
    # South
    'Santa Catarina': 'S',
    'Paraná': 'S',
    'Rio Grande do Sul': 'S',
    'Santa Catarina-Rio Grande do Sul': 'S',
    # North
    'Amazonas': 'N',
    'Roraima': 'N',
    'Pará': 'N',
    'Acre': 'N',
    'Rondônia': 'N',
    'Amapá': 'N',
    'Tocantins': 'N',
    # Center-West
    'Goiás': 'CO',
    'Mato Grosso': 'CO',
    'Mato Grosso do Sul': 'CO',
    'Distrito Federal': 'CO',
    'Brasília': 'CO',
    'Minas Gerais/Goiás/Distrito Federal': 'CO',
}

# region column; a state missing from `regioes` raises KeyError, so gaps in
# the mapping surface immediately instead of producing silent NaNs
teste2['regiao'] = teste2['estado_ou_provincia'].apply(lambda x: regioes[str(x)])

# "<state>, <region>" label column
teste2['regiao_e_estado'] = teste2['estado_ou_provincia'] + ', ' + teste2['regiao']

# labels ordered by region and, within a region, by total count (both descending).
# Only `counts` is summed: summing the whole frame would also try to aggregate
# the text and year columns.
sorting = (
    teste2.groupby(['regiao_e_estado', 'regiao'])['counts']
    .sum()
    .reset_index()
    .rename(columns={'counts': 'soma'})
    .sort_values(['regiao', 'soma'], ascending=False)['regiao_e_estado']
    .unique()
)

In [58]:
# NOTE: `teste2` already has the information we need (see the counts-per-region
# chart - time_spacial): one row per (year, state, order) whose `counts` column
# is the number of records. The per-state yearly total is therefore the SUM of
# `counts`; counting rows (the previous `.count()['ordem']`) only gave the
# number of distinct orders collected in that state and year.
# NOTE(review): values are now much larger than before — re-check any fixed
# y-axis domain downstream that was tuned to the old numbers.
teste = teste2.groupby(['ano_coleta', 'regiao_e_estado'])['counts'].sum().reset_index()

# ordered by state label and then by year, ready for the running total
teste = teste.sort_values(['regiao_e_estado', 'ano_coleta'])

In [59]:
# Running total of `counts` within each "<state>, <region>" label.
# groupby().cumsum() returns a Series on the frame's own index, so every value
# lands on its own row no matter how the frame is ordered (the previous loop
# assigned by position and was only correct for a frame sorted by label).
# Rows accumulate in their current order, i.e. by year within each label.
teste['cumulative_sum'] = teste.groupby('regiao_e_estado')['counts'].cumsum()

In [60]:
# Split the "<state>, <region>" label back into its two parts.
# The label was built with ', ' as separator, so split on exactly that, once,
# from the right: splitting on ',' alone left a leading space in the region
# code (' SE' instead of 'SE').
parts = teste['regiao_e_estado'].str.rsplit(', ', n=1)
teste['estado'] = parts.str[0]
teste['regiao'] = parts.str[1]

### Chart: cumulative counts per Brazilian State

In [65]:
# Legend-bound multi-selection: clicking legend entries picks the states to show
select_state = alt.selection_multi(fields=['estado'], bind='legend')

# Cumulative line chart, one line per Brazilian state
g1 = (
    alt.Chart(teste, title='Evolução temporal da coleção por Estado', width=600)
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis: every year in the frame, whatever the selection
            scale=alt.Scale(domain=sorted(teste['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            # fixed y-axis (does not rescale when states are filtered), with the
            # upper bound taken from the data instead of a hard-coded number so
            # no line can run past the top of the plot
            scale=alt.Scale(domain=[0, int(teste['cumulative_sum'].max())]),
        ),
        color=alt.Color(
            'estado:N',
            title='Estado',
            legend=alt.Legend(columns=2, symbolLimit=42),
            scale=alt.Scale(
                domain=list(cores_estados.keys()),
                range=list(cores_estados.values()),
            ),
        ),
        tooltip=alt.Tooltip(['regiao_e_estado', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_state)
    .transform_filter(select_state)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_estado.html')

g1

<br>

### Chart: cumulative counts per Brazilian Region

In [82]:
# Records per collection year and region.
# `teste2` has one row per (year, state, order) whose `counts` column is the
# number of records, so the regional total is the SUM of `counts`; counting
# rows (the previous `.count()['ordem']`) only gave the number of
# (state, order) combinations seen in that region and year.
# NOTE(review): values are now much larger than before — re-check any fixed
# y-axis domain downstream that was tuned to the old numbers.
temp = teste2.groupby(['ano_coleta', 'regiao'])['counts'].sum().reset_index()

# ordered by region and then by year, ready for the running total
temp = temp.sort_values(['regiao', 'ano_coleta'])

In [83]:
# Running total of `counts` within each region.
# groupby().cumsum() returns a Series on the frame's own index, so every value
# lands on its own row no matter how the frame is ordered (the previous loop
# assigned by position and was only correct for a frame sorted by region).
# Rows accumulate in their current order, i.e. by year within each region.
temp['cumulative_sum'] = temp.groupby('regiao')['counts'].cumsum()

In [88]:
# Legend-bound multi-selection: clicking legend entries picks the regions to show
select_region = alt.selection_multi(fields=['regiao'], bind='legend')

# Cumulative line chart, one line per Brazilian region
g1 = (
    alt.Chart(temp, title='Evolução temporal da coleção por Região', width=600)
    .mark_line(point=True)
    .encode(
        x=alt.X(
            'ano_coleta',
            type="ordinal",
            title='Ano de Coleta',
            # fixed x-axis built from the frame being plotted (`temp`), not from
            # the per-state frame left over from an earlier cell
            scale=alt.Scale(domain=sorted(temp['ano_coleta'].unique())),
        ),
        y=alt.Y(
            'cumulative_sum',
            title='',
            sort=alt.EncodingSortField('counts', op="count", order='descending'),
            # fixed y-axis (does not rescale when regions are filtered), with the
            # upper bound taken from the data instead of a hard-coded number so
            # no line can run past the top of the plot
            scale=alt.Scale(domain=[0, int(temp['cumulative_sum'].max())]),
        ),
        color=alt.Color(
            'regiao:N',
            title='Região',
            legend=alt.Legend(columns=1, symbolLimit=42),
            scale=alt.Scale(
                domain=list(cores_regioes.keys()),
                range=list(cores_regioes.values()),
            ),
        ),
        tooltip=alt.Tooltip(['regiao', 'ano_coleta', 'counts', 'cumulative_sum']),
    )
    .add_selection(select_region)
    .transform_filter(select_region)
    .configure_title(fontSize=16)
    .configure_axis(labelFontSize=12, titleFontSize=12)
    .configure_legend(labelFontSize=12, titleFontSize=12)
)

# to export the chart:
# g1.save('./graphs/cumCounts/time-spacial/evolucao_por_regiao.html')

g1

<br>

**That's it!**