# Setup

In [1]:
import pandas as pd
import altair as vg
import seaborn as sns
import numpy as np

# Data Prep

In [2]:
# Read the demand workbook into the notebook-wide DataFrame `df`.
# NOTE(review): this is an absolute path on one user's Windows desktop; the
# notebook will not run on another machine until it is pointed at a shared or
# project-relative location.
filepath = r'C:\Users\guilh\Desktop\Dados_v3.xlsx'
df = pd.read_excel(io=filepath)

In [3]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

Unnamed: 0,Sigla,Nome,Latitude,Longitude,Município,Modal,Rio,Latitude_2,Longitude_2,OBS,Considerar,Carga_2023,Carga_2024,GN,GNL,Conteineres/Ano
0,AM-000,MANAUS,-3.138087,-60.027484,Manaus,,Negro,-3.138087,-60.027484,Porto de Manaus,0,0,0,,,
1,AM-083,SACAMBU,-3.272442,-60.952701,Manacapuru,Hidroviário,Solimões,-3.274633,-60.933956,"Lago Cabaliana, UTE Sacambu; este nó será agre...",0,1487,1511,455875.7,759.792829,15.195857
2,AM-028,CAMPINAS,-3.278834,-61.099313,Manacapuru,Hidroviário,Solimões,-3.278834,-61.099313,Lago Cabaliana + Rio Paraná do Aramã; este nó ...,1,1085,1099,331573.4,552.622316,11.052446
3,AM-023,CAAPIRANGA,-3.324601,-61.212903,Manacapuru,Gasoduto,Solimões,-3.278834,-61.099313,Verificar acesso ao município; supondo entrega...,0,9984,10253,3093378.0,5155.629306,103.112586
4,AM-004,ANAMÃ,-3.572283,-61.407035,Anamã,Gasoduto,Solimões,-3.572283,-61.407035,,0,12675,13347,4026852.0,6711.41952,134.22839


In [4]:
# Show the dtype pandas assigned to each column when reading the workbook.
df.dtypes

Sigla               object
Nome                object
Latitude           float64
Longitude          float64
Município           object
Modal               object
Rio                 object
Latitude_2         float64
Longitude_2        float64
OBS                 object
Considerar           int64
Carga_2023           int64
Carga_2024           int64
GN                 float64
GNL                float64
Conteineres/Ano    float64
dtype: object

# EDA

In [5]:
# Summary statistics for the numeric columns; the `count` row also reveals
# how many rows are missing coordinates or demand values.
df.describe()

Unnamed: 0,Latitude,Longitude,Latitude_2,Longitude_2,Considerar,Carga_2023,Carga_2024,GN,GNL,Conteineres/Ano
count,86.0,86.0,86.0,86.0,96.0,96.0,96.0,82.0,82.0,82.0
mean,-3.773587,-63.488361,-3.778263,-63.564175,0.625,20103.364583,20673.541667,6879314.0,11465.524114,229.310482
std,2.07632,4.406264,2.087978,4.343704,0.486664,31821.860685,32843.231021,10439840.0,17399.725018,347.9945
min,-8.747551,-72.581236,-8.747551,-72.581236,0.0,0.0,0.0,65168.2,108.613667,2.172273
25%,-4.721653,-67.061608,-4.7197,-67.061608,0.0,1568.25,1626.75,798084.2,1330.140293,26.602806
50%,-3.354899,-62.973198,-3.350754,-63.020109,1.0,10170.0,10398.0,3806758.0,6344.596973,126.891939
75%,-2.746235,-59.750366,-2.707575,-60.004683,1.0,21500.5,22015.5,7134032.0,11890.053804,237.801076
max,1.190652,-56.668167,1.190652,-56.668167,1.0,210942.0,220434.0,66505960.0,110843.264444,2216.865289


## Mapa de demanda

In [28]:
# Topo map
# Base layer: municipality outlines read from a remote GeoJSON file
# (geojs-13-mun.json in the tbrugz/geodata-br repository).
filepath = 'https://raw.githubusercontent.com/tbrugz/geodata-br/master/geojson/geojs-13-mun.json'

topo = vg.Chart(filepath).mark_geoshape(
    # Hex colours need 3, 4, 6 or 8 digits; the previous "#dd" was not a
    # valid colour, so the outline colour was undefined.
    stroke="#ddd",
    strokeWidth=0.3,
    color="black",
).project(
    type='mercator'
)

# Demanda
# Selection driven by clicks on the `Rio` legend entries.
selection = vg.selection_multi(fields=['Rio'], bind='legend')

# One point per row of `df`, positioned by its coordinates.
points = vg.Chart(df, title="Demanda em contêineres por ano").mark_point().encode(
    vg.Longitude('Longitude:Q'),
    vg.Latitude('Latitude:Q'),
    # Selected rivers keep their category colour; everything else is greyed out.
    color=vg.condition(
        selection,
        vg.Color('Rio:N', scale=vg.Scale(scheme='category10')),
        vg.value('lightgray'),
    ),
    # scale=None disables the size scale, so the raw column value is used
    # directly as the mark size.
    size=vg.Size(
        'Conteineres/Ano:Q',
        scale=None,
        legend=vg.Legend(title='Conteineres por Ano'),
    ),
    tooltip=['Sigla', 'Nome', 'Latitude', 'Longitude', 'Município', 'Modal', 'Rio',
             'Latitude_2', 'Longitude_2', 'OBS', 'Considerar', 'Carga_2023',
             'Carga_2024', 'GN', 'GNL', 'Conteineres/Ano']
).add_selection(
    # Attach the selection to the layer that owns the `Rio` legend. Adding it
    # to the layered chart instead places it on the first layer (the basemap),
    # which has no `Rio` encoding for the legend binding to hook into.
    selection
)

# Mapa + Demanda + Configs
vg.layer(
    topo,
    points,
).properties(
    width=600,
    height=600,
).configure_title(
    fontSize=24,
).configure_axis(
    grid=False,
    labelFontSize=14,
    titleFontSize=20,
).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
)

## Demanda (permite zoom e seleção na legenda)

In [7]:
# Selection driven by clicks on the `Rio` legend entries.
selection = vg.selection_multi(fields=['Rio'], bind='legend')

# Plain longitude/latitude scatter (no map projection) so that the chart can
# be panned and zoomed via `.interactive()`.
chart = vg.Chart(df, title="Demanda em contêineres por ano").mark_point().encode(
    # zero=False lets each axis fit the data range instead of starting at 0.
    vg.X('Longitude:Q', scale=vg.Scale(zero=False)),
    vg.Y('Latitude:Q', scale=vg.Scale(zero=False)),
    # Selected rivers keep their category colour; everything else is greyed out.
    color=vg.condition(
        selection,
        vg.Color('Rio:N', scale=vg.Scale(scheme='category10')),
        vg.value('lightgray'),
    ),
    # scale=None disables the size scale, so the raw column value is used
    # directly as the mark size.
    size=vg.Size(
        'Conteineres/Ano:Q',
        scale=None,
        legend=vg.Legend(title='Conteineres por Ano'),
    ),
    tooltip=['Sigla', 'Nome', 'Latitude', 'Longitude', 'Município', 'Modal', 'Rio',
             'Latitude_2', 'Longitude_2', 'OBS', 'Considerar', 'Carga_2023',
             'Carga_2024', 'GN', 'GNL', 'Conteineres/Ano']
).properties(
    width=600,
    height=600,
).configure_title(
    fontSize=24,
).configure_axis(
    grid=False,
    labelFontSize=14,
    titleFontSize=20,
).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
).add_selection(
    selection
).interactive()

chart

## Pareto

In [8]:
# Pareto table: keep only rows that have a demand value, order them from the
# largest to the smallest, then add the running total and its share of the
# overall total. Filtering already yields a new frame, so no explicit copy
# of `df` is needed.
df_pareto = (
    df[df['Conteineres/Ano'].notna()]
    .sort_values(by=['Conteineres/Ano'], ascending=False)
    .assign(**{
        "count cumsum": lambda d: d['Conteineres/Ano'].cumsum(),
        "cumpercentage": lambda d: d["count cumsum"] / d['Conteineres/Ano'].sum(),
    })
)

# Explicit x-axis order (descending demand); without it the ordinal axis
# would not follow the row order of the frame.
sort_order = df_pareto["Nome"].tolist()

# Selection driven by clicks on the `Rio` legend entries.
selection = vg.selection_multi(fields=['Rio'], bind='legend')

# Create Base
# Shared x encoding, title and size; every layer below derives from it.
base = vg.Chart(df_pareto, title="Pareto da Demanda").encode(
    vg.X("Nome:O", sort=sort_order),
).properties(
    width=800,
    height=600,
)

# Bars chart
# Width/height are inherited from `base`, so they are not repeated here.
bars = base.mark_bar().encode(
    vg.Y("Conteineres/Ano:Q"),
    color=vg.condition(
        selection,
        vg.Color('Rio:N', scale=vg.Scale(scheme='category10')),
        vg.value('lightgray'),
    ),
)

# Line chart
# Cumulative share of demand, formatted as a percentage. The former `text`
# encoding was dropped: a line mark has no text channel, so it had no effect.
# NOTE(review): the axis title says "Count" although the axis shows a
# percentage — confirm the intended wording.
line = base.mark_line(strokeWidth=1.5, color="#cb4154").encode(
    vg.Y(
        'cumpercentage:Q',
        title='Cumulative Count',
        axis=vg.Axis(format=".0%"),
    ),
)

# Mark the percentage values on the line with Circle marks
# axis=None avoids drawing a second copy of the percentage axis.
points = base.mark_circle(strokeWidth=3, color="#cb4154").encode(
    vg.Y('cumpercentage:Q', axis=None),
    tooltip=['Nome', 'Município', 'Modal', 'Rio', 'OBS', 'Considerar', 'Conteineres/Ano'],
)

# Layer all the elements together
# Independent y scales let the bars (absolute demand) and the cumulative
# line/points (0-100 %) each use their own axis range.
(bars + line + points).resolve_scale(
    y='independent'
).configure_title(
    fontSize=24,
).configure_axis(
    grid=False,
    labelFontSize=10,
    titleFontSize=20,
).add_selection(
    selection
).interactive()

# Considerar

In [25]:
# Topo map
# Base layer: municipality outlines read from a remote GeoJSON file
# (geojs-13-mun.json in the tbrugz/geodata-br repository), drawn faintly so
# the overlaid nodes stand out.
filepath = 'https://raw.githubusercontent.com/tbrugz/geodata-br/master/geojson/geojs-13-mun.json'

topo = vg.Chart(filepath).mark_geoshape(
    # Hex colours need 3, 4, 6 or 8 digits; the previous "#dd" was not a
    # valid colour, so the outline colour was undefined.
    stroke="#ddd",
    strokeWidth=0.3,
    opacity=0.2,
    color="black",
).project(
    type='mercator'
)

# Demanda
# Selection driven by clicks on the `Rio` legend entries.
selection = vg.selection_multi(fields=['Rio'], bind='legend')

# Only rows flagged with Considerar == 1 are drawn. A line mark with
# point=True connects the rows of each colour group in data order and also
# draws a marker at every row.
# (The size encoding by 'Conteineres/Ano' is disabled in this view.)
points = vg.Chart(df[df.Considerar == 1], title="Nós Considerados").mark_line(point=True, size=3).encode(
    vg.Longitude('Longitude:Q'),
    vg.Latitude('Latitude:Q'),
    # Selected rivers keep their category colour; everything else is greyed out.
    color=vg.condition(
        selection,
        vg.Color('Rio:N', scale=vg.Scale(scheme='category10')),
        vg.value('lightgray'),
    ),
    tooltip=['Sigla', 'Nome', 'Latitude', 'Longitude', 'Município', 'Modal', 'Rio',
             'Latitude_2', 'Longitude_2', 'OBS', 'Considerar', 'Carga_2023',
             'Carga_2024', 'GN', 'GNL', 'Conteineres/Ano']
).add_selection(
    # Attach the selection to the layer that owns the `Rio` legend. Adding it
    # to the layered chart instead places it on the first layer (the basemap),
    # which has no `Rio` encoding for the legend binding to hook into.
    selection
)

# Mapa + Demanda + Configs
vg.layer(
    topo,
    points,
).properties(
    width=600,
    height=600,
).configure_title(
    fontSize=24,
).configure_axis(
    grid=False,
    labelFontSize=14,
    titleFontSize=20,
).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
)