# Sankey Diagram

---

## Exportação e importação (visualização em Sankey)
- Usando Plotly

### Tratamento dos dados

* Carregando o dataset de exportações e importações realizadas no mundo no ano de 2024

In [45]:
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px  #paletas

# Plotly Express "D3" qualitative palette; the Sankey cells below index into it
# (modulo its length) to color nodes and links.
d3_colors = px.colors.qualitative.D3

In [2]:
# Columns to keep from the trade spreadsheet.
cols = [
    'period',
    'flowCode',
    'reporterDesc',
    'reporterISO',
    'partnerDesc',
    'partnerISO',
    'refYear',
    'primaryValue',
]

# Trade records restricted to the columns above (original author's note:
# "all 2024 exports").
df = pd.read_excel("TradeData.xlsx", usecols=cols)
# NOTE: this is not the last expression of the cell, so the preview is not rendered.
df.head()
# ISO code / country / continent lookup table.
map_df = pd.read_csv("country_continent_full.csv")


* Unindo o dataset de ISO/país/continente ao `df` existente

In [None]:
# Attach each partner's continent by matching partnerISO against the lookup's
# ISO3 column. The left join keeps every trade row, then the lookup's helper
# columns are dropped and 'Continent' is renamed to 'partner continent'.
df = (
    df.merge(map_df, how='left', left_on='partnerISO', right_on='ISO3')
      .rename(columns={'Continent': 'partner continent'})
      .drop(columns=['Country', 'ISO3'])
)

display(df.head())

Unnamed: 0,refYear,period,reporterISO,reporterDesc,flowCode,partnerISO,partnerDesc,primaryValue,partner continent
0,2024,2024,ALB,Albania,M,W00,World,9604259000.0,
1,2024,2024,ALB,Albania,M,AFG,Afghanistan,530.78,Other/Unknown
2,2024,2024,ALB,Albania,M,ATA,Antarctica,638.766,Other/Unknown
3,2024,2024,ALB,Albania,M,DZA,Algeria,75873450.0,Africa
4,2024,2024,ALB,Albania,M,ASM,American Samoa,4946.63,Other/Unknown


In [33]:
# Brazil's trade rows, split by flowCode into exports ('X') and imports ('M').
# Partner 'W00' (shown as "World" in the preview above) is excluded from both.
reported_by_brazil = df['reporterISO'] == 'BRA'
not_world_total = df['partnerISO'] != 'W00'

brazilExp = df[reported_by_brazil & (df['flowCode'] == 'X') & not_world_total]
brazilImp = df[reported_by_brazil & (df['flowCode'] == 'M') & not_world_total]

brazilExp.head()

Unnamed: 0,refYear,period,reporterISO,reporterDesc,flowCode,partnerISO,partnerDesc,primaryValue,partner continent
4381,2024,2024,BRA,Brazil,X,AFG,Afghanistan,3098084.0,Other/Unknown
4382,2024,2024,BRA,Brazil,X,ALB,Albania,100579900.0,Europe
4383,2024,2024,BRA,Brazil,X,ATA,Antarctica,127253.0,Other/Unknown
4384,2024,2024,BRA,Brazil,X,DZA,Algeria,2567007000.0,Africa
4385,2024,2024,BRA,Brazil,X,ASM,American Samoa,873659.0,Other/Unknown


##  

#### 1. Primeira visualização: exportações do Brasil → seus top n parceiros

In [None]:
# Brazil's export rows, leaving out the 'W00' partner.
is_brazil_export = (df['reporterISO'] == 'BRA') & (df['flowCode'] == 'X')
brazilExp = df[is_brazil_export & (df['partnerISO'] != 'W00')]

# How many partners to show.
n = 15

# Sum primaryValue per partner and keep the n largest totals.
value_by_partner = brazilExp.groupby("partnerDesc")["primaryValue"].sum()
top_partners = value_by_partner.nlargest(n).reset_index()

# Node labels: Brazil first (index 0), then one node per partner.
partner_names = top_partners["partnerDesc"].tolist()
labels = ["Brazil", *partner_names]

# One link per partner: every link leaves node 0 (Brazil) and ends at the
# partner's position in `labels`.
source = [0 for _ in partner_names]
target = [position for position, _ in enumerate(partner_names, start=1)]
value = top_partners["primaryValue"].tolist()



def hex_to_rgba(hex_color: str, alpha: float = 0.5) -> str:
    """Convert a hex color into a CSS-style ``rgba(r,g,b,alpha)`` string.

    Args:
        hex_color: Color written as ``#RRGGBB`` or shorthand ``#RGB``; the
            leading ``#`` is optional. An 8-digit ``#RRGGBBAA`` value is
            accepted too, but its embedded alpha is ignored in favor of
            ``alpha``.
        alpha: Opacity, inserted verbatim into the result.

    Returns:
        The string ``'rgba(r,g,b,alpha)'`` with decimal channel values.

    Raises:
        ValueError: If ``hex_color`` does not have 3, 6 or 8 hex digits, or
            contains characters that are not valid hexadecimal.
    """
    h = hex_color.strip().lstrip('#')
    if len(h) == 3:
        # Expand shorthand: 'abc' -> 'aabbcc'.
        h = ''.join(digit * 2 for digit in h)
    if len(h) not in (6, 8):
        # Fail loudly instead of silently computing wrong channels from a
        # truncated slice.
        raise ValueError(
            f"expected a '#RGB' or '#RRGGBB' color, got {hex_color!r}"
        )
    r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4))
    return f'rgba({r},{g},{b},{alpha})'

# Assign each label a palette color, wrapping around when there are more
# labels than palette entries.
palette = {name: d3_colors[slot % len(d3_colors)] for slot, name in enumerate(labels)}

# Node colors, in the same order as `labels`.
node_colors = [palette[name] for name in labels]

# Each link takes the color of its source node, made half-transparent
# (entries of `source` are indices into `labels`).
link_colors = [hex_to_rgba(node_colors[origin], 0.5) for origin in source]


# Assemble the Sankey trace from its node and link descriptions.
sankey_nodes = dict(
    pad=15,
    thickness=20,
    line=dict(color="black", width=0.5),
    label=labels,
    color=node_colors,
)
sankey_links = dict(
    source=source,
    target=target,
    value=value,
    color=link_colors,
)
fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])

fig.update_layout(title_text=f" 🌎 Exportações do Brasil - Top {n} Parceiros", font_size=12)
fig.show()


#### 2. Adicionando mais um nível de nós ao gráfico, para mostrar o valor exportado por continente e por país

In [60]:
# Brazil's export rows, leaving out the 'W00' partner.
brazilExp = df[(df['reporterISO'] == 'BRA') &
               (df['flowCode'] == 'X') &
               (df['partnerISO'] != 'W00')]

# Number of partner countries to keep. The selection below now reads this
# value (it used to hard-code 20 and ignore `n`).
n = 20

# Sum primaryValue per (country, continent) pair and keep the n largest.
top_partners = (
    brazilExp.groupby(["partnerDesc", "partner continent"])["primaryValue"]
    .sum()
    .nlargest(n)
    .reset_index()
)

# Node labels: Brazil, then the continents present among the top partners
# (in order of first appearance), then the partner countries.
labels = ["Brazil"]
continents = top_partners["partner continent"].unique().tolist()
labels.extend(continents)
labels.extend(top_partners["partnerDesc"].tolist())

# Label -> node index lookup.
label_index = {label: i for i, label in enumerate(labels)}

links_source = []
links_target = []
links_value = []

# Links Brazil -> continent, valued by the sum over that continent's top
# partners. The totals are computed once instead of re-filtering the frame
# per continent, and no longer overwrite the `value` list built by the
# previous visualization.
continent_totals = top_partners.groupby("partner continent")["primaryValue"].sum()
for cont in continents:
    links_source.append(label_index["Brazil"])
    links_target.append(label_index[cont])
    links_value.append(continent_totals[cont])

# Links continent -> country, one per top partner.
for country, cont, exported in zip(
    top_partners["partnerDesc"],
    top_partners["partner continent"],
    top_partners["primaryValue"],
):
    links_source.append(label_index[cont])
    links_target.append(label_index[country])
    links_value.append(exported)


d3_colors = px.colors.qualitative.D3  # category10

def hex_to_rgba(hex_color: str, alpha: float = 0.5) -> str:
    """Convert a hex color into a CSS-style ``rgba(r,g,b,alpha)`` string.

    Args:
        hex_color: Color written as ``#RRGGBB`` or shorthand ``#RGB``; the
            leading ``#`` is optional. An 8-digit ``#RRGGBBAA`` value is
            accepted too, but its embedded alpha is ignored in favor of
            ``alpha``.
        alpha: Opacity, inserted verbatim into the result.

    Returns:
        The string ``'rgba(r,g,b,alpha)'`` with decimal channel values.

    Raises:
        ValueError: If ``hex_color`` does not have 3, 6 or 8 hex digits, or
            contains characters that are not valid hexadecimal.
    """
    h = hex_color.strip().lstrip('#')
    if len(h) == 3:
        # Expand shorthand: 'abc' -> 'aabbcc'.
        h = ''.join(digit * 2 for digit in h)
    if len(h) not in (6, 8):
        # Fail loudly instead of silently computing wrong channels from a
        # truncated slice.
        raise ValueError(
            f"expected a '#RGB' or '#RRGGBB' color, got {hex_color!r}"
        )
    r, g, b = (int(h[i:i + 2], 16) for i in (0, 2, 4))
    return f'rgba({r},{g},{b},{alpha})'

# Stable label -> color map; wraps around the palette when there are more
# labels than colors.
palette = {label: d3_colors[i % len(d3_colors)] for i, label in enumerate(labels)}

# Node colors, in the same order as `labels`.
node_colors = [palette[label] for label in labels]

# Each link takes the color of its SOURCE node, made half-transparent
# (entries of `links_source` are indices into `labels`).
link_colors = [hex_to_rgba(palette[labels[s]], 0.5) for s in links_source]

# Sankey
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=20,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=node_colors
    ),
    link=dict(
        source=links_source,
        target=links_target,
        value=links_value,
        color=link_colors
    )
)])


# The partner count in the title follows `n` instead of a hard-coded "20",
# so the title stays correct when `n` is changed.
fig.update_layout(title_text=f"🌎 Exportações do Brasil - Brasil → Continente → País ({n})", font_size=12)
fig.show()

#### 3. Representação com todos os parceiros comerciais do Brasil, gerando uma visualização comprometida

In [62]:
# Every export row reported by Brazil, leaving out the 'W00' partner.
# Copied so the result is independent of `df`.
brazil_export_rows = (
    (df['reporterISO'] == 'BRA')
    & (df['flowCode'] == 'X')
    & (df['partnerISO'] != 'W00')
)
brazilExp_all = df[brazil_export_rows].copy()

# primaryValue summed per continent, and per (continent, country) pair.
continent_agg = (
    brazilExp_all.groupby('partner continent')['primaryValue'].sum().reset_index()
)
country_agg = (
    brazilExp_all.groupby(['partner continent', 'partnerDesc'])['primaryValue']
    .sum()
    .reset_index()
)

# Node labels: Brazil, then continents, then countries.
continents = continent_agg["partner continent"].tolist()
labels = ["Brazil", *continents, *country_agg["partnerDesc"].tolist()]

# Label -> node index lookup.
label_index = {name: position for position, name in enumerate(labels)}

# Links Brazil -> continent: one per row of continent_agg.
links_source = [label_index["Brazil"] for _ in continents]
links_target = [label_index[name] for name in continents]
links_value = continent_agg["primaryValue"].tolist()

# Links continent -> country: one per row of country_agg, skipping any row
# whose endpoints are not known labels.
for cont_name, country_name, exported in zip(
    country_agg["partner continent"],
    country_agg["partnerDesc"],
    country_agg["primaryValue"].tolist(),
):
    if cont_name not in label_index or country_name not in label_index:
        continue
    links_source.append(label_index[cont_name])
    links_target.append(label_index[country_name])
    links_value.append(exported)


# Rebuild the node colors for THIS cell's labels. The `node_colors` list left
# over from the previous visualization was built for a different `labels`
# list, so reusing it would misalign the colors (or raise NameError when that
# cell has not been run).
palette = {label: d3_colors[i % len(d3_colors)] for i, label in enumerate(labels)}
node_colors = [palette[label] for label in labels]

# Sankey (links keep plotly's default color)
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=node_colors
    ),
    link=dict(
        source=links_source,
        target=links_target,
        value=links_value,
    )
)])

fig.update_layout(title_text="🌎 Exportações do Brasil - Brasil → Continente → País (Todos os Parceiros) - Visualização comprometida", font_size=10)
fig.show()

-----