# Exercício Gráfico Fluxo.
Vocês vão utilizar um dos gráficos do tipo Fluxo (Flow).  
  
Referência: https://www.python-graph-gallery.com/  
  
Em grupo, vocês devem decidir qual base aberta vão utilizar para fazer uma demonstração de um fluxo de dados.  
  
No final da aula de hoje vocês devem ter decidido qual base utilizar e, se possível, feito uma tentativa de como utilizar um dos gráficos de Fluxo.  

Base selecionada: https://www.kaggle.com/amandaroseknudsen/edgarfoodemissions/download  
Gráfico selecionado: Sankey

In [1]:
# Library imports
from IPython.display import Image, display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's offline notebook mode so that iplot() can render figures
# inline. With connected=True plotly.js is loaded from the web instead of being
# embedded in the notebook, so rendering needs an internet connection.
init_notebook_mode(connected=True)

In [2]:
# Load the dataset from the CSV file that sits next to the notebook
df = pd.read_csv('EDGARfood.csv', sep=",")

# Keep only the 2015 records; everything below works on that single year
df = df.loc[lambda quadro: quadro['Year'] == 2015]

In [3]:
# Inspect the 2015 subset (all original columns are still present at this point)
df

Unnamed: 0,Food System Stage,FS Stage Order,GHG,Country,Year,GHG Emissions,Unit
25,Land,Stage 1,Methane (CH4),Afghanistan,2015,0.0000,"metric tons CO2e (GWP-100, AR5)"
51,Land,Stage 1,Methane (CH4),Albania,2015,241.6000,"metric tons CO2e (GWP-100, AR5)"
77,Land,Stage 1,Methane (CH4),Algeria,2015,104.5000,"metric tons CO2e (GWP-100, AR5)"
103,Land,Stage 1,Methane (CH4),Angola,2015,782808.4000,"metric tons CO2e (GWP-100, AR5)"
129,Land,Stage 1,Methane (CH4),Argentina,2015,125203.7000,"metric tons CO2e (GWP-100, AR5)"
...,...,...,...,...,...,...,...
128595,Waste,Stage 8,Nitrous oxide (N2O),Viet Nam,2015,922927.8818,"metric tons CO2e (GWP-100, AR5)"
128621,Waste,Stage 8,Nitrous oxide (N2O),Western Sahara,2015,4751.4500,"metric tons CO2e (GWP-100, AR5)"
128647,Waste,Stage 8,Nitrous oxide (N2O),Yemen,2015,152628.3400,"metric tons CO2e (GWP-100, AR5)"
128673,Waste,Stage 8,Nitrous oxide (N2O),Zambia,2015,91272.3600,"metric tons CO2e (GWP-100, AR5)"


In [4]:
# List the available column names before deciding which ones to discard
df.columns

Index(['Food System Stage', 'FS Stage Order', 'GHG', 'Country', 'Year',
       'GHG Emissions', 'Unit'],
      dtype='object')

In [5]:
# Discard the columns that the Sankey chart does not use (only Country,
# Food System Stage, GHG and GHG Emissions are read further down).
# Reassigning instead of using inplace=True avoids mutating the filtered slice
# created by the 2015 filter (which can raise SettingWithCopyWarning), and
# errors='ignore' lets the cell be re-run after the columns are already gone.
df = df.drop(columns=['Year', 'Unit', 'FS Stage Order'], errors='ignore')

In [6]:
# Inspect the frame again after the unused columns were removed
df

Unnamed: 0,Food System Stage,GHG,Country,GHG Emissions
25,Land,Methane (CH4),Afghanistan,0.0000
51,Land,Methane (CH4),Albania,241.6000
77,Land,Methane (CH4),Algeria,104.5000
103,Land,Methane (CH4),Angola,782808.4000
129,Land,Methane (CH4),Argentina,125203.7000
...,...,...,...,...
128595,Waste,Nitrous oxide (N2O),Viet Nam,922927.8818
128621,Waste,Nitrous oxide (N2O),Western Sahara,4751.4500
128647,Waste,Nitrous oxide (N2O),Yemen,152628.3400
128673,Waste,Nitrous oxide (N2O),Zambia,91272.3600


In [7]:
# Keep only a handful of countries to limit the amount of data in the chart

df = df.loc[df['Country'].isin(['Brazil', 'Argentina', 'Peru'])]
df

Unnamed: 0,Food System Stage,GHG,Country,GHG Emissions
129,Land,Methane (CH4),Argentina,1.252037e+05
519,Land,Methane (CH4),Brazil,9.770882e+05
3275,Land,Methane (CH4),Peru,1.748970e+04
4731,Land,Carbon dioxide (CO2),Argentina,1.299663e+08
5147,Land,Carbon dioxide (CO2),Brazil,5.312697e+08
...,...,...,...,...
121523,Waste,Carbon dioxide (CO2),Brazil,1.354404e+05
122953,Waste,Carbon dioxide (CO2),Peru,4.275227e+03
123733,Waste,Nitrous oxide (N2O),Argentina,6.678611e+05
124201,Waste,Nitrous oxide (N2O),Brazil,3.148454e+06


In [8]:
# Remove the stages and gases that will not be represented in the chart.
# A row is discarded when its stage is Farm/Land or its gas is CH4/CO2.

df = df.loc[
    lambda quadro: ~(
        quadro['Food System Stage'].isin(['Farm', 'Land'])
        | quadro['GHG'].isin(['Methane (CH4)', 'Carbon dioxide (CO2)'])
    )
]

# The previous row labels are kept as an 'index' column by reset_index()
df = df.reset_index()
df

Unnamed: 0,index,Food System Stage,GHG,Country,GHG Emissions
0,42119,Processing,Nitrous oxide (N2O),Argentina,187536.5
1,42613,Processing,Nitrous oxide (N2O),Brazil,1903494.0
2,45733,Processing,Nitrous oxide (N2O),Peru,56898.67
3,58447,Transport,Nitrous oxide (N2O),Argentina,45748.49
4,58941,Transport,Nitrous oxide (N2O),Brazil,195351.8
5,62113,Transport,Nitrous oxide (N2O),Peru,15749.29
6,75087,Packaging,Nitrous oxide (N2O),Argentina,1829.577
7,75581,Packaging,Nitrous oxide (N2O),Brazil,253567.9
8,78701,Packaging,Nitrous oxide (N2O),Peru,319.602
9,91467,Retail,F-gases (Fluorinated),Brazil,2736974.0


In [9]:
# Order the rows by country, then stage, then emission value. The node list
# below is built from unique() on these columns, so this ordering decides the
# order in which labels are encountered there.
df = df.sort_values(by=['Country', 'Food System Stage', 'GHG Emissions'])

In [10]:
# Build the list of chart nodes (countries, food-system stages and gases)

# Colour palette; node colours are assigned from it in order
cores = ['#99EEFF', '#216D38', '#F24141', '#FFFFE0', '#98FB98', '#66CDAA', '#AFEEEE', '#B0E0E6', '#DEB887', '#DCDCDC', '#9370DB', '#FFEFD5', '#4682B4', '#778899']

# Nodes & links
# The first row is a header; every following row is [ID, Label, Color]
nodes = [['ID', 'Label', 'Color']]

# Running node ID, shared across the three columns so IDs are unique and
# consecutive: countries first, then stages, then gases.
contador = 0

for coluna in ['Country', 'Food System Stage', 'GHG']:
    for item in df[coluna].unique():
        # Cycle through the palette so that having more nodes than colours
        # reuses colours instead of raising IndexError.
        nodes.append([contador, item, cores[contador % len(cores)]])
        contador += 1

nodes

[['ID', 'Label', 'Color'],
 [0, 'Argentina', '#99EEFF'],
 [1, 'Brazil', '#216D38'],
 [2, 'Peru', '#F24141'],
 [3, 'Consumer', '#FFFFE0'],
 [4, 'Packaging', '#98FB98'],
 [5, 'Processing', '#66CDAA'],
 [6, 'Retail', '#AFEEEE'],
 [7, 'Transport', '#B0E0E6'],
 [8, 'Waste', '#DEB887'],
 [9, 'Nitrous oxide (N2O)', '#DCDCDC'],
 [10, 'F-gases (Fluorinated)', '#9370DB']]

In [11]:
# Helper that finds a node's ID from its label text

def LocalizaNode(nodes, texto):
    """Return the ID of the first entry of ``nodes`` labelled ``texto``.

    Each entry is read positionally: element 0 is the ID and element 1 is the
    label. Entries are scanned in order and the first match wins. When no
    entry has a matching label the function returns ``None``.
    """
    ids_encontrados = (registro[0] for registro in nodes if registro[1] == texto)
    return next(ids_encontrados, None)
        
LocalizaNode(nodes, 'Waste') # Quick check of the function: shows the ID assigned to the 'Waste' node

8

In [12]:
# Build the list of links (flows) between nodes

# The first row is a header; every following row is
# [source node ID, target node ID, value, link colour]
links = [['Source','Target','Value','Link Color']]

# Colour of each node keyed by its ID, read from the node list itself. This
# keeps every link in the colour actually assigned to its country node instead
# of indexing the palette with the node ID (which fails with IndexError once
# the ID is larger than the palette).
cor_do_no = {no[0]: no[2] for no in nodes}

# Each data row yields two links: country -> stage and stage -> gas, both
# carrying the row's emission value and painted in the country's colour.
pais_para_etapa = []
etapa_para_gas = []

for index, row in df.iterrows():
    id_pais = LocalizaNode(nodes, row['Country'])
    id_etapa = LocalizaNode(nodes, row['Food System Stage'])
    id_gas = LocalizaNode(nodes, row['GHG'])
    cor = cor_do_no[id_pais]

    pais_para_etapa.append([id_pais, id_etapa, row['GHG Emissions'], cor])
    etapa_para_gas.append([id_etapa, id_gas, row['GHG Emissions'], cor])

# Same ordering as two separate passes: all country -> stage links first,
# followed by all stage -> gas links.
links.extend(pais_para_etapa)
links.extend(etapa_para_gas)

links

[['Source', 'Target', 'Value', 'Link Color'],
 [0, 3, 304090.6915, '#99EEFF'],
 [0, 4, 1829.57668, '#99EEFF'],
 [0, 5, 187536.4648, '#99EEFF'],
 [0, 6, 1779.762243, '#99EEFF'],
 [0, 7, 45748.4922, '#99EEFF'],
 [0, 8, 667861.1294, '#99EEFF'],
 [1, 3, 2236095.91, '#216D38'],
 [1, 4, 253567.8727, '#216D38'],
 [1, 5, 1903493.844, '#216D38'],
 [1, 6, 6986.909261, '#216D38'],
 [1, 6, 2736973.669, '#216D38'],
 [1, 7, 195351.805, '#216D38'],
 [1, 8, 3148453.744, '#216D38'],
 [2, 3, 116256.8974, '#F24141'],
 [2, 4, 319.601979, '#F24141'],
 [2, 5, 56898.67143, '#F24141'],
 [2, 6, 412.271898, '#F24141'],
 [2, 6, 460847.3848, '#F24141'],
 [2, 7, 15749.28865, '#F24141'],
 [2, 8, 212460.4707, '#F24141'],
 [3, 9, 304090.6915, '#99EEFF'],
 [4, 9, 1829.57668, '#99EEFF'],
 [5, 9, 187536.4648, '#99EEFF'],
 [6, 9, 1779.762243, '#99EEFF'],
 [7, 9, 45748.4922, '#99EEFF'],
 [8, 9, 667861.1294, '#99EEFF'],
 [3, 9, 2236095.91, '#216D38'],
 [4, 9, 253567.8727, '#216D38'],
 [5, 9, 1903493.844, '#216D38'],
 [6, 9

In [13]:
# Build the chart

# Separate the header row from the data rows WITHOUT mutating `nodes`/`links`.
# Using pop(0) here would remove a real data row (and use it as the header)
# every time the cell is executed again.
nodes_headers, *nodes_rows = nodes
links_headers, *links_rows = links
df_nodes = pd.DataFrame(nodes_rows, columns = nodes_headers)
df_links = pd.DataFrame(links_rows, columns = links_headers)

# Discard incomplete links row by row. Dropping missing values column by column
# would leave source/target/value/color with different lengths, pairing each
# source with the wrong target and value.
df_links = df_links.dropna(axis=0, how='any')

# Sankey settings
data_trace = dict(
    type='sankey',
    domain = dict(
      x =  [0,1],
      y =  [0,1]
    ),
    orientation = "h",
    valueformat = ".0f",
    node = dict(
      pad = 10,
      thickness = 10,
      line = dict(
        color = "black",
        width = 0
      ),
      # Links refer to nodes by position, so a missing label is replaced by an
      # empty string instead of being dropped (dropping would shift every
      # following label onto the wrong node).
      label =  df_nodes['Label'].fillna(''),
      color = df_nodes['Color']
    ),
    link = dict(
      # Node references are cast back to integers in case a missing value
      # turned the column into floats before the incomplete rows were removed.
      source = df_links['Source'].astype(int),
      target = df_links['Target'].astype(int),
      value = df_links['Value'],
      color = df_links['Link Color'],
  )
)

layout = dict(
    title = "Emissão de gases por processo",
    height = 772,
    font = dict(
      size = 10),)

# Render the chart
fig = dict(data=[data_trace], layout=layout)
iplot(fig, validate=False)