# Sugar data visualization
We wanted to make a visualization with some data mentioning sugar production or consumption.

In [1]:
import pandas as pd
import numpy as np
import plotly as py
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
import plotly.graph_objs as go
import plotly.express as px

In [2]:
init_notebook_mode(connected=True)

NameError: name 'pyo' is not defined

## Sugar data set analysis

This data set comes from the FAO, which is a suborganization of the UN, focused on Food and Agriculture. It depicts how many tons were produced and how many hectares were used for the production of sugar by country and year.

In [14]:
data = pd.read_csv('../data/sugar/FAOSTAT_sugarproduction_5-16-2023.csv')
data

Unnamed: 0,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (CPC),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,QCL,Crops and livestock products,4,Afghanistan,5312,Area harvested,1802,Sugar cane,2008,2008,ha,3000.0,E,Estimated value
1,QCL,Crops and livestock products,4,Afghanistan,5312,Area harvested,1802,Sugar cane,2009,2009,ha,3080.0,A,Official figure
2,QCL,Crops and livestock products,4,Afghanistan,5312,Area harvested,1802,Sugar cane,2010,2010,ha,3100.0,A,Official figure
3,QCL,Crops and livestock products,4,Afghanistan,5312,Area harvested,1802,Sugar cane,2011,2011,ha,3082.0,A,Official figure
4,QCL,Crops and livestock products,4,Afghanistan,5312,Area harvested,1802,Sugar cane,2012,2012,ha,3100.0,A,Official figure
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2951,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1802,Sugar cane,2017,2017,tonnes,3101000.0,T,Unofficial figure
2952,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1802,Sugar cane,2018,2018,tonnes,3582994.0,T,Unofficial figure
2953,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1802,Sugar cane,2019,2019,tonnes,3562000.0,T,Unofficial figure
2954,QCL,Crops and livestock products,716,Zimbabwe,5510,Production,1802,Sugar cane,2020,2020,tonnes,3543771.0,T,Unofficial figure


In [8]:
# extract only 2021 values as per tons produced (as opposed to hectares occupied by the growth of sugar cane)
data = data[data['Year']==2021]
data = data[data['Unit']=='tonnes']
data.reset_index()

Unnamed: 0,index,Domain Code,Domain,Area Code (M49),Area,Element Code,Element,Item Code (CPC),Item,Year Code,Year,Unit,Value,Flag,Flag Description
0,27,QCL,Crops and livestock products,4,Afghanistan,5510,Production,1802,Sugar cane,2021,2021,tonnes,37638.03,I,Imputed value
1,55,QCL,Crops and livestock products,24,Angola,5510,Production,1802,Sugar cane,2021,2021,tonnes,951172.93,I,Imputed value
2,83,QCL,Crops and livestock products,28,Antigua and Barbuda,5510,Production,1802,Sugar cane,2021,2021,tonnes,0.00,M,"Missing value (data cannot exist, not applicable)"
3,111,QCL,Crops and livestock products,32,Argentina,5510,Production,1802,Sugar cane,2021,2021,tonnes,18627376.62,E,Estimated value
4,139,QCL,Crops and livestock products,36,Australia,5510,Production,1802,Sugar cane,2021,2021,tonnes,31133488.00,A,Official figure
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,2843,QCL,Crops and livestock products,862,Venezuela (Bolivarian Republic of),5510,Production,1802,Sugar cane,2021,2021,tonnes,3214538.97,I,Imputed value
102,2871,QCL,Crops and livestock products,704,Viet Nam,5510,Production,1802,Sugar cane,2021,2021,tonnes,10740873.28,A,Official figure
103,2899,QCL,Crops and livestock products,887,Yemen,5510,Production,1802,Sugar cane,2021,2021,tonnes,0.00,I,Imputed value
104,2927,QCL,Crops and livestock products,894,Zambia,5510,Production,1802,Sugar cane,2021,2021,tonnes,5102117.71,I,Imputed value


We want to know if this information is actually relevant to mention somewhere. We map those values to get a better idea. 

In [19]:
fig = px.choropleth(data, locations='Area', locationmode='country names', color='Value')
fig.show()

As expected, Brazil is the first producer by far. The map is not really interesting in itself.

## Sugar paper visualization

We wanted to get of grasp of the data in the following paper : Sustainable Valorization of Waste and By-Products from
Sugarcane Processing, Nicoleta Ungureanu, Valentin Vlădut and Sorin-Stefan Biris (can also be found in ../papers/sustainability-14-11089-v2).

We decided to go with a Sankey plot in order to visualize the flow of each component between the different steps of the process. As a first version, we took into account both the products/wastes and their possible valorization.

In [20]:
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["Juice", "Bagasse", "Straws", "Press mud", "Wastewater",
               "Sugar Production", "Ethanol Production", "Incineration",
               "Bagasse Ash", "Raw Energy (Mechanical)", "Raw Energy (Thermal)", "Syrup", "Molasses", "Sugar", "Vinasse",
               "Ethanol",
               "Biofuel & Cie", "Commercial Sale & Food Production", "Openland disposal", "Fertilizer", "Raw Energy (Electrical)", "Other"],
        color=["rgb(245, 222, 179)", "rgb(255,160,122)", "rgb(240,128,128)", "rgb(233,150,122)", "rgb(143,188,143)",
               "rgb(245, 222, 179)", "rgb(135,206,235)", "rgb(139,69,19)",
               "rgb(169,169,169)", "rgb(189,183,107)", "rgb(189,183,107)", "rgb(255,140,0)", "rgb(128,0,0)",
               "rgb(245, 222, 179)", "rgb(216,191,216)",
               "rgb(135,206,235)",
               "rgb(32,178,170)", "rgb(70,130,180)", "rgb(178,34,34)", "rgb(46,139,87)", "rgb(189,183,107)",
               "rgb(128,128,128)"]
    ),
    link=dict(
        source=[0, 0,       1, 1, 1, 1, 1,      2, 2,   3,  4,      5, 5, 5, 5,         6, 6,           7, 7, 7, 7,         8, 8,        11,     12, 12, 12, 12,     13, 14, 14,     15],
        target=[5, 6,       7, 16, 18, 19, 15,  18, 19, 19, 19,     4, 11, 12, 13,      14, 15,         8, 20, 9, 10,       18, 21,      17,     17, 19, 21, 15,     17, 16, 19,     17],
        value=[26.8, 26.8,  3, 5, 5, 3, 1,      4, 1,   5,  21.924, 21.924, 0.5, 2, 1,  24.74, 2.061,   1, 0.25, 0.25, 1.5, 2.5, 0.5,    0.5,    0.5, 1, 0.25, 0.25, 1,  9, 15.7,    2.25],
        color=[  "rgba(245, 222, 179, 0.6)", "rgba(245, 222, 179, 0.6)", "rgba(255,160,122, 0.6)", "rgba(255,160,122, 0.6)", "rgba(255,160,122, 0.6)", "rgba(255,160,122, 0.6)", "rgba(255,160,122, 0.6)", "rgba(240,128,128,0.6)", "rgba(240,128,128,0.6)",
                 "rgba(233,150,122, 0.6)", "rgba(143,188,143, 0.6)",
                 "rgba(245, 222, 179, 0.6)", "rgba(245, 222, 179, 0.6)", "rgba(245, 222, 179, 0.6)", "rgba(245, 222, 179, 0.6)", "rgba(135,206,235, 0.6)", "rgba(135,206,235, 0.6)", "rgba(139,69,19, 0.6)", "rgba(139,69,19, 0.6)", "rgba(139,69,19, 0.6)", "rgba(139,69,19, 0.6)",
                 "rgba(169,169,169, 0.6)", "rgba(169,169,169, 0.6)", "rgba(255,140,0, 0.6)",
                 "rgba(128,0,0, 0.6)", "rgba(128,0,0, 0.6)", "rgba(128,0,0, 0.6)", "rgba(128,0,0, 0.6)", "rgba(245, 222, 179, 0.6)", "rgba(216,191,216, 0.6)", "rgba(216,191,216, 0.6)",
                 "rgba(135,206,235, 0.6)"]

))])

fig.update_layout(title_text="Waste and by-products, processing of 1 ton of sugarcane", font_size=10)
fig.show()

We decided to focus the aim of the plot on the waste and not on the valorization. Here is a modified version that only shows products and direct waste. We also introduced the names of each step for clarity.

In [21]:
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=[# 0
            "Sugarcane", 
            # 1 2 3 4 5 6
            "Water", "Bagasse", "Straws", "Wastewater", "Mixed juice", "Mill station", 
            # 7 8 9 10 11 12
            "Water", "Chemicals", "Press mud", "Wastewater", "Clarification", "Clear juice",
            # 13 14 15 16
            "Water", "Syrup", "Wastewater", "Evaporation", 
            # 17 18 19 20
            "Water", "Sacarose crystals + syrup", "Wastewater", "Crystallization" 
            # 21 22 23 24
            "Water", "Sugar", "Molasses", "Centrifuge",
        # 25
        "Other"] 
    ),
    link=dict(
        source=[0, 1, 6, 6, 6, 6, 6, 5, 8, 7, 11, 11, 12, 13, 16, 16, 17, 14, 20, 6],
        target=[6, 6, 2, 3, 4, 5, 11, 11, 11, 9, 12, 16, 16, 14, 15, 20, 20, 18, 25],
        value=[1, 0.5, 0.28, 0.28, 0.5, 0.44, 0.44, 0.001, 0.5, 0.03, 0.14, 0.14, 0.5, 0.14, 0.5, 0.5, 0.14, 0.249]
))])

fig.update_layout(title_text="Waste and by-products, processing of 1 ton of sugarcane", font_size=10)
fig.show()

There was an apparent mix up in the values, but the important deduction was that the plot still lacked some design, so we opted for another approach. We switched to Inkscape for the rest of the design.