In [1]:
import pandas as pd
import plotly.graph_objects as go

In [4]:
#import df derived from Statistics Canada physical flow account for plastic material, by product category, 
#from .csv, release date: 2024-03-18
#code used to process is in 'multiIndex_v4_from_to_StatCan_product_category' (2024-05-07 10:38AM)
merged_prod_data_2020_CA_Total = pd.read_excel('38100150_df_2020_CA_TotalPlastics.xlsx')
merged_prod_data_2020_CA_Total.reset_index()

Unnamed: 0,index,Variable,UOM,COORDINATE,VALUE,From,To
0,0,Plastic in domestically produced products,Tonnes,1.1.1,4474733,CA domestic production,CA consumption
1,1,Net trade (imports less exports) of newly prod...,Tonnes,1.2.1,2646334,Net trade of new products,CA consumption
2,2,Plastic in products produced for Canadian cons...,Tonnes,1.3.1,7121068,,
3,3,Net stock of plastic in products that remain i...,Tonnes,1.4.1,2193412,CA consumption,Net in-use stock addition
4,4,Total discarded plastic in products,Tonnes,1.5.1,4927656,CA consumption,Total discarded plastics
5,5,Plastic leaked permanently into the environment,Tonnes,1.6.1,40367,Total discarded plastics,Leak into environment
6,6,Collected plastic sent directly for disposal o...,Tonnes,1.7.1,4887289,Total discarded plastics,Collection
7,7,Diverted plastic waste and scrap sent for mate...,Tonnes,1.8.1,1248665,Collection,Material recovery
8,8,Disposed plastic waste and scrap not diverted ...,Tonnes,1.9.1,3636560,Collection,Total disposed plastics
9,9,Sorted and baled plastic waste and scrap sent ...,Tonnes,1.10.1,439412,Material recovery,Recycling


In [5]:
#turn VALUE into absolute values
merged_prod_data_2020_CA_Total['VALUE'] =merged_prod_data_2020_CA_Total['VALUE'].apply(lambda x: abs(x))
merged_prod_data_2020_CA_Total

Unnamed: 0,Variable,UOM,COORDINATE,VALUE,From,To
0,Plastic in domestically produced products,Tonnes,1.1.1,4474733,CA domestic production,CA consumption
1,Net trade (imports less exports) of newly prod...,Tonnes,1.2.1,2646334,Net trade of new products,CA consumption
2,Plastic in products produced for Canadian cons...,Tonnes,1.3.1,7121068,,
3,Net stock of plastic in products that remain i...,Tonnes,1.4.1,2193412,CA consumption,Net in-use stock addition
4,Total discarded plastic in products,Tonnes,1.5.1,4927656,CA consumption,Total discarded plastics
5,Plastic leaked permanently into the environment,Tonnes,1.6.1,40367,Total discarded plastics,Leak into environment
6,Collected plastic sent directly for disposal o...,Tonnes,1.7.1,4887289,Total discarded plastics,Collection
7,Diverted plastic waste and scrap sent for mate...,Tonnes,1.8.1,1248665,Collection,Material recovery
8,Disposed plastic waste and scrap not diverted ...,Tonnes,1.9.1,3636560,Collection,Total disposed plastics
9,Sorted and baled plastic waste and scrap sent ...,Tonnes,1.10.1,439412,Material recovery,Recycling


In [6]:
#reduce df to only VALUE, From and To that are needed for the Sankey
links = pd.DataFrame(merged_prod_data_2020_CA_Total, columns=['VALUE','From','To'])
links

Unnamed: 0,VALUE,From,To
0,4474733,CA domestic production,CA consumption
1,2646334,Net trade of new products,CA consumption
2,7121068,,
3,2193412,CA consumption,Net in-use stock addition
4,4927656,CA consumption,Total discarded plastics
5,40367,Total discarded plastics,Leak into environment
6,4887289,Total discarded plastics,Collection
7,1248665,Collection,Material recovery
8,3636560,Collection,Total disposed plastics
9,439412,Material recovery,Recycling


In [7]:
#remove rows where From (and To) are "NaN"
links = links.dropna(subset=['From'])
links

Unnamed: 0,VALUE,From,To
0,4474733,CA domestic production,CA consumption
1,2646334,Net trade of new products,CA consumption
3,2193412,CA consumption,Net in-use stock addition
4,4927656,CA consumption,Total discarded plastics
5,40367,Total discarded plastics,Leak into environment
6,4887289,Total discarded plastics,Collection
7,1248665,Collection,Material recovery
8,3636560,Collection,Total disposed plastics
9,439412,Material recovery,Recycling
10,809253,Material recovery,Total disposed plastics


In [8]:
#create unique list of source/from and target/to for plotly
unique_from_to = list(pd.unique(links[['From','To']].values.ravel('K')))
unique_from_to

['CA domestic production',
 'Net trade of new products',
 'CA consumption',
 'Total discarded plastics',
 'Collection',
 'Material recovery',
 'Recycling',
 'Total disposed plastics',
 'Net in-use stock addition',
 'Leak into environment',
 'Net trade baled plastic',
 'Secondary feedstock',
 'Landfill or incineration',
 'Incineration (WtE) or gasification',
 'Net trade disposed plastics']

In [9]:
mapping_dict={k:v for v,k in enumerate(unique_from_to)}
mapping_dict
links['From']=links['From'].map(mapping_dict)
links['To']=links['To'].map(mapping_dict)
links_dict=links.to_dict(orient='list')
links_dict

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  links['From']=links['From'].map(mapping_dict)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  links['To']=links['To'].map(mapping_dict)


{'VALUE': [4474733,
  2646334,
  2193412,
  4927656,
  40367,
  4887289,
  1248665,
  3636560,
  439412,
  809253,
  254,
  362250,
  74791,
  3916862,
  107577,
  496166],
 'From': [0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7],
 'To': [2, 2, 8, 3, 9, 4, 5, 7, 6, 7, 10, 11, 7, 12, 13, 14]}

In [12]:
#test of basic plotly Sankey
#updated to show unit (t) and color in grey (#7F7F7F) used for unspecified/all in CIRAIG colour set
fig = go.Figure(data=[go.Sankey(
    valueformat = None,
    valuesuffix = 't', #all flows in unit tonnes
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "#7F7F7F", width = 0.5),
      label = unique_from_to,
      color = "#7F7F7F"
    ),
    link = dict(
      source = links_dict['From'], # indices correspond to labels
      target = links_dict['To'],
      value = links_dict['VALUE']
  ))])

fig.update_layout(title_text="Sankey Diagram, Total plastics in products, 2020, Canada", font_size=10)
fig.show()

In [17]:
#update layout (make wider)
fig.update_layout(
    width=1100,
    height=500,
    font=dict(size=10),
    title="",
    plot_bgcolor='rgba(0,0,0,0)'
)

# Display the figure
fig.show()

In [19]:
#update layout (make wider)
fig.update_layout(
    width=1100,
    height=400,
    font=dict(size=12),
    title="",
    plot_bgcolor='rgba(0,0,0,0)'
)

# Display the figure
fig.show()