In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import plotly as py

import holoviews as hv
import plotly.graph_objects as go
import plotly.express as pex
import psutil
import os
import plotly.io as pio

pd.__version__

'1.1.5'

In [23]:
metrics = pd.read_csv("Input/CountrySubsidies_OtherMetrics.csv")
countries_ss = pd.read_csv("Output/Source_sink_countries_CI.csv")

In [24]:
# making 2 groups out of the original 4
metrics.replace('Very Low','Low', inplace = True)
metrics.replace('Very High','High', inplace = True)

In [25]:
countries_metrics = pd.merge(left = countries_ss, right = metrics, how = 'left')
countries_metrics.columns

Index(['Unnamed: 0', 'Country', 'Sink_country', 'Subsidies_EEZ_value',
       'lower_bound_subsidies_USD', 'upper_bound_subsidies_USD', 'Region',
       ' Harmful Subsidies Provided (USD) ',
       ' Harmful Subsidies Burden (USD) ', 'HDI (2017)', 'HDI_group',
       ' Beneficial subsidies (USD) ', ' Ben Subs ', 'EPI fish Stock Status',
       'Stock Status', 'EPI MPAs', 'MPAs'],
      dtype='object')

In [26]:
metrics_1 = countries_metrics[['Country', 'Sink_country', 'Subsidies_EEZ_value', 
                                      'Region', 'HDI_group', ' Ben Subs ','Stock Status','MPAs']]
metrics_burden = metrics[['Country', 'HDI_group', ' Ben Subs ', 'Stock Status','MPAs']]
metrics_burden.columns = ['Sink_country', 'HDI_|group_burden', ' Ben Subs_burden ', 
                           'Stock Status_burden','MPAs_burden']
metrics_1.columns

Index(['Country', 'Sink_country', 'Subsidies_EEZ_value', 'Region', 'HDI_group',
       ' Ben Subs ', 'Stock Status', 'MPAs'],
      dtype='object')

In [27]:
metrics_2 = pd.merge(left = metrics_1, right = metrics_burden, how = 'left')
# metrics_2[metrics_2.EEZ_country_sink.str.contains('high')]
# metrics_3 = metrics_2.fillna('high_seas') # taking all high seas out!
metrics_2.columns

Index(['Country', 'Sink_country', 'Subsidies_EEZ_value', 'Region', 'HDI_group',
       ' Ben Subs ', 'Stock Status', 'MPAs', 'HDI_|group_burden',
       ' Ben Subs_burden ', 'Stock Status_burden', 'MPAs_burden'],
      dtype='object')

In [28]:
metrics_2.columns = ['Country', 'Sink_country', 'Subsidies_USD', 'Region', 'HDI_group',
       ' Ben Subs ', 'Stock Status', 'MPAs', 'HDI_group_burden',
       ' Ben Subs_burden ', 'Stock Status_burden', 'MPAs_burden']

In [29]:
color_link_short = ['lightblue','lightblue',
                    'teal','teal',
                    'lightblue', 'teal',
                    'lightblue','teal']


color_node = ['lightblue','teal',
              'lightblue','teal']

In [44]:
# Stock Status
metrics_SS = metrics_2.groupby(['Stock Status','Stock Status_burden'], as_index = False).sum()
SS = metrics_SS[['Stock Status','Stock Status_burden','Subsidies_USD']]
SS.columns = ['source_SS', 'burden_SS', 'Subsidies_USD']
print(SS.groupby(['source_SS']).sum())
print(SS.groupby(['burden_SS']).sum())

           Subsidies_USD
source_SS               
High        1.423245e+10
Low         6.628348e+09
           Subsidies_USD
burden_SS               
High        1.358287e+10
Low         7.277920e+09


In [31]:
# Stock Status (no high seas)
max_source_index = 0
max_target_index = 0
countries = {}
data_dict = {'source_SS': [], 'target_SS': [], 'data': {'labels': [], 'source': [], 'target': [], 'values': []}}

#  create 2 distinct lists for source and target
for index, row in SS.iterrows():
    if row['source_SS'] not in data_dict['source_SS']:
        data_dict['source_SS'].append(row['source_SS'])
    if row['burden_SS'] not in data_dict['target_SS']:
        data_dict['target_SS'].append(row['burden_SS'])

#  create extended list for labels
data_dict['data']['labels'].extend(data_dict['source_SS'])
data_dict['data']['labels'].extend(data_dict['target_SS'])


for index, row in SS.iterrows():
    source_index = data_dict['source_SS'].index(row['source_SS'])
    target_index = data_dict['target_SS'].index(row['burden_SS'])
        
    data_dict['data']['labels'].append(row['source_SS'])
    data_dict['data']['source'].append(source_index)
    data_dict['data']['target'].append(len(data_dict['source_SS']) + target_index)
    data_dict['data']['values'].append(row['Subsidies_USD'])
print(data_dict['data'])

{'labels': ['High', 'Low', 'High', 'Low', 'High', 'High', 'Low', 'Low'], 'source': [0, 0, 1, 1], 'target': [2, 3, 2, 3], 'values': [12142802169.394707, 2089643410.4772956, 1440071377.8923895, 5188276671.722784]}


In [32]:
# data to dict, dict to sankey stock status 
dd = data_dict['data']
link = dict(source=dd['source'], target=dd['target'], value=dd['values'],color = color_link_short)
node = dict(pad=50, thickness=5, color = color_node)
data = go.Sankey(link=link, node=node)
# plot
fig = go.Figure(data)
fig.update_layout(title="c. Stock Status")
fig.show()

In [43]:
# Beneficial Subsidies
metrics_Ben_sub = metrics_2.groupby([' Ben Subs ',' Ben Subs_burden '], as_index = False).sum()
Ben_sub = metrics_Ben_sub[[' Ben Subs ',' Ben Subs_burden ','Subsidies_USD']]
Ben_sub.columns = ['source_Ben_sub', 'burden_Ben_sub', 'Subsidies_USD']
print(Ben_sub.groupby(['source_Ben_sub']).sum())
print(Ben_sub.groupby(['burden_Ben_sub']).sum())

                Subsidies_USD
source_Ben_sub               
High             1.955313e+10
Low              1.307661e+09
                Subsidies_USD
burden_Ben_sub               
High             1.889724e+10
Low              1.963551e+09


In [34]:
# Beneficial subsidies (no high seas)
max_source_index = 0
max_target_index = 0
countries = {}
data_dict = {'source_Ben_sub': [], 'target_Ben_sub': [], 'data': {'labels': [], 'source': [], 'target': [], 'values': []}}

#  create 2 distinct lists for source and target
for index, row in Ben_sub.iterrows():
    if row['source_Ben_sub'] not in data_dict['source_Ben_sub']:
        data_dict['source_Ben_sub'].append(row['source_Ben_sub'])
    if row['burden_Ben_sub'] not in data_dict['target_Ben_sub']:
        data_dict['target_Ben_sub'].append(row['burden_Ben_sub'])

#  create extended list for labels
data_dict['data']['labels'].extend(data_dict['source_Ben_sub'])
data_dict['data']['labels'].extend(data_dict['target_Ben_sub'])


for index, row in Ben_sub.iterrows():
    source_index = data_dict['source_Ben_sub'].index(row['source_Ben_sub'])
    target_index = data_dict['target_Ben_sub'].index(row['burden_Ben_sub'])
        
    data_dict['data']['labels'].append(row['source_Ben_sub'])
    data_dict['data']['source'].append(source_index)
    data_dict['data']['target'].append(len(data_dict['source_Ben_sub']) + target_index)
    data_dict['data']['values'].append(row['Subsidies_USD'])
print(data_dict['data'])

{'labels': ['High', 'Low', 'High', 'Low', 'High', 'High', 'Low', 'Low'], 'source': [0, 0, 1, 1], 'target': [2, 3, 2, 3], 'values': [18694950616.391537, 858181834.7255615, 202291675.42445636, 1105369502.9456332]}


In [35]:
# data to dict, dict to sankey
dd = data_dict['data']
link = dict(source=dd['source'], target=dd['target'], value=dd['values'],color = color_link_short)
node = dict( pad=50, thickness=5, color = color_node)
data = go.Sankey(link=link, node=node)
# plot
fig = go.Figure(data)
fig.update_layout(title="b. Beneficial subsidies")
fig.show()

In [42]:
# HDI
metrics_HDI = metrics_2.groupby(['HDI_group','HDI_group_burden'], as_index = False).sum()
HDI = metrics_HDI[['HDI_group','HDI_group_burden','Subsidies_USD']]
HDI.columns = ['source_HDI_group', 'burden_HDI_group', 'Subsidies_USD']
print(HDI.groupby(['source_HDI_group']).sum())
print(HDI.groupby(['burden_HDI_group']).sum())

                  Subsidies_USD
source_HDI_group               
High               1.739868e+10
Low                3.462111e+09
                  Subsidies_USD
burden_HDI_group               
High               1.540121e+10
Low                5.459581e+09


In [37]:
# HDI no High seas
max_source_index = 0
max_target_index = 0
countries = {}
data_dict = {'source_HDI': [], 'target_HDI': [], 'data': {'labels': [], 'source': [], 'target': [], 'values': []}}

#  create 2 distinct lists for source and target
for index, row in HDI.iterrows():
    if row['source_HDI_group'] not in data_dict['source_HDI']:
        data_dict['source_HDI'].append(row['source_HDI_group'])
    if row['burden_HDI_group'] not in data_dict['target_HDI']:
        data_dict['target_HDI'].append(row['burden_HDI_group'])

#  create extended list for labels
data_dict['data']['labels'].extend(data_dict['source_HDI'])
data_dict['data']['labels'].extend(data_dict['target_HDI'])


for index, row in HDI.iterrows():
    source_index = data_dict['source_HDI'].index(row['source_HDI_group'])
    target_index = data_dict['target_HDI'].index(row['burden_HDI_group'])
        
    data_dict['data']['labels'].append(row['source_HDI_group'])
    data_dict['data']['source'].append(source_index)
    data_dict['data']['target'].append(len(data_dict['source_HDI']) + target_index)
    data_dict['data']['values'].append(row['Subsidies_USD'])
print(data_dict['data'])

{'labels': ['High', 'Low', 'High', 'Low', 'High', 'High', 'Low', 'Low'], 'source': [0, 0, 1, 1], 'target': [2, 3, 2, 3], 'values': [15387356772.320387, 2011325816.5041344, 13855970.859246718, 3448255069.803413]}


In [38]:
# data to dict, dict to sankey HDI no high seas
dd = data_dict['data']
link = dict(source=dd['source'], target=dd['target'], value=dd['values'],color = color_link_short)
node = dict( pad=50, thickness=5, color = color_node)
data = go.Sankey(link=link, node=node)
# plot
fig = go.Figure(data)
fig.update_layout(title="a. Human Development Index")
fig.show()