In [None]:
import json
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import igraph as ig

from config import DATA_PATH, FIGURE_PATH
from tests.VAT import *
from visual_flow_diagram_helpers import *
from NW_validation_modules import *
from analyses.draw_geomap_subnetwork import canton_aggregated_count_and_column, load_shapefiles

## Feed in production network(s)

In [None]:
# Years for which a production-network (PN) graph and a global edgelist are loaded.
years = range(2007, 2015 + 1)

# Collect the per-year tables in lists and concatenate once after the loop:
# calling pd.concat inside the loop re-copies everything accumulated so far,
# and seeding it with an empty DataFrame triggers pandas' empty-entry
# concatenation deprecation warning.
edge_frames = []
node_frames = []
global_edgelists = {}  # year -> edgelist read from 'global_edgelist_<year>.csv'
for y in years:
    banana_pn = ig.read(filename=DATA_PATH / f'pn{y}.graphml', format='graphml')
    edge_frames.append(banana_pn.get_edge_dataframe())
    node_frames.append(banana_pn.get_vertex_dataframe())
    # Firm identifiers are read as strings.
    global_edgelists[y] = pd.read_csv(DATA_PATH / 'firm-level' / f'global_edgelist_{y}.csv',
                                      dtype={'id_supplier': str, 'id_customer': str})

# Row indices of the per-year frames are kept as they are (no ignore_index),
# exactly as the incremental concatenation did.
banana_pn_edgelists = pd.concat(edge_frames)
banana_pn_nodelists = pd.concat(node_frames)

In [None]:
# Countrywide firm table, loaded with contr_type='sociedades'
# (the original author also kept a disabled variant with contr_type='').
cw_nodes = load_firm_data_cw(contr_type='sociedades')

# Lookup tables derived from the countrywide firm table.
# When a key appears on several rows, the last row wins.
ID_TO_SECTOR = {
    firm: isic
    for firm, isic in zip(cw_nodes["firm_id"], cw_nodes["ISIC4"])
}
SECTOR_TO_DESCR = {
    isic: descr
    for isic, descr in zip(cw_nodes["ISIC4"], cw_nodes["descrip_n4"])
}
# firm_id -> (ADM2, ADM1)
ID_TO_ADM = {
    firm: (adm2, adm1)
    for firm, adm2, adm1 in zip(cw_nodes["firm_id"], cw_nodes["ADM2"], cw_nodes["ADM1"])
}

# Fixed colour per ISIC4 sector code; some sectors intentionally share a colour.
sector_color_map = {
    'A0122': "#ffe135",
    'G4630': "#3bba1e",
    'A0163': "#FFB922",
    'G4620': "#3bba1e",
    'C2220': "#2669AC",
    'G4669': "#303030",
    'G4772': "#158C92",
    'S9609': "#AA29B3",
    'C1702': "#9A7444",
    'C1701': "#9A7444",
    'H4923': "#A0A0A0",
}

## Validating

### Money flows diagram

In [None]:
for y in years:
    # Only the 2015 network is drawn; all other years are skipped.
    if y == 2015:
        print(y)

        # Custom IDs that will not be aggregated in the visualization
        # (the original author kept '1899534' as a disabled second entry).
        not_aggregated_IDs = {'1797601'}

        # PN vertices of the current year, used for validation and for the diagram.
        pn_nodes_y = banana_pn_nodelists[banana_pn_nodelists['date'] == y]
        pn_id_set = set(pn_nodes_y['firm_id'])
        validate_ids(not_aggregated_IDs, pn_id_set)

        visible_nodes_df, visible_flows_df = derive_visualization_data(
            global_edgelists[y],
            not_aggregated_IDs,
            pn_nodes_y,
            ID_TO_SECTOR,
            SECTOR_TO_DESCR,
            ID_TO_ADM,
            sector_color_map,
            show_roe=False,
        )

        plot_flows_sankey(visible_nodes_df, visible_flows_df)

### Production network I/O table

In [None]:
for y in years:
    print(y)
    # I/O tables are only drawn for the two most recent years.
    if y not in (2014, 2015):
        continue
    pn_edges_y = banana_pn_edgelists[banana_pn_edgelists['date'] == y]
    pivot = compute_io_table_sector_level(pn_edges_y)
    if not pivot.empty:
        plot_io_pivottable(
            pivot,
            logscale=False,
            xlabel='Customer sector',
            ylabel='Supplier sector',
            title=f'I/O table {y}',
        )

In [None]:
# Countrywide link table, loaded with contr_type=''.
cw_edges = load_link_data_cw(contr_type='')

# Attach the ISIC4 sector of both endpoints; IDs missing from ID_TO_SECTOR map to NaN.
for role in ('supplier', 'customer'):
    cw_edges[f'sector_{role}'] = cw_edges[f'id_{role}'].map(ID_TO_SECTOR)

In [None]:
# Countrywide I/O table for one year (computed with isic_lev=2), restricted to
# the customer columns 'A01' and 'A02' and drawn as an annotated heatmap.
y = 2014
pivot = compute_io_table_sector_level(cw_edges[cw_edges['date'] == y], isic_lev=2)
pivot = pivot.loc[:, ['A01', 'A02']]
# NOTE(review): the original cell evaluated `pivot[pivot > 50000].dropna()` here
# and discarded the result, so no threshold was ever applied. The no-op has been
# removed; assign it back to `pivot` if the 50 000 cut-off is actually wanted.
pivot = pivot.dropna()  # keep only supplier rows with a value in both columns

# ISIC4 codes of candidate input sectors. Currently unused: the row filter
# `pivot.loc[pivot.index.intersection(rel_inputs), :]` was disabled by the author.
rel_inputs = ['A0122', 'A0127', 'A0130',
              'A0161', 'A0163', 'A0164',
              'C2011','C2013','C2021',
              'C2220',
             'G4510', 'G4530', 'G4610', 'G4620', 'G4630', 'G4641', 'G4649',
             'G4651', 'G4652', 'G4653', 'G4659', 'G4661', 'G4662', 'G4663',
             'G4669', 'G4690', 'G4711', 'G4719', 'G4721', 'G4722', 'G4723',
             'G4730', 'G4741', 'G4742', 'G4751', 'G4752', 'G4753', 'G4759',
             'G4761', 'G4762', 'G4763', 'G4764', 'G4771', 'G4772', 'G4773',
             'G4774', 'G4781', 'G4782', 'G4789', 'G4791', 'G4799',
            'H4911', 'H4912', 'H4921', 'H4922', 'H4923', 'H4930',
            'H5011', 'H5012', 'H5021', 'H5022',
            'H5110', 'H5120',
            'H5210', 'H5221', 'H5222', 'H5223', 'H5224', 'H5229'
]

# Plot settings. The original also contained `logscale=False,` — the trailing
# comma bound a one-element tuple, which is truthy — before re-assigning the flag.
logscale = False
xlabel = 'Customer sector'
ylabel = 'Supplier sector'
title = f'I/O table {y}'

x_labels = pivot.columns
y_labels = pivot.index
matrix = pivot.values

# Apply log scale
data_matrix = np.log10(matrix) if logscale else matrix

# Plot
fig, ax = plt.subplots(figsize=(4, 20))
im = ax.imshow(data_matrix, cmap="viridis")

# Ticks and labels
ax.set_xticks(np.arange(len(x_labels)))
ax.set_yticks(np.arange(len(y_labels)))
ax.set_xticklabels(x_labels, rotation=45, ha="right", rotation_mode="anchor")
ax.set_yticklabels(y_labels)
if xlabel:
    ax.set_xlabel(xlabel)
if ylabel:
    ax.set_ylabel(ylabel)

# Annotate each non-missing cell with its value: one decimal on the log scale,
# scientific notation in a smaller font otherwise.
for (i, j), val in np.ndenumerate(data_matrix):
    if np.isnan(val):
        continue
    if logscale:
        ax.text(j, i, f"{val:.1f}", ha="center", va="center", color="w")
    else:
        ax.text(j, i, f"{val:.1e}", ha="center", va="center", color="w", fontsize=5)

ax.set_title(title)
fig.tight_layout()
plt.show()

In [None]:
# Official input-output workbook: one sheet per year plus a 'sector_maps' sheet
# that maps the table's product codes ('Código') to ISIC Rev. 4 ('CIIU Rev 4 ').
years_ex = range(2013, 2019+1)

# Open the workbook once. Year availability is read from the sheet names
# instead of parsing every sheet inside a bare `except:` just to see if it exists.
with pd.ExcelFile(DATA_PATH / 'ECU_MIP_71x71.xlsx') as mip_book:
    dict_io_sect_df = pd.read_excel(mip_book, sheet_name='sector_maps', dtype={'Código': str})

    available_years = [yr for yr in years_ex if str(yr) in mip_book.sheet_names]
    # NOTE(review): the last available year is deliberately dropped by the
    # original code; the reason is not documented — confirm.
    available_years = available_years[:-1]

    io_tables = {}
    for yr in available_years:
        try:
            io_tables[yr] = pd.read_excel(mip_book, header=0, dtype={'No. de orden': float},
                                          sheet_name=str(yr))
        except ValueError as err:
            # Best effort, as before: an unparsable sheet leaves its panel blank,
            # but the reason is now reported instead of being swallowed.
            print(f'Sheet {yr} could not be parsed, its panel is left empty: {err}')
            io_tables[yr] = None

dict_io_sect = dict(zip(dict_io_sect_df['Código'], dict_io_sect_df['CIIU Rev 4 ']))

# Create subplots only for available years
fig, ax = plt.subplots(nrows=len(available_years), ncols=1,
                       figsize=(10, len(available_years)*4), sharex=True)
fig.suptitle('Supplying sectors')

# plt.subplots returns a bare Axes when there is a single row; make it indexable.
ax = np.atleast_1d(ax)
for i, y in enumerate(available_years):
    io_table = io_tables[y]
    if io_table is None:
        ax[i].set_axis_off()
        continue
    io_table = io_table.rename({'Unnamed: 2': 'sector'}, axis=1)
    io_table['code'] = io_table['CICN']
    # Columns whose header is a number hold the matrix itself.
    numeric_cols = [col for col in io_table.columns if isinstance(col, (int, float))]

    # Keep only rows that carry an order number. `.copy()` makes the frame
    # independent, so adding the 'ISIC' column below does not write into a slice
    # of `io_table`.
    intermediate_demand = io_table.loc[
        io_table['No. de orden'].notna(),
        ['No. de orden', 'CICN', 'sector'] + numeric_cols,
    ].copy()
    # First data row / first numeric column; named "banana" by the original
    # author — presumably the banana product is listed first, confirm.
    banana_row = intermediate_demand.iloc[0, 3:]
    banana_col = intermediate_demand.iloc[:, 3]
    intermediate_demand['ISIC'] = intermediate_demand['CICN'].map(dict_io_sect)

    # Tick labels: "<order no.> (<ISIC, max 10 chars>) <sector name, max 30 chars>"
    sector_labels = (intermediate_demand['No. de orden'].astype(int).astype(str)
                     + ' (' + intermediate_demand['ISIC'].str[0:10] + ') '
                     + intermediate_demand['sector'].str[0:30])
    banana_col.index = sector_labels
    banana_row.index = sector_labels

    banana_col.plot(kind='bar', ax=ax[i])
    ax[i].set_title(y); ax[i].set_ylabel('USD (K$)')
    ax[i].grid(); ax[i].set_axisbelow(True)
plt.tight_layout()
plt.show()

In [None]:
import networkx as nx

# Uses `intermediate_demand` and `numeric_cols` as left by the last year
# processed in the previous cell.
matrix_data = intermediate_demand.set_index('No. de orden')[numeric_cols]

# Nodes are the order numbers of the sectors (the row index of the matrix).
sectors = matrix_data.index.tolist()
io_idx_to_sector = dict(zip(intermediate_demand['No. de orden'], intermediate_demand['sector']))

# Directed graph: an edge goes from the row sector to the column sector.
G = nx.DiGraph()
G.add_nodes_from(sectors)

# Candidate (source, target, flow) triples, visited row by row.
cell_flows = (
    (source_sector, target_sector, matrix_data.iloc[i, j])
    for i, source_sector in enumerate(matrix_data.index)
    for j, target_sector in enumerate(numeric_cols)
)
# Only strictly positive flows become edges; the flow is stored as 'weight'.
G.add_weighted_edges_from(
    (source, target, flow) for source, target, flow in cell_flows if flow > 0
)

# Basic network statistics
print(f"Number of nodes: {G.number_of_nodes()}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Network density: {nx.density(G):.3f}")

### Capturing banana-growing and wholesale firms and their locations (compared with official data)

In [None]:
# Define your province colors upfront (modify as needed)
PROVINCE_COLORS = {
    "Guayas": plt.cm.tab10.colors[0],
    "El Oro": plt.cm.tab10.colors[1],
    "Los Rios": plt.cm.tab10.colors[2],
    "Pichincha": plt.cm.tab10.colors[3],
    "Manabi": plt.cm.tab10.colors[4],
    "Others": 'grey'    # Add more provinces as needed
}

In [None]:
notable_prov = set(PROVINCE_COLORS)

# Both sources are pivoted the same way: one row per year, one column per province.
year_by_province = dict(index="year", columns="province", values="value", aggfunc='sum')

for sector in ('A0122', 'G4630'):
    # Production-network firms of the sector, grouped by (year, province);
    # 'cw_s_out' is the countrywide column handed to the helper as col_sum.
    province_df_sect = group_sectorfirms_per_province(
        banana_pn_nodelists,
        sector=sector,
        col_sum='cw_s_out',
        notable_prov=notable_prov,
    )
    # Keep the firm counts only.
    province_df_sect = province_df_sect[province_df_sect['element'] == 'count']
    pivot_1 = province_df_sect.pivot_table(**year_by_province).fillna(0)

    # Official data for the same sector
    cfn_empresas = load_cfn_empresas_per_provincia(sector)
    pivot_2 = cfn_empresas.pivot_table(**year_by_province).fillna(0)

    plot_stacked_bar_comparison(pivot_1, pivot_2, sector, column_color=PROVINCE_COLORS)

In [None]:
# No sector filter is passed to the helper (set e.g. 'A0122' to restrict it).
sector = None
province_df_sect = group_sectorfirms_per_province(
    cw_nodes,
    sector=sector,
    col_sum='cw_s_out',
    notable_prov=PROVINCE_COLORS.keys(),
)

# Keep the firm counts and arrange them as year x province.
is_count = province_df_sect['element'] == 'count'
province_df_sect = province_df_sect[is_count]
pivot_1 = (
    province_df_sect
    .pivot_table(index="year", columns="province", values="value", aggfunc='sum')
    .fillna(0)
)

In [None]:
# Title is "Countrywide", followed by the sector code when one was selected.
sector_suffix = "" if sector is None else " " + sector
plot_stacked_bar(pivot_1, column_color=PROVINCE_COLORS, title_descr=f'Countrywide{sector_suffix}')

### Micro-small-medium-large enterprise

In [None]:
# Rank plot of one vertex column for the A0122 firms, one line per year.
A0122_nodelist = banana_pn_nodelists[banana_pn_nodelists['ISIC4'] == 'A0122'].copy()

# Column that is ranked and plotted. It is defined before the loop because the
# axis labels below need it too, and it is also used for sorting, so changing
# it keeps ranking and plot consistent.
col = 'cw_s_out'

fig, ax = plt.subplots(figsize=(6, 4))

for y in years:
    # Firms of the year, largest value first; rank 1 is the largest firm.
    A0122_nodelist_y = (A0122_nodelist[A0122_nodelist['date'] == int(y)]
                        .sort_values(col, ascending=False)
                        .reset_index(drop=True))
    A0122_nodelist_y['rank'] = range(1, len(A0122_nodelist_y) + 1)

    ax.plot(A0122_nodelist_y['rank'], A0122_nodelist_y[col], marker='o', label=str(y), alpha=0.7)

ax.set(xlabel='Rank', ylabel=f'{col} Value', title=f'{col} Values by Rank for Each Year',
       yscale='log')
# Legend is placed outside the axes, to the right.
ax.legend(title='Year', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

### Export

#### Groundtruth

In [None]:
from analyses.FAOSTAT import load_faostat_data

pd.set_option('display.max_rows', 11)

# --- Transcribed trade-balance table ('valor', 'volume_TM' columns) ---
bce_df = pd.read_csv(f'{DATA_PATH}/Evolución de la Balanza Comercial/transcribed_data.csv')
valor = bce_df['valor']
bce_df['valor [$]'] = (valor*1e3)  # 'valor' scaled by 1e3
# NOTE(review): computed from the unscaled 'valor', so the unit is that of
# 'valor' per 'volume_TM' — confirm this matches the "USD" in the column name.
bce_df['price_USD_calc'] = (valor/bce_df['volume_TM']).round(1)

# --- FAOSTAT: yearly export value ---
fao_raw = load_faostat_data('FAOSTAT_banana_export_ecuador.csv')
fao_trade_df = (
    fao_raw[fao_raw['Element'] == 'Export value']
    .sort_values(by='Year')
    [['Year', 'Value']]
    .assign(**{'Value [$]': lambda t: t['Value'] * 1000})
    .groupby('Year')
    .sum()
)

# --- Bilateral trade / embodied cropland / eHANPP table ---
text_columns = ['primary_product_Code', 'primary_product',
                'Origin', 'Origin_code_FAO',
                'Destination', 'Destination_code_FAO',
                'Final_use']
float_columns = ['tonnes_traded_dm_by_final_uses_adjusted_for_seeds_and_losses',
                 'embodied_physical_cropped_area_ha_by_final_uses',
                 'eHANPP_t_dm_by_final_uses']
trade_dtypes = {**dict.fromkeys(text_columns, str),
                'Year': int,
                **dict.fromkeys(float_columns, float)}
trade_df = pd.read_csv(f'{DATA_PATH}' +
                       '/Trade, embodied cropland, and eHANPP Bananas/Bananas_trade_EmbodiedArea_eHANPP.csv',
                       index_col=0,
                       dtype=trade_dtypes)

# Banana rows only, without the code columns.
only_bananas = trade_df['primary_product'] == 'Bananas'
trade_df = trade_df[only_bananas].drop(
    columns=['primary_product_Code', 'primary_product', 'Origin_code_FAO', 'Destination_code_FAO'])

# Flows originating in Ecuador, summed per year.
trade_df_ecu = (
    trade_df[trade_df['Origin'] == 'Ecuador']
    .sort_values(['Year'])
    .drop(columns=['Origin', 'Destination', 'Final_use'])
    .groupby('Year')
    .sum()
)

#### Proxy

In [None]:
# Candidate exporters
banana_pn_G4630firms = cw_nodes[cw_nodes['ISIC4'] == 'G4630'].copy()
banana_pn_G4630firms = banana_pn_nodelists[banana_pn_nodelists['ISIC4'] == 'G4630'].copy()
# Candidate export value
prefix = ('cw_', '')[0]
banana_pn_G4630firms['missing_cw_s_out'] = np.where(banana_pn_G4630firms[f'{prefix}s_in'] > banana_pn_G4630firms[f'{prefix}s_out'], 
                                                    banana_pn_G4630firms[f'{prefix}s_in'] - banana_pn_G4630firms[f'{prefix}s_out'], 0)

# Trend
_, ax = plt.subplots()
ax.plot((banana_pn_G4630firms.groupby('date')['missing_cw_s_out'].sum()), marker='o', label=r'Missing revenue (G4630)')
xlim = ax.get_xlim()
ax.plot(bce_df['year'], bce_df['valor [$]'], marker='o', label='Trade balance (Central Bank Ecu)',ls='--')
ax.plot(fao_trade_df.index, fao_trade_df['Value [$]'], marker='o', label= 'FAO export value', ls='--')
#ax2 = ax.twinx()

#ax2.plot(trade_df_ecu.index, trade_df_ecu['tonnes_traded_dm_by_final_uses_adjusted_for_seeds_and_losses'], marker='o', label='Trade export volume (tonnes)', color = 'purple')
#ax2.legend(loc='lower left')

ax.set(xlabel='Years', ylabel='USD', xlim=xlim, 
       title='Banana exported value across the years')
ax.legend(loc='upper right')
ax.grid()
plt.savefig(FIGURE_PATH / 'model_validation' / 'export_missing_s_correlation.png', format='png', dpi=300)
plt.show()


### Banana cultivated area

### Where are A0122's customers in G4630 located? How much do they buy?

In [None]:
adm1_geodf, adm2_geodf = load_shapefiles()

for y in years:
    # Only 2015 is analysed; the names defined below are reused by later cells.
    if y == 2015:
        edgelist_y = global_edgelists[y]
        # Edges with both endpoints flagged as belonging to the PN.
        both_in_pn = (edgelist_y['inPN_supplier'] == True) & (edgelist_y['inPN_customer'] == True)
        pn_edgelist_y = edgelist_y[both_in_pn]

        # Flows from A0122 suppliers to G4630 customers.
        grower_to_wholesaler = ((pn_edgelist_y['sector_supplier'] == 'A0122')
                                & (pn_edgelist_y['sector_customer'] == 'G4630'))
        A0122_to_G4630_flows_y = pn_edgelist_y[grower_to_wholesaler]
        A0122customers_inG4630 = set(A0122_to_G4630_flows_y['id_customer'].unique())

        # Vertex rows of those customers for the year ...
        is_customer_row = ((banana_pn_nodelists['date'] == y)
                           & banana_pn_nodelists['firm_id'].isin(A0122customers_inG4630))
        A0122customers_inG4630_df = banana_pn_nodelists[is_customer_row]

        # ... joined with the total weight each one receives from A0122
        # ('ban_supply'), largest buyer first.
        supply_per_customer = A0122_to_G4630_flows_y.groupby('id_customer')['weight'].sum()
        A0122customers_inG4630_df = (
            A0122customers_inG4630_df
            .merge(supply_per_customer, left_on='firm_id', right_on='id_customer')
            .rename(columns={'weight': 'ban_supply'})
            .sort_values(by='ban_supply', ascending=False)
        )

In [None]:
# NOTE: `y` is whatever the loop in the previous cell left behind.
canton_aggregated_count_and_column(
    A0122customers_inG4630_df,
    adm2_geodf,
    adm1_geodf,
    year=y,
    col_fun1=('ban_supply', 'sum'),
    logscale=True,
)

## Network analysis

In [None]:
# NOTE: the year comes from `y`, as left by the cells above.
pn_file = DATA_PATH / f'pn{y}.graphml'
banana_pn = ig.read(filename=pn_file, format='graphml')
print('N, L:', banana_pn.vcount(), banana_pn.ecount())

# Total = out + in, stored as new vertex attributes 's_tot' and 'k_tot'.
for attr in ('s', 'k'):
    outgoing = np.array(banana_pn.vs[f'{attr}_out'])
    incoming = np.array(banana_pn.vs[f'{attr}_in'])
    banana_pn.vs[f'{attr}_tot'] = outgoing + incoming

In [None]:
# --- CCDFs of the 's' and 'k' vertex attributes (in / out / total) ---
fig, ax = plt.subplots(ncols=2, nrows=3, figsize=(5, 6), sharex='col', sharey='row')
for j, attr in enumerate(['s', 'k']):
    # `direction` rather than `dir`, so the builtin is not shadowed for the
    # rest of the kernel session.
    for i, direction in enumerate(['in', 'out', 'tot']):
        vals = np.asarray(banana_pn.vs[f'{attr}_{direction}'])
        # np.unique returns the sorted distinct values with their multiplicities.
        unique_vals, counts = np.unique(vals, return_counts=True)
        # CCDF: fraction of nodes with a value strictly greater than x, i.e. all
        # nodes minus those with value <= x. Taken from the cumulative counts
        # instead of re-scanning `vals` for every distinct value.
        # (Equal to the per-value scan as long as `vals` contains no NaN.)
        ccdf = (vals.size - np.cumsum(counts)) / vals.size
        ax[i, j].scatter(unique_vals, ccdf, s=4)
        ax[i, j].set(xscale='log', yscale='log', axisbelow=True)
        ax[i, j].grid(lw=.5)
        ax[i, 0].set_ylabel(f'CCDF ({direction})')
ax[0, 0].set_title('Node strength'); ax[0,1].set_title('Node degree')
plt.tight_layout()
plt.show()

# --- 's' attributes against 'k' attributes, every combination ---
s_attrs = ['s_tot', 's_in', 's_out']
k_attrs = ['k_tot', 'k_in', 'k_out']

fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(7, 6), sharex='col', sharey='row')

for i, s_attr in enumerate(s_attrs):
    for j, k_attr in enumerate(k_attrs):
        ax[i, j].scatter(banana_pn.vs[k_attr],
                         banana_pn.vs[s_attr],
                         s=4)
        ax[i, j].set(axisbelow=True, xscale='log', yscale='log')
        ax[i, j].grid(lw=.5)
# Label only the outer panels: y-labels on the first column, x-labels on the last row.
for it in range(3):
    ax[it, 0].set_ylabel(s_attrs[it])
    ax[2, it].set_xlabel(k_attrs[it])
plt.tight_layout()
plt.show()


### PN components

In [None]:
# Weakly connected components, largest first.
components = banana_pn.connected_components(mode="weak")
components_sorted = sorted(components, key=len, reverse=True)
components_length = list(map(len, components_sorted))

# Vertex attribute 'component' = rank of the vertex's component by size
# (0 is the largest component).
component_rank = [None] * banana_pn.vcount()
for rank, members in enumerate(components_sorted):
    for vertex_id in members:
        component_rank[vertex_id] = rank
banana_pn.vs['component'] = component_rank

In [None]:
banana_pn_vert_df = banana_pn.get_vertex_dataframe()

# Vertices outside the largest component (component 0), with their total
# countrywide strength ('cw_s_out' + 'cw_s_in'), strongest first.
outside_largest = banana_pn_vert_df['component'].ne(0)
banana_pn_disconnected_vs_df = (
    banana_pn_vert_df[outside_largest]
    .assign(cw_s_tot=lambda t: t['cw_s_out'] + t['cw_s_in'])
    .sort_values(by='cw_s_tot', ascending=False)
)
banana_pn_disconnected_vs_df.head(8)

In [None]:
# Total 'cw_s_tot' of the disconnected vertices, divided by 1e6.
disconnected_strength = banana_pn_disconnected_vs_df['cw_s_tot']
print(disconnected_strength.sum()/1e6)

# One marker per disconnected vertex, in table order (strongest first),
# coloured by the vertex attribute 'custom_color'.
_, ax = plt.subplots(figsize=(5,3))
ax.scatter(
    range(len(banana_pn_disconnected_vs_df)),
    disconnected_strength,
    c=banana_pn_disconnected_vs_df['custom_color'].values,
    s=50,
    marker='o',
)
ax.grid()
plt.show()

In [None]:
# TODO: check the out-neighbours (within the PN) of the disconnected nodes.

## Edges crossing the PN boundary

In [None]:
year = 2015
global_edgelist_y = global_edgelists[year]

# Edges with exactly one endpoint flagged as belonging to the PN (XOR).
supplier_in_pn = global_edgelist_y['inPN_supplier'] == True
customer_in_pn = global_edgelist_y['inPN_customer'] == True
pn_and_roe_edgelist_y = global_edgelist_y[supplier_in_pn ^ customer_in_pn]

In [None]:
# Firm under inspection. NOTE(review): `id` shadows the builtin; the name is
# kept because the next cell reads it.
id = '864345'

# Boundary edges in which the firm is either the supplier or the customer.
firm_is_endpoint = (pn_and_roe_edgelist_y['id_supplier'].eq(id)
                    | pn_and_roe_edgelist_y['id_customer'].eq(id))
tmp_df = pn_and_roe_edgelist_y[firm_is_endpoint]

# All sector codes appearing on either side of those edges.
print(set(tmp_df.sector_customer) | set(tmp_df.sector_supplier))
print(SECTOR_TO_DESCR['K6419'])
tmp_df

In [None]:
# Same firm (`id` from the previous cell), now within the PN-internal edgelist
# `pn_edgelist_y` built in an earlier cell.
firm_is_endpoint = pn_edgelist_y['id_supplier'].eq(id) | pn_edgelist_y['id_customer'].eq(id)
tmp_df = pn_edgelist_y[firm_is_endpoint]
tmp_df

In [None]:
# Boundary edges sold by A0122 firms, summarised per customer sector:
# total weight and number of distinct customers, heaviest sector first.
sold_by_A0122 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_supplier'].eq('A0122')]
A0122customers_inroe = (
    sold_by_A0122
    .groupby('sector_customer', as_index=False)
    .agg({'weight': 'sum', 'sector_customer': 'first', 'id_customer': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_customer_descr=lambda t: t['sector_customer'].map(SECTOR_TO_DESCR))
)

# Boundary edges bought by A0122 firms, summarised per supplier sector.
bought_by_A0122 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_customer'].eq('A0122')]
A0122suppliers_inroe = (
    bought_by_A0122
    .groupby('sector_supplier', as_index=False)
    .agg({'weight': 'sum', 'sector_supplier': 'first', 'id_supplier': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_supplier_descr=lambda t: t['sector_supplier'].map(SECTOR_TO_DESCR))
)

# Only the supplier side is displayed.
display(A0122suppliers_inroe.head(10))

In [None]:
# Boundary edges sold by C2220 firms, summarised per customer sector:
# total weight and number of distinct customers, heaviest sector first.
sold_by_C2220 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_supplier'].eq('C2220')]
C2220customers_inroe = (
    sold_by_C2220
    .groupby('sector_customer', as_index=False)
    .agg({'weight': 'sum', 'sector_customer': 'first', 'id_customer': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_customer_descr=lambda t: t['sector_customer'].map(SECTOR_TO_DESCR))
)

# Boundary edges bought by C2220 firms, summarised per supplier sector.
bought_by_C2220 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_customer'].eq('C2220')]
C2220suppliers_inroe = (
    bought_by_C2220
    .groupby('sector_supplier', as_index=False)
    .agg({'weight': 'sum', 'sector_supplier': 'first', 'id_supplier': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_supplier_descr=lambda t: t['sector_supplier'].map(SECTOR_TO_DESCR))
)

In [None]:
# Boundary edges sold by G4630 firms, summarised per customer sector:
# total weight and number of distinct customers, heaviest sector first.
sold_by_G4630 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_supplier'].eq('G4630')]
G4630customers_inroe = (
    sold_by_G4630
    .groupby('sector_customer', as_index=False)
    .agg({'weight': 'sum', 'sector_customer': 'first', 'id_customer': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_customer_descr=lambda t: t['sector_customer'].map(SECTOR_TO_DESCR))
)

# Boundary edges bought by G4630 firms, summarised per supplier sector.
bought_by_G4630 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_customer'].eq('G4630')]
G4630suppliers_inroe = (
    bought_by_G4630
    .groupby('sector_supplier', as_index=False)
    .agg({'weight': 'sum', 'sector_supplier': 'first', 'id_supplier': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_supplier_descr=lambda t: t['sector_supplier'].map(SECTOR_TO_DESCR))
)

# Cell output: total weight bought across the boundary, divided by 1e9.
G4630suppliers_inroe['weight'].sum()/1e9

In [None]:
# Boundary edges sold by G4669 firms, summarised per customer sector:
# total weight and number of distinct customers, heaviest sector first.
sold_by_G4669 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_supplier'].eq('G4669')]
G4669customers_inroe = (
    sold_by_G4669
    .groupby('sector_customer', as_index=False)
    .agg({'weight': 'sum', 'sector_customer': 'first', 'id_customer': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_customer_descr=lambda t: t['sector_customer'].map(SECTOR_TO_DESCR))
)

# Boundary edges bought by G4669 firms, summarised per supplier sector.
bought_by_G4669 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_customer'].eq('G4669')]
G4669suppliers_inroe = (
    bought_by_G4669
    .groupby('sector_supplier', as_index=False)
    .agg({'weight': 'sum', 'sector_supplier': 'first', 'id_supplier': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_supplier_descr=lambda t: t['sector_supplier'].map(SECTOR_TO_DESCR))
)

# Cell output: total weight bought across the boundary, divided by 1e9.
G4669suppliers_inroe['weight'].sum()/1e9

In [None]:
# Boundary edges bought by G4772 firms, summarised per supplier sector:
# total weight and number of distinct suppliers, heaviest sector first.
bought_by_G4772 = pn_and_roe_edgelist_y[pn_and_roe_edgelist_y['sector_customer'].eq('G4772')]
G4772suppliers_inroe = (
    bought_by_G4772
    .groupby('sector_supplier', as_index=False)
    .agg({'weight': 'sum', 'sector_supplier': 'first', 'id_supplier': 'nunique'})
    .sort_values(by='weight', ascending=False)
    .assign(sector_supplier_descr=lambda t: t['sector_supplier'].map(SECTOR_TO_DESCR))
)