In [1]:
import os
import sys
import time
import math
import numpy as np
import pandas as pd
import geopandas as gpd

# Project root folder. Defaults to the original author's local OneDrive location;
# set the CAMBODIA_DATA_FOLDER environment variable to run the notebook elsewhere
# without editing this cell.
folder = os.environ.get("CAMBODIA_DATA_FOLDER", "C:\\Users\\celian\\OneDrive\\WorldBank\\Cambodia")
# Folder that holds InputOutputTable.xlsx and receives every CSV written by this notebook.
io_folder = os.path.join(folder, "Data", "Structured", "InputOutputTable")

### Export sector table

In [2]:
# Load the sector definitions from the "sector" sheet of the workbook.
sector = pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="sector")

# Sanity check: the trigram becomes the sector identifier below, so report any
# trigram that appears more than once. The selection used to be computed and
# thrown away (only a cell's last expression is displayed), so duplicates could
# never be noticed.
duplicated_trigrams = sector.loc[sector['trigram'].duplicated(keep=False), :]
if not duplicated_trigrams.empty:
    print("Duplicated trigrams")
    print(duplicated_trigrams)

# Keep the three descriptive columns and expose the trigram under the name "sector".
sector = sector[['trigram', 'name', 'type']].rename(columns={"trigram": "sector"})
sector
# NOTE(review): export left disabled, as in the original. The sector_table.csv that is
# read later in this notebook carries an 'output' column this cell does not produce,
# so re-enabling the line would presumably overwrite an enriched file - confirm first.
#sector.to_csv(os.path.join(io_folder, 'sector_table.csv'), index=False)

Unnamed: 0,sector,name,type
0,CRO,Crops,agriculture
1,LIV,Livestock and Poultry,agriculture
2,FOR,Forestry and logging,primary
3,FIS,Fishing and aquaculture,primary
4,MIN,Mining and quarrying,primary
5,MFO,Manufacture of food products,manufacturing
6,MBE,Manufacture of beverages,manufacturing
7,MTO,Manufacture of tobacco products,manufacturing
8,MTE,"Manufacture of textiles, leather, wearing appa...",manufacturing
9,MWO,Manufacture of wood and of products of wood an...,manufacturing


### Export country trigrams

In [18]:
# Read the "country" sheet and write its name/trigram pairs to country.csv.
workbook_path = os.path.join(io_folder, "InputOutputTable.xlsx")
country = pd.read_excel(workbook_path, sheet_name="country")
country_trigrams = country[['country_name', 'trigram']]
country_trigrams.to_csv(os.path.join(io_folder, 'country.csv'), index=False)

### Export final demand

In [22]:
# Source column name -> column name used in the exported final-demand table.
col = {'trigram':"sector", 'final_demand_USD':'final_demand'}
# Read the sheet, keep only the mapped columns (in mapping order) and rename them.
df = (
    pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="final_demand")
    .loc[:, list(col)]
    .rename(columns=col)
)
df.to_csv(os.path.join(io_folder, 'final_demand.csv'), index=False)

### Export Tech Coef Matrix

In [25]:
# Read the domestic technical-coefficient sheet and export it in matrix form.
tech_coef_sheet = pd.read_excel(
    os.path.join(io_folder, "InputOutputTable.xlsx"),
    sheet_name="domestic_techCoef",
)
# Keep every row whose index label is not 'check'.
# NOTE(review): read_excel is called without index_col, so the index is presumably
# the default integer range and this filter removes nothing - confirm whether
# index_col=0 was intended before relying on the 'check' row being gone.
not_check_row = tech_coef_sheet.index != 'check'
df = tech_coef_sheet.loc[not_check_row]
df.to_csv(os.path.join(io_folder, 'tech_coef_matrix.csv'), index=True)

In [26]:
# Same sheet as the matrix export, reshaped to long format (one row per matrix cell).
coef_matrix = pd.read_excel(
    os.path.join(io_folder, "InputOutputTable.xlsx"),
    sheet_name="domestic_techCoef",
)
# NOTE(review): no index_col is passed to read_excel, so this label filter on the
# index presumably keeps every row - confirm whether index_col=0 was intended.
coef_matrix = coef_matrix.loc[coef_matrix.index != 'check']
# unstack() turns the frame into a Series keyed by (column label, row label);
# reset_index() then spreads those two keys and the value over three columns.
df = coef_matrix.unstack().reset_index()
df.columns = ['buying_sector', 'supplying_sector', 'flow']
df.to_csv(os.path.join(io_folder, 'tech_coef_table.csv'), index=False)

### Export Flow Matrix Between Sectors For Analysis

In [13]:
# Read the raw input-output table; the first spreadsheet row is skipped so that the
# second row provides the column labels.
df = pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="ioTable", skiprows=1)
# Keep the rows flagged "domestic" in the first column and index them by the
# integer code found in the second column.
df = df[df.iloc[:,0]=="domestic"]
df.index = df.iloc[:,1].astype(int)
# Columns 3..62 are kept (60 columns); presumably the sector-by-sector block - TODO confirm.
df = df.iloc[:,3:63]
# Long format: one row per (column label, row label) pair.
df = df.unstack().reset_index()
df.columns = ['buying_sector', 'supplying_sector', 'flow']

# map sector
sector = pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="sector")
# Report repeated trigrams instead of computing the selection and discarding it.
duplicated_trigrams = sector.loc[sector['trigram'].duplicated(keep=False), :]
if not duplicated_trigrams.empty:
    print("Duplicated trigrams")
    print(duplicated_trigrams)
sector = sector['trigram']
# The codes are shifted by one before the lookup, i.e. code k is matched to row k-1
# of the "sector" sheet (codes appear to be 1-based - TODO confirm).
for side in ('buying_sector', 'supplying_sector'):
    df[side] = (df[side]-1).map(sector)
# Series.map leaves NaN for codes without a matching row; make that visible.
unmapped = df['buying_sector'].isnull() | df['supplying_sector'].isnull()
if unmapped.any():
    print("Unmapped values")
    print(df[unmapped])

# convert to USD
# NOTE(review): presumably the table is in millions of local currency and 4067.8 is
# the exchange rate per USD - confirm the source and year of this rate.
df['flow'] = df['flow'] * (1000000/4067.8)

# The CSV export of these flows is done in a later cell, after filtering.

In [14]:
# keep only largest sector
sector_table = pd.read_csv(os.path.join(io_folder, 'sector_table.csv'))
boolean = sector_table['output']/sector_table['output'].sum() > 0.01
largest_sectors = sector_table.loc[boolean, 'sector'].tolist()
print(largest_sectors)

boolean_sec = df['buying_sector'].isin(largest_sectors) & df['supplying_sector'].isin(largest_sectors)
print(df.shape, df[boolean_sec].shape)

# eliminate flow too litlle
boolean_flow = df['flow'] > 100000
print(df.shape, df[boolean_flow].shape, df[boolean_sec & boolean_flow].shape)

['CRO', 'LIV', 'FOR', 'FIS', 'MIN', 'MFO', 'MBE', 'MTE', 'MRU', 'ELE', 'CON', 'TRW', 'TRL', 'ACF', 'TEL', 'FIN', 'REA', 'TRA', 'ADM', 'EDU', 'HEA', 'OPS']
(3600, 3) (484, 3)
(3600, 3) (993, 3) (339, 3)


In [18]:
# Short display label for each sector trigram kept in the export.
# NOTE(review): "Accomodation" and "Personnal" are misspelled, but these labels are
# written to the exported CSV as-is; correct them together with any downstream consumer.
rename_sec = dict(
    CRO="Agriculture",
    LIV="Livestock",
    FOR="Forestry & Logging",
    FIS="Fishing",
    MIN="Mining",
    MFO="Man. of Food Products",
    MBE="Man. of Beverages",
    MTE="Man. of Textiles",
    MRU="Man. of Rubber & Plastics",
    ELE="Utilities",
    CON="Construction",
    TRW="Wholesale Trade",
    TRL="Transport",
    ACF="Accomodation & Food Services",
    TEL="Telco",
    FIN="Financial Services",
    REA="Real Estate",
    TRA="Travel",
    ADM="Administration",
    EDU="Education",
    HEA="Health",
    OPS="Personnal Services",
)
def renameCol(df, what, mapping):
    """Add a '<what>_name' column holding the mapped label of column `what`.

    Parameters
    ----------
    df : DataFrame to annotate; it is modified in place.
    what : name of the column whose values are looked up.
    mapping : lookup passed to Series.map (e.g. a dict of code -> label).

    Returns
    -------
    The same DataFrame object, with the extra column. Rows whose value has no
    entry in `mapping` get a null label and are printed as a warning.
    """
    label_col = what + '_name'
    df[label_col] = df[what].map(mapping)
    missing = df[label_col].isnull()
    if missing.any():
        print("Unmapped values")
        print(df[missing])
    return df

# Keep the flows that pass both filters, on an independent copy, then attach a
# readable label for the buying and the supplying sector (in that order).
df_toexport = df[boolean_sec & boolean_flow].copy()
for sector_side in ('buying_sector', 'supplying_sector'):
    df_toexport = renameCol(df_toexport, sector_side, rename_sec)

In [19]:
# Write the filtered, labelled flows to intermediary_flows.csv in the IO folder.
intermediary_flows_path = os.path.join(io_folder, 'intermediary_flows.csv')
df_toexport.to_csv(intermediary_flows_path, index=False)

# Main sectors

In [5]:
# Share of each sector in total output, then show the sectors from largest to smallest.
total_output = sector_table['output'].sum()
sector_table['rel_output'] = sector_table['output'] / total_output
sector_table.sort_values(by='output', ascending=False)

Unnamed: 0,sector,name,type,usd_per_ton,share_exporting_firms,output,rel_output
8,MTE,"Manufacture of textiles, leather, wearing appa...",manufacturing,11476.846024,0.05,5.781829e+09,1.729847e-01
0,CRO,Crops,agriculture,1335.356191,0.05,3.670631e+09,1.098204e-01
25,CON,Construction of buildings; Civil engineering; ...,construction,54279.025286,0.05,3.557242e+09,1.064280e-01
26,TRW,"Wholesale trade, except of motor vehicles and ...",trade,54279.025286,0.05,3.023945e+09,9.047242e-02
30,ACF,Accommodation; Food and beverage service activ...,services,54279.025286,0.05,2.275682e+09,6.808539e-02
28,TRL,Land transport and transport via pipelines; Wa...,transport,54279.025286,0.05,1.816747e+09,5.435467e-02
3,FIS,Fishing and aquaculture,primary,2696.984567,0.05,1.653898e+09,4.948242e-02
37,REA,Real estate activities,services,54279.025286,0.05,1.323541e+09,3.959860e-02
33,TEL,Programming and broadcasting activities; Telec...,services,54279.025286,0.05,9.166292e+08,2.742433e-02
23,ELE,"Electricity, gas, steam and air conditioning s...",utility,54279.025286,0.05,7.803478e+08,2.334697e-02
