In [1]:
import os
import sys
import time
import math
import numpy as np
import pandas as pd
import geopandas as gpd

# Relative locations of the structured source data and of the model inputs.
# NOTE(review): several later cells reference `io_folder`, which is not defined
# in the visible cells -- confirm where it should be set.
_UP = os.pardir
input_folder = os.path.join(_UP, _UP, _UP, 'Ecuador', 'Data', 'Structured')
output_folder = os.path.join(_UP, 'input', 'Ecuador', 'Supply')

### Load sectors & IO

In [2]:
# Sector reference table (columns shown in the output: letter, trigram, name).
sectors_csv = os.path.join(input_folder, "Sectors", "sectors.csv")
sectors = pd.read_csv(sectors_csv)
sectors.head()

Unnamed: 0,letter,trigram,name
0,A,AGR,Agriculture
1,B,MIN,Mining and quarrying
2,C,MAN,Manufacturing
3,D,ELE,Electricity
4,E,WAT,Water supply and sewerage


In [3]:
# Load the aggregated 2019 input-output table and relabel both axes from
# sector letters to trigrams using the `sectors` reference table.
letter_to_trigram = sectors.set_index("letter")['trigram']  # built once, used for both axes

io = pd.read_csv(os.path.join(input_folder, "io_tables", "from_NA", "aggregated_io_2019.csv"), index_col=0)
io.index = io.index.map(letter_to_trigram)
io.columns = io.columns.map(letter_to_trigram)

# The relabelling is only safe if every letter maps to a distinct trigram.
# Report (without stopping the notebook) labels that could not be mapped or
# that appear more than once, because label-based selection on such an axis
# returns several rows/columns.
for axis_name, labels in (("index", io.index), ("columns", io.columns)):
    if labels.isnull().any():
        print("Warning: unmapped sector letters in io {}".format(axis_name))
    repeated = labels[labels.duplicated()].unique().tolist()
    if repeated:
        print("Warning: duplicated trigrams in io {}: {}".format(axis_name, repeated))

io

Unnamed: 0_level_0,AGR,MIN,MAN,ELE,WAT,CON,TRA,ACC,TRA,INF,FIN,REA,PRO,PUB,EDU,HEA,ART,HOU
ciiu_4n1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AGR,1552236.0,27688.0,5973018.0,818.0,68.0,484485.0,1667.0,2523.0,90789.0,467.0,265.0,155.0,854.0,1234.0,6537.0,8306.0,1712.0,0.0
MIN,6287.0,1489715.0,1247693.0,68698.0,281.0,213406.0,8793.0,29006.0,1679.0,3232.0,1396.0,2833.0,3503.0,2074.0,1113.0,874.0,1065.0,0.0
MAN,1646571.0,705720.0,6069978.0,409086.0,27918.0,3374520.0,353667.0,872572.0,831732.0,190414.0,189126.0,41508.0,329148.0,364613.0,283590.0,614264.0,266409.0,0.0
ELE,67723.0,134399.0,275601.0,1310186.0,4712.0,22016.0,104145.0,47270.0,36595.0,109419.0,29843.0,8449.0,38733.0,49781.0,37544.0,25157.0,22553.0,0.0
WAT,86860.0,49159.0,78129.0,30729.0,4486.0,3037.0,14597.0,2701.0,17135.0,4739.0,3332.0,4464.0,17596.0,28793.0,21777.0,13082.0,3956.0,0.0
CON,5525.0,27433.0,39201.0,797.0,54.0,15314.0,1318.0,3731.0,7290.0,370.0,67116.0,743828.0,9472.0,33918.0,36389.0,42467.0,121985.0,0.0
TRA,495330.0,433528.0,3401964.0,98368.0,8350.0,1329293.0,255867.0,423051.0,244374.0,88797.0,79627.0,15954.0,183957.0,95906.0,97272.0,215484.0,70483.0,0.0
ACC,681501.0,725797.0,739853.0,66543.0,20504.0,389874.0,1660725.0,820037.0,19579.0,57788.0,62445.0,395651.0,281136.0,29024.0,31742.0,10399.0,30788.0,0.0
TRA,12017.0,105652.0,19756.0,1989.0,545.0,7162.0,6001.0,122786.0,1825.0,67227.0,70010.0,4372.0,12542.0,119520.0,63196.0,82130.0,1912.0,0.0
INF,37182.0,18950.0,104870.0,16218.0,11389.0,10140.0,148930.0,31230.0,53161.0,200753.0,94386.0,6296.0,61740.0,37436.0,30100.0,15264.0,57080.0,0.0


### Export tech coef

In [15]:
# NOTE(review): `DataFrame.set_index` requires a `keys` argument, so this call
# raises the TypeError recorded in the cell output. The intended key cannot be
# determined from this cell -- TODO supply it or remove the cell.
io.set_index()

TypeError: set_index() missing 1 required positional argument: 'keys'

### Export country trigrams

In [18]:
# Read the "country" sheet and export only the name/trigram pairs.
# `io_folder` is expected to already exist in the kernel namespace.
country_workbook = os.path.join(io_folder, "InputOutputTable.xlsx")
country = pd.read_excel(country_workbook, sheet_name="country")
country_trigrams = country[['country_name', 'trigram']]
country_trigrams.to_csv(os.path.join(io_folder, 'country.csv'), index=False)

### Export final demand

In [22]:
# Columns to keep from the "final_demand" sheet, mapped to their exported names.
col = {'trigram': "sector", 'final_demand_USD': 'final_demand'}

# Select the wanted columns, rename them, and write the result next to the workbook.
df = (
    pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="final_demand")
    .loc[:, list(col)]
    .rename(columns=col)
)
df.to_csv(os.path.join(io_folder, 'final_demand.csv'), index=False)

### Export Tech Coef Matrix

In [25]:
# Export the domestic technical-coefficient matrix as a CSV.
# The sheet's first column is read as the row index: with pandas' default
# RangeIndex the `df.index != 'check'` filter below can never match, so the
# 'check' row would be exported along with the data.
# Assumes the row labels (including 'check') sit in the first sheet column -- TODO confirm.
df = pd.read_excel(
    os.path.join(io_folder, "InputOutputTable.xlsx"),
    sheet_name="domestic_techCoef",
    index_col=0,
)
# Drop the row labelled 'check'.
df = df.loc[df.index != 'check', :]
df.to_csv(os.path.join(io_folder, 'tech_coef_matrix.csv'), index=True)

In [26]:
# Export the domestic technical coefficients in long (one row per pair) form.
# The sheet's first column is read as the row index: with pandas' default
# RangeIndex the `df.index != 'check'` filter never matches, the label column
# is unstacked as if it were data, and 'supplying_sector' receives integer row
# positions instead of labels.
# Assumes the row labels (including 'check') sit in the first sheet column -- TODO confirm.
df = pd.read_excel(
    os.path.join(io_folder, "InputOutputTable.xlsx"),
    sheet_name="domestic_techCoef",
    index_col=0,
)
# Drop the row labelled 'check'.
df = df.loc[df.index != 'check', :]
# unstack() yields a Series keyed by (column label, row label); reset_index()
# turns that into three columns, in that order.
df = df.unstack().reset_index()
df.columns = ['buying_sector', 'supplying_sector', 'flow']
df.to_csv(os.path.join(io_folder, 'tech_coef_table.csv'), index=False)

### Export Flow Matrix Between Sectors For Analysis

In [13]:
# Build a long table of domestic inter-sector flows from the "ioTable" sheet.
df = pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="ioTable", skiprows=1)
# Keep the rows whose first column is "domestic".
df = df[df.iloc[:,0]=="domestic"]
# Use the second column (cast to int) as the row index.
df.index = df.iloc[:,1].astype(int)
# Keep columns 3..62 (60 columns) -- presumably one per sector; TODO confirm.
df = df.iloc[:,3:63]
# unstack() yields a Series keyed by (column label, row label).
df = df.unstack().reset_index()
df.columns = ['buying_sector', 'supplying_sector', 'flow']

# map sector
sector = pd.read_excel(os.path.join(io_folder, "InputOutputTable.xlsx"), sheet_name="sector")
# Report duplicated trigrams: the lookup result was previously computed and
# discarded, so it never appeared in the cell output.
duplicated_trigrams = sector.loc[sector['trigram'].duplicated(keep=False),:]
if not duplicated_trigrams.empty:
    print("Duplicated trigrams in sector sheet")
    print(duplicated_trigrams)
sector = sector['trigram']
# Sector ids are shifted by one to match the 0-based positional index of `sector`.
df['buying_sector'] = (df['buying_sector']-1).map(sector)
df['supplying_sector'] = (df['supplying_sector']-1).map(sector)

# convert to USD
# NOTE(review): presumably flows are in millions of local currency and 4067.8
# is the local-currency-per-USD rate -- confirm the source of this figure.
FLOW_TO_USD = 1000000/4067.8
df['flow'] = df['flow'] * FLOW_TO_USD

In [14]:
# Keep only the largest sectors: those whose output exceeds 1% of the summed
# output listed in sector_table.csv.
sector_table = pd.read_csv(os.path.join(io_folder, 'sector_table.csv'))
output_share = sector_table['output'].div(sector_table['output'].sum())
boolean = output_share.gt(0.01)
largest_sectors = sector_table['sector'][boolean].tolist()
print(largest_sectors)

# Flag flows whose buyer and supplier are both among the largest sectors.
buyer_is_large = df['buying_sector'].isin(largest_sectors)
supplier_is_large = df['supplying_sector'].isin(largest_sectors)
boolean_sec = buyer_is_large & supplier_is_large
print(df.shape, df.loc[boolean_sec].shape)

# Eliminate flows that are too small: keep those strictly above 100000.
boolean_flow = df['flow'].gt(100000)
print(df.shape, df.loc[boolean_flow].shape, df.loc[boolean_sec & boolean_flow].shape)

['CRO', 'LIV', 'FOR', 'FIS', 'MIN', 'MFO', 'MBE', 'MTE', 'MRU', 'ELE', 'CON', 'TRW', 'TRL', 'ACF', 'TEL', 'FIN', 'REA', 'TRA', 'ADM', 'EDU', 'HEA', 'OPS']
(3600, 3) (484, 3)
(3600, 3) (993, 3) (339, 3)


In [18]:
# Human-readable labels for the sector trigrams kept in the exported flow table.
# Spelling of the "ACF" and "OPS" labels corrected ("Accommodation", "Personal"),
# since these strings are written to the exported CSV.
rename_sec = {
    "CRO": "Agriculture",
    "LIV": "Livestock",
    "FOR": "Forestry & Logging",
    "FIS": "Fishing",
    "MIN": "Mining",
    "MFO": "Man. of Food Products",
    "MBE": "Man. of Beverages",
    "MTE": "Man. of Textiles",
    "MRU": "Man. of Rubber & Plastics",
    "ELE": "Utilities",
    "CON": "Construction",
    "TRW": "Wholesale Trade",
    "TRL": "Transport",
    "ACF": "Accommodation & Food Services",
    "TEL": "Telco",
    "FIN": "Financial Services",
    "REA": "Real Estate",
    "TRA": "Travel",
    "ADM": "Administration",
    "EDU": "Education",
    "HEA": "Health",
    "OPS": "Personal Services"
}
def renameCol(df, what, mapping):
    """Add a ``<what>_name`` column holding the mapped labels of column ``what``.

    Parameters
    ----------
    df : DataFrame, modified in place (the new column is added to it).
    what : name of the column whose values are looked up.
    mapping : lookup passed to ``Series.map`` (e.g. a dict of code -> label).

    Returns
    -------
    The same ``df`` object. Rows whose value has no entry in ``mapping`` get a
    null label; when any exist, they are printed under "Unmapped values".
    """
    label_col = what + '_name'
    df[label_col] = df[what].map(mapping)
    unmapped = df[label_col].isnull()
    if unmapped.any():
        print("Unmapped values")
        print(df[unmapped])
    return df

# Work on a copy of the flows that pass both filters, then attach readable
# names for each side of the flow.
df_toexport = df.loc[boolean_sec & boolean_flow].copy()
for sector_col in ('buying_sector', 'supplying_sector'):
    df_toexport = renameCol(df_toexport, sector_col, rename_sec)

In [19]:
# Write the filtered, labelled flows next to the workbook (row index omitted).
intermediary_flows_csv = os.path.join(io_folder, 'intermediary_flows.csv')
df_toexport.to_csv(intermediary_flows_csv, index=False)

# Main sectors

In [5]:
# Share of each sector in the summed output, stored as a new column on
# `sector_table`; the table is then displayed from largest to smallest output.
total_output = sector_table['output'].sum()
sector_table['rel_output'] = sector_table['output'].div(total_output)
sector_table.sort_values(by='output', ascending=False)

Unnamed: 0,sector,name,type,usd_per_ton,share_exporting_firms,output,rel_output
8,MTE,"Manufacture of textiles, leather, wearing appa...",manufacturing,11476.846024,0.05,5.781829e+09,1.729847e-01
0,CRO,Crops,agriculture,1335.356191,0.05,3.670631e+09,1.098204e-01
25,CON,Construction of buildings; Civil engineering; ...,construction,54279.025286,0.05,3.557242e+09,1.064280e-01
26,TRW,"Wholesale trade, except of motor vehicles and ...",trade,54279.025286,0.05,3.023945e+09,9.047242e-02
30,ACF,Accommodation; Food and beverage service activ...,services,54279.025286,0.05,2.275682e+09,6.808539e-02
28,TRL,Land transport and transport via pipelines; Wa...,transport,54279.025286,0.05,1.816747e+09,5.435467e-02
3,FIS,Fishing and aquaculture,primary,2696.984567,0.05,1.653898e+09,4.948242e-02
37,REA,Real estate activities,services,54279.025286,0.05,1.323541e+09,3.959860e-02
33,TEL,Programming and broadcasting activities; Telec...,services,54279.025286,0.05,9.166292e+08,2.742433e-02
23,ELE,"Electricity, gas, steam and air conditioning s...",utility,54279.025286,0.05,7.803478e+08,2.334697e-02
