In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
# Locations of the source data and of the generated model inputs.
# Both are relative paths, so they resolve against the current working directory.
input_parts = ('..', '..', '..', 'Ecuador', 'Data')
output_parts = ('..', 'input', 'Ecuador', 'National')
input_folder = os.path.join(*input_parts)
output_folder = os.path.join(*output_parts)

# Load Tanzania inventory data and map it onto the sectors we are using

In [32]:
# Lookup tables linking ISIC Rev.4 codes to the sector classifications used below.
inventories_dir = os.path.join(input_folder, 'Structured', 'Inventories')
sectors_dir = os.path.join(input_folder, 'Structured', 'Sectors')

# 'Class' column -> 'Section' column of the ISIC Rev.4 link table (tab-separated)
isicClass_to_isicSection = (
    pd.read_csv(os.path.join(inventories_dir, "ISICRev4_link.txt"), sep="\t")
    .set_index('Class')['Section']
)
# 'ISICRev4Section' column -> '13sectors' column
isicSection_to_sector13 = (
    pd.read_csv(os.path.join(inventories_dir, "ISICRev4Section_13sectors.csv"))
    .set_index('ISICRev4Section')['13sectors']
)
# 'Description' column -> 'Section' column; everything is read as str
isicSectionName_to_isicSectionCode = (
    pd.read_csv(
        os.path.join(inventories_dir, "ISICRev4_section_forMappingSupplierSurvey.txt"),
        delimiter="\t",
        dtype=str,
    )
    .set_index('Description')['Section']
)

# 'isic4_map' (kept as str) -> 'trigram'; rows with any missing value are discarded first,
# which already guarantees that 'isic4_map' itself has no missing entries.
isic4class_to_sector59 = (
    pd.read_csv(os.path.join(sectors_dir, "isic4class_to_59sector.csv"), sep=",", dtype={'isic4_map': str})
    .dropna()
    .set_index('isic4_map')['trigram']
)
# Kept as a table (not a Series): it is merged later, because one section can match several sectors.
isic4section_to_sector59 = (
    pd.read_csv(os.path.join(sectors_dir, "isic4section_to_59sector.csv"), sep=",")
    .dropna()
)
isic4section_to_sector59

Unnamed: 0,sector,isic4_section
0,AYG,A
1,AZU,A
2,CAN,A
3,CER,A
4,CIN,A
...,...,...
56,SAL,Q
57,ASO,R
58,ASO,S
59,DOM,T


In [33]:
# NOTE(review): stacked_table is first assigned in a later cell of this notebook, so on a
# fresh kernel (Restart & Run All) this cell raises NameError; it only works with leftover
# kernel state. Consider moving this inspection below that cell or deleting it.
stacked_table

Unnamed: 0,sector_code,supplier_sector,supplier_inventory,supplier_sector_code
0,ASO,Wholesale and retail trade; repair of motor ve...,7.0,G
1,MAN,"Electricity, gas, steam and air conditioning s...",10.0,D
4,RES,Accommodation and food service activities,7.0,I
5,FIN,"Professional, scientific and technical activities",30.0,M
6,RES,Wholesale and retail trade; repair of motor ve...,7.0,G
...,...,...,...,...
796,COM,"Agriculture, forestry and fishing",2.0,A
798,TEL,Manufacturing,30.0,C
806,CON,Construction,14.0,F
824,REP,Wholesale and retail trade; repair of motor ve...,7.0,G


In [41]:
# load data
# Firm survey results (Stata file); the path is resolved relative to input_folder.
data = pd.read_stata(os.path.join(input_folder, '..', "..", "Tanzania", "Survey", "Results", 'Firm_TZ_weighted_27February_2019.dta'))
print(data.shape)

# map sectors of buyers
# note that here we have the ISIC4 Class (4-digit).
# The class-level lookup is isic4class_to_sector59 (keys read as str, values are sector trigrams).
# NOTE(review): confirm that astype(str) yields exactly the key format of that lookup
# (e.g. leading zeros); classes without a match become NaN and are removed by dropna() below.
data['sector_code'] = data['icisorg'].astype(str).map(isic4class_to_sector59)

# Load supplier data. 1 group of columns for five main suppliers
# rename columns and treat coded values
supplier_ids = range(1, 6)
data = data.rename(columns={
    **{'q4_2_' + str(i): 'supplier_' + str(i) + '_sector' for i in supplier_ids},
    **{'q4_9_' + str(i): 'supplier_' + str(i) + '_inventory' for i in supplier_ids},
})
for i in supplier_ids:
    # 8888 and 9999 are replaced by NaN so that they do not enter the averages
    # NOTE(review): presumably "don't know" / "refused" style codes — confirm against the questionnaire
    data['supplier_' + str(i) + '_inventory'] = data['supplier_' + str(i) + '_inventory'].replace([8888, 9999], np.nan)

# put all supplier inventory and sector together:
# one row per (firm, supplier slot), with the same three column names for every slot
stacked_table = pd.concat([
    data[['sector_code', 'supplier_' + str(i) + '_sector', 'supplier_' + str(i) + '_inventory']]
        .rename(columns={
            'supplier_' + str(i) + '_sector': 'supplier_sector',
            'supplier_' + str(i) + '_inventory': 'supplier_inventory',
        })
    for i in supplier_ids
])
# keep only rows where buyer sector, supplier sector and inventory are all known
stacked_table = stacked_table.dropna()

# map sectors of suppliers
# note that here we only have the ISIC4 Section (letter).
# since one ISIC4 section can correspond to several sectors, we do a merge
stacked_table['supplier_sector_code'] = stacked_table['supplier_sector'].map(isicSectionName_to_isicSectionCode)
print(stacked_table.shape)
stacked_table = stacked_table.merge(
    isic4section_to_sector59,
    left_on="supplier_sector_code",
    right_on="isic4_section",
    how="left"
)
print(stacked_table.shape)  # more rows because we duplicated when one ISIC4 section corresponds to several sectors
# replace the section letter by the sector code brought in by the merge
stacked_table = stacked_table.drop(columns=["supplier_sector_code"]).rename(columns={'sector': "supplier_sector_code"})

# number of supplier observations left without a sector after the merge
print(stacked_table['supplier_sector_code'].isnull().sum())

# compute mean inventory for each (supplier sector, buyer sector) pair
res = stacked_table.groupby(['supplier_sector_code', 'sector_code'])['supplier_inventory'].mean().reset_index()
res

(837, 399)
(1828, 4)
(8845, 6)
0


  data['sector_code'] = data['icisorg'].astype(str).map(isic4_to_sector59)


Unnamed: 0,supplier_sector_code,sector_code,supplier_inventory
0,ADM,ADM,190.000000
1,ADM,ADP,7.000000
2,ADM,ASO,30.000000
3,ADM,CEM,365.000000
4,ADM,COM,42.000000
...,...,...,...
1018,VID,REP,124.000000
1019,VID,RES,10.029412
1020,VID,TEL,94.200000
1021,VID,TRA,96.000000


# Turn it into a matrix

In [48]:
# prepare matrix and fill it
# rows: supplier (input) sectors, columns: buying sectors, cells: mean inventory for that pair
mat = res.pivot(index='supplier_sector_code', columns='sector_code', values='supplier_inventory')
print(mat.shape)

# add missing columns and rows
all_sectors = pd.read_csv(os.path.join(input_folder, 'Structured', 'Sectors', '59sector_sector_table.csv'))
# The last row of the sector table is left out here.
# NOTE(review): presumably it is the import pseudo-sector, which is added back as the 'IMP' row only — confirm.
# dict.fromkeys removes duplicates while keeping the order of the sector table, so the
# layout of the matrix (and of the exported file) is the same on every run.
listed_sectors = list(dict.fromkeys(all_sectors['sector'].iloc[:-1].to_list()))
missing_col = [sector for sector in listed_sectors if sector not in mat.columns]
missing_row = [sector for sector in listed_sectors if sector not in mat.index] + ['IMP']
print(missing_col, missing_row)
# A single reindex appends the missing labels as float NaN rows/columns
# (assigning None column by column would create object-dtype columns).
mat = mat.reindex(
    index=list(dict.fromkeys(list(mat.index) + missing_row)),
    columns=list(mat.columns) + missing_col,
)
# keep the axis names explicit: the export step renames columns based on them
mat = mat.rename_axis(index='supplier_sector_code', columns='sector_code')
print(mat.shape)

(57, 35)
['PAP', 'PES', 'PPR', 'SIL', 'LAC', 'BAL', 'VID', 'ALD', 'FRT', 'CUL', 'CER', 'MET', 'CIN', 'GAN', 'AZU', 'ELE', 'DOM', 'REF', 'MIP', 'FRV', 'TAB', 'CAU', 'CHO', 'CUE'] ['REP', 'INM', 'IMP']
(60, 59)


# To fill the gaps, model the inventories as 0.5*(average per input type over all buyers) + 0.5*(average per buyer type over all inputs)

In [49]:
# Inspect the supplier-sector labels of the matrix.
# mat is used here because mat_av is only created in the next cell (from mat's own index),
# so referencing it at this point fails on a fresh kernel.
mat.index.sort_values()

Index(['ACF', 'ADM', 'ADP', 'AGR', 'AGU', 'ALD', 'ASO', 'AYG', 'AZU', 'BNA',
       'CAN', 'CAR', 'CAU', 'CEM', 'CER', 'CHO', 'CIN', 'COM', 'CON', 'CUE',
       'CUL', 'DEM', 'DOM', 'EDU', 'ELE', 'FID', 'FIN', 'FRT', 'FRV', 'GAN',
       'HEA', 'HIL', 'HOT', 'IMP', 'INM', 'LAC', 'MAD', 'MAN', 'MAQ', 'MET',
       'MIN', 'MIP', 'MOL', 'MUE', 'OTH', 'PAN', 'PAP', 'PES', 'PLS', 'POS',
       'PPR', 'PRO', 'PUB', 'QU1', 'QU2', 'REF', 'REP', 'RES', 'SAL', 'SEG',
       'SIL', 'TAB', 'TEL', 'TIC', 'TRA', 'UTI', 'VES', 'VID'],
      dtype='object', name='supplier_sector_code')

In [50]:
# Averages taken from the observed cells of `mat` (missing cells are ignored by mean):
#   - per buyer type: one value per column of mat, averaged over all inputs
#   - per input type: one value per row of mat, averaged over all buyers
#   - fallback value: the mean of the per-buyer (column) means
av_inventory_per_buyer_type = mat.mean(axis=0)
av_inventory_per_input_type = mat.mean(axis=1)
av_inventory = av_inventory_per_buyer_type.mean()

# Modeled inventory = 1/2 * average per input type + 1/2 * average per buyer type,
# i.e. it depends 50% on the buying sector and 50% on the input type.
# Both averages are first spread out to the full shape of mat.
n_inputs, n_buyers = len(mat.index), len(mat.columns)
mat_av_inventory_per_input_type = pd.DataFrame(
    np.tile(av_inventory_per_input_type.to_numpy()[:, np.newaxis], (1, n_buyers)),
    index=av_inventory_per_input_type.index,
    columns=mat.columns,
)
mat_av_inventory_per_buyer_type = pd.DataFrame(
    np.tile(av_inventory_per_buyer_type.reindex(mat.columns).to_numpy(), (n_inputs, 1)),
    index=av_inventory_per_input_type.index,
    columns=mat.columns,
)
mat_av = (mat_av_inventory_per_input_type + mat_av_inventory_per_buyer_type) / 2
# When one of the two averages is unavailable, fall back on the other one alone ...
mat_av = mat_av.where(mat_av_inventory_per_buyer_type.notnull(), mat_av_inventory_per_input_type)
mat_av = mat_av.where(mat_av_inventory_per_input_type.notnull(), mat_av_inventory_per_buyer_type)
# ... and when both are unavailable, on the fallback value.
mat_av = mat_av.fillna(av_inventory)
mat_av

sector_code,ADM,ADP,AGU,ASO,AYG,BNA,CAN,CAR,CEM,COM,...,AZU,ELE,DOM,REF,MIP,FRV,TAB,CAU,CHO,CUE
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADM,73.813019,94.571578,100.733333,61.082727,114.082051,69.72,63.633333,62.392157,71.405277,74.371164,...,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667
ADP,53.646352,74.404911,80.566667,40.916061,93.915385,49.553333,43.466667,42.22549,51.238611,54.204497,...,74.333333,74.333333,74.333333,74.333333,74.333333,74.333333,74.333333,74.333333,74.333333,74.333333
AGU,29.573435,50.331995,56.49375,16.843144,69.842468,25.480417,19.39375,18.152574,27.165694,30.131581,...,26.1875,26.1875,26.1875,26.1875,26.1875,26.1875,26.1875,26.1875,26.1875,26.1875
ALD,47.473015,68.231575,74.39333,34.742724,87.742048,43.379997,37.29333,36.052154,45.065274,48.031161,...,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666
ASO,71.146352,91.904911,98.066667,58.416061,111.415385,67.053333,60.966667,59.72549,68.738611,71.704497,...,109.333333,109.333333,109.333333,109.333333,109.333333,109.333333,109.333333,109.333333,109.333333,109.333333
AYG,32.924824,53.683383,59.845139,20.194533,73.193857,28.831806,22.745139,21.503962,30.517083,33.482969,...,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278
AZU,32.924824,53.683383,59.845139,20.194533,73.193857,28.831806,22.745139,21.503962,30.517083,33.482969,...,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278
BAL,47.473015,68.231575,74.39333,34.742724,87.742048,43.379997,37.29333,36.052154,45.065274,48.031161,...,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666
BNA,47.473015,68.231575,74.39333,34.742724,87.742048,43.379997,37.29333,36.052154,45.065274,48.031161,...,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666
CAN,32.924824,53.683383,59.845139,20.194533,73.193857,28.831806,22.745139,21.503962,30.517083,33.482969,...,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278


# Fill the gaps

In [51]:
# Survey-based values are kept as they are; only the empty cells of mat
# take the corresponding modeled value from mat_av.
filled_mat = mat.where(mat.notnull(), mat_av)
# NOTE(review): dividing by 7 presumably converts days into weeks, as the variable name suggests — confirm the survey unit
filled_mat_week = filled_mat.div(7)
print(filled_mat_week.shape)
filled_mat_week

(60, 59)


sector_code,ADM,ADP,AGU,ASO,AYG,BNA,CAN,CAR,CEM,COM,...,AZU,ELE,DOM,REF,MIP,FRV,TAB,CAU,CHO,CUE
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADM,27.142857,1.0,14.390476,4.285714,16.297436,9.96,9.090476,8.913165,52.142857,6.0,...,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952
ADP,7.663765,10.629273,11.509524,5.845152,13.416484,7.079048,6.209524,6.032213,7.319802,7.7435,...,10.619048,10.619048,10.619048,10.619048,10.619048,10.619048,10.619048,10.619048,10.619048,10.619048
AGU,2.214286,11.0,8.070536,2.406163,9.977495,3.64006,2.770536,2.593225,3.880813,4.285714,...,3.741071,3.741071,3.741071,3.741071,3.741071,3.741071,3.741071,3.741071,3.741071,3.741071
ALD,2.857143,11.214286,10.627619,0.857143,25.714286,3.607143,5.327619,5.150308,3.805195,5.987871,...,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237
ASO,22.0,4.285714,14.009524,8.345152,15.916484,9.579048,8.709524,8.532213,9.819802,12.857143,...,15.619048,15.619048,15.619048,15.619048,15.619048,15.619048,15.619048,15.619048,15.619048,15.619048
AYG,4.703546,7.669055,8.549306,2.884933,0.657143,4.118829,1.8,1.0,0.428571,3.521429,...,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611
AZU,4.703546,7.669055,8.549306,2.884933,0.657143,4.118829,1.8,1.0,0.428571,3.521429,...,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611
BAL,2.857143,11.214286,10.627619,0.857143,25.714286,3.607143,5.327619,5.150308,3.805195,5.987871,...,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237
BNA,2.857143,11.214286,10.627619,0.857143,25.714286,3.607143,5.327619,5.150308,3.805195,5.987871,...,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237
CAN,4.703546,7.669055,8.549306,2.884933,0.657143,4.118829,1.8,1.0,0.428571,3.521429,...,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611


# Export

In [52]:
# Reshape the matrix into a long table, one row per (buying sector, input sector) pair,
# and write it to the output folder without the positional index.
export_column_names = {
    "sector_code": "buying_sector",
    "supplier_sector_code": "input_sector",
    0: "inventory_duration_target",
}
inventory_duration_targets = (
    filled_mat_week
    .unstack()
    .reset_index()
    .rename(columns=export_column_names)
)
export_path = os.path.join(output_folder, "59sector_inventory_targets.csv")
inventory_duration_targets.to_csv(export_path, index=False)