In [1]:
import os
import pandas as pd
import numpy as np

In [43]:
input_folder = os.path.join('..', '..', '..', 'Ecuador', 'Data')
output_folder = os.path.join('..', 'input', 'Ecuador', 'National')

# Load Tanzania inventory data and map it into the sector we are using

In [35]:
isicClass_to_isicSection = pd.read_csv(os.path.join(input_folder, 'Structured', 'Inventories', "ISICRev4_link.txt"), sep="\t").set_index('Class')['Section']
isicSection_to_sector13 = pd.read_csv(os.path.join(input_folder, 'Structured', 'Inventories', "ISICRev4Section_13sectors.csv")).set_index('ISICRev4Section')['13sectors']
isicSectionName_to_isicSectionCode = pd.read_csv(os.path.join(input_folder, "Structured", "Inventories", "ISICRev4_section_forMappingSupplierSurvey.txt"), delimiter="\t", dtype=str).set_index('Description')['Section']

isic4_to_sector61 = pd.read_csv(os.path.join(input_folder, 'Structured', 'Sectors', "isic4_to_61sector.csv"), sep=",", dtype={'isic4_map': str})
isic4_to_sector61 = isic4_to_sector61.dropna(subset=['isic4_map']).set_index('isic4_map')['trigram']

In [38]:
# load data
data = pd.read_stata(os.path.join(input_folder, '..', "..", "Tanzania", "Survey", "Results", 'Firm_TZ_weighted_27February_2019.dta'))
print(data.shape)

# map sectors
#data['sector_code'] = data['icisorg'].map(isicClass_to_isicSection)
data['sector_code'] = data['icisorg'].astype(str).map(isic4_to_sector61)
#print(data['sector_code'].isnull().sum())

# Load supplier data. 1 group of columns for five main suppliers
# rename columns and treat coded values
data = data.rename(columns={'q4_2_'+str(i):'supplier_'+str(i)+'_sector' for i in range(1,6)})
data = data.rename(columns={'q4_9_'+str(i):'supplier_'+str(i)+'_inventory' for i in range(1,6)})
for i in range(1,6):
    data['supplier_'+str(i)+'_inventory'] = data['supplier_'+str(i)+'_inventory'].replace([8888,9999],np.nan)
    
# put all supplier inventory and sector together
stacked_table = pd.concat([data[['sector_code', 'supplier_'+str(i)+'_sector', "supplier_"+str(i)+"_inventory"]].copy().rename(columns={'supplier_'+str(i)+'_sector':'supplier_sector', "supplier_"+str(i)+"_inventory":"supplier_inventory"}) for i in range(1,6)])
stacked_table = stacked_table.dropna()

# map sectors of suppliers
stacked_table['supplier_sector_code'] = stacked_table['supplier_sector'].map(isicSectionName_to_isicSectionCode)
stacked_table['supplier_sector_code'] = stacked_table['supplier_sector_code'].map(isicSection_to_sector13)
print(stacked_table['supplier_sector_code'].isnull().sum())

# compute mean
res = stacked_table.groupby(['supplier_sector_code', 'sector_code'])['supplier_inventory'].mean().reset_index()
res

(837, 399)
0


  data['sector_code'] = data['icisorg'].astype(str).map(isic4_to_sector61)


Unnamed: 0,supplier_sector_code,sector_code,supplier_inventory
0,ACF,ADM,30.0
1,ACF,ASO,7.0
2,ACF,CAR,7.0
3,ACF,CEM,30.0
4,ACF,COM,15.0
...,...,...,...
152,UTI,POS,18.0
153,UTI,QU1,0.0
154,UTI,REP,30.0
155,UTI,RES,30.0


# Turn it into a matrix

In [39]:
# prepare matrix and fill it
mat = res.set_index(['sector_code', 'supplier_sector_code']).unstack(level=0)
mat.columns = mat.columns.droplevel(0)
print(mat.shape)

# add missing columns and rows
missing_col = list(set(isicSection_to_sector13.to_list()) - set(mat.columns))
missing_row = list(set(isicSection_to_sector13.to_list()) - set(mat.index)) + ['IMP']
print(missing_col, missing_row)
for row in missing_row:
    mat.loc[row] = None
for col in missing_col:
    mat[col] = None
print(mat.shape)

(14, 35)
['TIC', 'OTH', 'ACF', 'AGR', 'MIN', 'PRO', 'PUB', 'HEA', 'UTI'] ['IMP']
(15, 44)


# To fill the gaps, model the inventories as 0.5*(av per input type over all buyer) + 0.5*(av per buyer type over all input)

In [40]:
# compute average inventory per input type (over all buyer), per buyer type (over all input), and global average
av_inventory_per_input_type = mat.mean(axis=1)
av_inventory_per_buyer_type = mat.mean(axis=0)
av_inventory = mat.mean().mean()

# compute the modeled inventory based on the available data. inventory = 1/2 * av_inventory_per_input_type + 1/2 * av_inventory_per_buyer_type
# in other words, inventory depends 50% on the sector, and 50% on the input type
mat_av_inventory_per_input_type = pd.DataFrame(index=mat.columns, columns=av_inventory_per_input_type.index, data=[av_inventory_per_input_type]*len(mat.columns)).transpose()
mat_av_inventory_per_buyer_type = pd.DataFrame(index=av_inventory_per_input_type.index, columns=mat.columns, data=[av_inventory_per_buyer_type]*len(mat.index))
mat_av = (mat_av_inventory_per_input_type + mat_av_inventory_per_buyer_type) / 2
mat_av[mat_av_inventory_per_buyer_type.isnull()] = mat_av_inventory_per_input_type[mat_av_inventory_per_buyer_type.isnull()]
mat_av[mat_av_inventory_per_input_type.isnull()] = mat_av_inventory_per_buyer_type[mat_av_inventory_per_input_type.isnull()]
mat_av[mat_av.isnull()] = av_inventory
mat_av

sector_code,ADM,ADP,AGU,ASO,AYG,BNA,CAN,CAR,CEM,COM,...,VES,TIC,OTH,ACF,AGR,MIN,PRO,PUB,HEA,UTI
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACF,44.289531,38.50854,52.382025,16.678388,46.748691,18.627858,15.282025,21.315358,43.38051,25.684049,...,69.969318,17.96405,17.96405,17.96405,17.96405,17.96405,17.96405,17.96405,17.96405,17.96405
AGR,51.752645,45.971654,59.845139,24.141503,54.211806,26.090972,22.745139,28.778472,50.843624,33.147163,...,77.432432,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278,32.890278
CON,43.689589,37.908598,51.782083,16.078447,46.14875,18.027917,14.682083,20.715417,42.780568,25.084108,...,69.369376,16.764167,16.764167,16.764167,16.764167,16.764167,16.764167,16.764167,16.764167,16.764167
EDU,70.099173,64.318182,78.191667,42.48803,72.558333,44.4375,41.091667,47.125,69.190152,51.493691,...,95.778959,69.583333,69.583333,69.583333,69.583333,69.583333,69.583333,69.583333,69.583333,69.583333
FIN,50.307506,44.526515,58.4,22.696364,52.766667,24.645833,21.3,27.333333,49.398485,31.702024,...,75.987293,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
HEA,83.839123,78.058132,91.931617,56.227981,86.298284,58.17745,54.831617,60.86495,82.930102,65.233641,...,109.51891,97.063234,97.063234,97.063234,97.063234,97.063234,97.063234,97.063234,97.063234,97.063234
MAN,66.300836,60.519845,74.39333,38.689694,68.759997,40.639163,37.29333,43.326663,65.391815,47.695354,...,91.980623,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666,61.98666
MIN,40.678538,34.897547,48.771032,13.067395,43.137698,15.016865,11.671032,17.704365,39.769517,22.073056,...,66.358324,10.742063,10.742063,10.742063,10.742063,10.742063,10.742063,10.742063,10.742063,10.742063
OTH,74.571395,68.790404,82.663889,46.960253,77.030556,48.909722,45.563889,51.597222,73.662374,55.965913,...,100.251182,78.527778,78.527778,78.527778,78.527778,78.527778,78.527778,78.527778,78.527778,78.527778
PRO,92.640839,86.859848,100.733333,65.029697,95.1,66.979167,63.633333,69.666667,91.731818,74.035358,...,118.320626,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667,114.666667


# Fill the gaps

In [41]:
# now, in the matrix with only data-based inventories, for any missing values, use the one from the average matrix
filled_mat = mat.copy()
filled_mat = filled_mat.mask(filled_mat.isnull(), mat_av)
filled_mat_week = filled_mat / 7
filled_mat_week

sector_code,ADM,ADP,AGU,ASO,AYG,BNA,CAN,CAR,CEM,COM,...,VES,TIC,OTH,ACF,AGR,MIN,PRO,PUB,HEA,UTI
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ACF,4.285714,5.50122,7.483146,1.0,6.678384,2.661123,2.183146,1.0,4.285714,2.142857,...,9.995617,2.566293,2.566293,2.566293,2.566293,2.566293,2.566293,2.566293,2.566293,2.566293
AGR,7.393235,6.567379,8.549306,3.448786,0.657143,3.727282,1.8,1.0,0.428571,3.521429,...,4.285714,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611,4.698611
CON,4.285714,5.415514,7.39744,2.296921,6.592679,2.575417,2.09744,2.959345,4.952381,3.419048,...,9.909911,2.394881,2.394881,2.394881,2.394881,2.394881,2.394881,2.394881,2.394881,2.394881
EDU,10.014168,9.188312,11.170238,6.069719,10.365476,6.348214,5.870238,6.732143,9.884307,2.428571,...,13.682708,9.940476,9.940476,9.940476,9.940476,9.940476,9.940476,9.940476,9.940476,9.940476
FIN,7.186787,6.360931,8.342857,3.242338,7.538095,3.520833,3.042857,3.904762,7.056926,4.528861,...,10.855328,4.285714,4.285714,4.285714,4.285714,4.285714,4.285714,4.285714,4.285714,4.285714
HEA,11.977018,11.151162,13.133088,8.032569,12.328326,8.311064,7.833088,8.694993,22.285714,4.268571,...,15.645559,13.866176,13.866176,13.866176,13.866176,13.866176,13.866176,13.866176,13.866176,13.866176
MAN,2.857143,11.214286,10.627619,0.857143,25.714286,3.607143,5.327619,6.189523,3.805195,5.987871,...,8.353383,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237,8.855237
MIN,0.285714,4.985364,6.96729,0.428571,6.162528,2.145266,1.66729,2.529195,1.206349,2.392857,...,9.479761,1.53458,1.53458,1.53458,1.53458,1.53458,1.53458,1.53458,1.53458,1.53458
OTH,22.0,4.285714,11.809127,6.708608,11.004365,6.987103,6.509127,7.371032,10.523196,12.857143,...,14.321597,11.218254,11.218254,11.218254,11.218254,11.218254,11.218254,11.218254,11.218254,11.218254
PRO,27.142857,1.0,14.390476,4.285714,13.585714,9.568452,9.090476,9.952381,52.142857,6.0,...,16.902947,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952,16.380952


# Export

In [44]:
inventory_duration_targets = filled_mat_week.unstack().reset_index()
inventory_duration_targets = inventory_duration_targets.rename(columns={"sector_code":"buying_sector", "supplier_sector_code":"input_sector", 0:"inventory_duration_target"})
inventory_duration_targets.to_csv(os.path.join(output_folder, "61sector_inventory_targets.csv"), index=False)