In [11]:
import os
import pandas as pd
import numpy as np

In [2]:
# Relative locations of the raw data (read) and of the model's supply inputs (written)
input_folder = os.path.join(*(['..'] * 3), 'Ecuador', 'Data')
output_folder = os.path.join(os.pardir, 'input', 'Ecuador', 'Supply')

# Load Tanzania inventory data and map it into the sectors we are using

In [79]:
# Lookup tables, each reduced to a Series keyed by the code being translated.

# ISIC Rev.4 class -> ISIC Rev.4 section
isicClass_to_isicSection = pd.read_csv(
    os.path.join(input_folder, 'Structured', 'Inventories', "ISICRev4_link.txt"),
    delimiter="\t",
).set_index('Class')['Section']

# ISIC Rev.4 section -> sector label from the '13sectors' classification
isicSection_to_sector13 = pd.read_csv(
    os.path.join(input_folder, 'Structured', 'Inventories', "ISICRev4Section_13sectors.csv"),
).set_index('ISICRev4Section')['13sectors']

# Section description text -> section code; every field is read as a string
isicSectionName_to_isicSectionCode = pd.read_csv(
    os.path.join(input_folder, "Structured", "Inventories", "ISICRev4_section_forMappingSupplierSurvey.txt"),
    sep="\t",
    dtype=str,
).set_index('Description')['Section']

In [80]:
# Load the Tanzanian firm survey.
# The read has to stay active: every statement below needs `data`, so with this
# line commented out the cell only runs when `data` survives from an earlier
# kernel session and fails with a NameError after Restart & Run All.
data = pd.read_stata(os.path.join(input_folder, '..', "..", "Tanzania", "Survey", "Results", 'Firm_TZ_weighted_27February_2019.dta'))
print(data.shape)

# Map each firm: ISIC class ('icisorg') -> ISIC section -> one of the 13 sectors
data['sector_code'] = data['icisorg'].map(isicClass_to_isicSection).map(isicSection_to_sector13)
# Number of firms whose sector could not be mapped. This check must look at
# `data`: `stacked_table` does not exist yet at this point of the cell.
print(data['sector_code'].isnull().sum())

# Supplier data: one group of columns for each of the five main suppliers.
# Give the question columns readable names ...
data = data.rename(columns={'q4_2_{}'.format(i): 'supplier_{}_sector'.format(i) for i in range(1, 6)})
data = data.rename(columns={'q4_9_{}'.format(i): 'supplier_{}_inventory'.format(i) for i in range(1, 6)})
# ... and turn the coded values 8888 / 9999 into missing values
# (presumably "don't know" / "no answer" codes -- TODO confirm against the questionnaire)
for i in range(1, 6):
    inventory_col = 'supplier_{}_inventory'.format(i)
    data[inventory_col] = data[inventory_col].replace([8888, 9999], np.nan)

# Stack the five supplier groups into one long table:
# one row per (firm, supplier) with the firm's sector, the supplier's sector and the inventory
stacked_table = pd.concat([
    data[['sector_code', 'supplier_{}_sector'.format(i), 'supplier_{}_inventory'.format(i)]].rename(columns={
        'supplier_{}_sector'.format(i): 'supplier_sector',
        'supplier_{}_inventory'.format(i): 'supplier_inventory',
    })
    for i in range(1, 6)
])
# Keep only rows where the firm's sector, the supplier's sector and the inventory are all known
stacked_table = stacked_table.dropna()

# Map the supplier's sector: section description -> section code -> one of the 13 sectors
stacked_table['supplier_sector_code'] = stacked_table['supplier_sector'].map(isicSectionName_to_isicSectionCode)
stacked_table['supplier_sector_code'] = stacked_table['supplier_sector_code'].map(isicSection_to_sector13)
# Number of supplier rows whose sector could not be mapped
print(stacked_table['supplier_sector_code'].isnull().sum())

# Mean inventory for every (input sector, buying sector) pair observed in the survey
res = stacked_table.groupby(['supplier_sector_code', 'sector_code'])['supplier_inventory'].mean().reset_index()
res

(837, 400)
0
0


Unnamed: 0,supplier_sector_code,sector_code,supplier_inventory
0,ACF,ACF,7.343750
1,ACF,EDU,50.250000
2,ACF,HEA,14.000000
3,ACF,MAN,26.714286
4,ACF,OTH,7.000000
...,...,...,...
85,UTI,MAN,7.750000
86,UTI,PRO,41.857143
87,UTI,PUB,77.000000
88,UTI,TIC,31.714286


# Turn it into a matrix

In [76]:
# Reshape the long table of means into a matrix:
# rows = input (supplier) sector, columns = buying sector
mat = res.set_index(['sector_code', 'supplier_sector_code']).unstack(level=0)
mat.columns = mat.columns.droplevel(0)
print(mat.shape)

# Sectors of the 13-sector classification that the survey never reports, as buyer
# (columns) or as input (rows). Sorting makes the row/column order -- and hence the
# exported file -- reproducible; a bare set difference has no stable order.
all_sectors = set(isicSection_to_sector13.dropna())
missing_col = sorted(all_sectors - set(mat.columns))
missing_row = sorted(all_sectors - set(mat.index))
# An extra 'IMP' input row is always required; never overwrite one that already exists
if 'IMP' not in mat.index and 'IMP' not in missing_row:
    missing_row.append('IMP')
print(missing_col, missing_row)

# Append the missing rows/columns filled with NaN. Unlike assigning None, this keeps
# every column numeric (float), which the averages and the division further down rely on.
mat = mat.reindex(index=list(mat.index) + missing_row, columns=list(mat.columns) + missing_col)
print(mat.shape)

(14, 12)
['AGR', 'MIN'] ['IMP']
(15, 14)


# To fill the gaps, model the inventories as 0.5 * (average per input type over all buyers) + 0.5 * (average per buyer type over all inputs)

In [81]:
# Observed averages: per input type (across buyers, row-wise), per buyer type
# (across inputs, column-wise), and an overall figure taken as the mean of the
# per-buyer means.
av_inventory_per_input_type = mat.mean(axis=1)
av_inventory_per_buyer_type = mat.mean(axis=0)
av_inventory = mat.mean().mean()

# Modelled inventory = 1/2 * average of the input type + 1/2 * average of the buyer type,
# i.e. it depends half on what is bought and half on who buys it.
# First spread each average over the full (input x buyer) grid, aligned on the labels of `mat`:
# every column of the first grid holds the per-input averages ...
mat_av_inventory_per_input_type = pd.DataFrame(
    np.repeat(av_inventory_per_input_type.reindex(mat.index).to_numpy()[:, np.newaxis], len(mat.columns), axis=1),
    index=av_inventory_per_input_type.index,
    columns=mat.columns,
)
# ... and every row of the second grid holds the per-buyer averages.
mat_av_inventory_per_buyer_type = pd.DataFrame(
    np.repeat(av_inventory_per_buyer_type.reindex(mat.columns).to_numpy()[np.newaxis, :], len(mat.index), axis=0),
    index=av_inventory_per_input_type.index,
    columns=mat.columns,
)

# Blend the two grids, then fall back step by step wherever an average is unavailable:
mat_av = (mat_av_inventory_per_input_type + mat_av_inventory_per_buyer_type) / 2
# no buyer average -> use the input average alone
mat_av = mat_av.where(mat_av_inventory_per_buyer_type.notnull(), mat_av_inventory_per_input_type)
# no input average -> use the buyer average alone
mat_av = mat_av.where(mat_av_inventory_per_input_type.notnull(), mat_av_inventory_per_buyer_type)
# neither available -> use the overall average
mat_av = mat_av.where(mat_av.notnull(), av_inventory)
mat_av

sector_code,ACF,CON,EDU,FIN,HEA,MAN,OTH,PRO,PUB,TIC,TRA,UTI,AGR,MIN
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ACF,19.918633,17.994252,101.239804,22.670294,25.888154,50.774911,17.153116,48.490482,38.983267,31.742751,26.62054,52.856752,18.913504,18.913504
AGR,34.506064,32.581684,115.827236,37.257725,40.475585,65.362343,31.740547,63.077914,53.570699,46.330183,41.207972,67.444184,48.088367,48.088367
CON,20.020214,18.095833,101.341385,22.771875,25.989735,50.876492,17.254697,48.592063,39.084848,31.844332,26.722122,52.958333,19.116667,19.116667
EDU,52.899381,50.975,134.220552,55.651042,58.868901,83.755659,50.133864,81.47123,71.964015,64.723499,59.601288,85.8375,84.875,84.875
FIN,25.461881,23.5375,106.783052,28.213542,31.431401,56.318159,22.696364,54.03373,44.526515,37.285999,32.163788,58.4,30.0,30.0
HEA,60.751402,58.827021,142.072573,63.503063,66.720923,91.60768,57.985885,89.323251,79.816036,72.57552,67.45331,93.689521,100.579043,100.579043
MAN,40.551509,38.627129,121.872681,43.30317,46.52103,71.407788,37.785992,69.123359,59.616144,52.375628,47.253417,73.489629,60.179257,60.179257
MIN,14.407714,12.483333,95.728885,17.159375,20.377235,45.263992,11.642197,42.979563,33.472348,26.231832,21.109622,47.345833,7.891667,7.891667
OTH,49.72577,47.801389,131.046941,52.477431,55.69529,80.582048,46.960253,78.297619,68.790404,61.549888,56.427677,82.663889,78.527778,78.527778
PRO,67.795214,65.870833,149.116385,70.546875,73.764735,98.651492,65.029697,96.367063,86.859848,79.619332,74.497122,100.733333,114.666667,114.666667


# Fill the gaps

In [82]:
# Keep every survey-based value; wherever the data matrix has a gap, take the
# modelled value found at the same (input sector, buying sector) position in mat_av.
filled_mat = mat.where(mat.notnull(), mat_av)
# Divide by 7 -- presumably converting days into weeks, as the variable name
# suggests (TODO confirm the unit used in the survey)
filled_mat_week = filled_mat / 7
filled_mat_week

sector_code,ACF,CON,EDU,FIN,HEA,MAN,OTH,PRO,PUB,TIC,TRA,UTI,AGR,MIN
supplier_sector_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ACF,1.049107,2.570607,7.178571,3.238613,2.0,3.816327,1.0,4.285714,5.569038,0.142857,2.142857,7.550965,2.701929,2.701929
AGR,3.061224,4.654526,28.214286,4.285714,4.285714,2.577143,4.534364,9.011131,7.652957,2.142857,3.521429,9.634883,6.869767,6.869767
CON,2.860031,1.815476,14.477341,1.333333,3.712819,2.952381,2.464957,4.285714,5.58355,4.54919,3.267857,7.565476,2.730952,2.730952
EDU,7.557054,7.282143,20.25,7.950149,8.409843,11.965094,7.161981,11.638747,10.280574,9.246214,4.0,12.2625,12.125,12.125
FIN,3.637412,3.3625,15.254722,4.285714,4.4902,8.045451,3.242338,7.719104,6.360931,5.326571,4.594827,8.342857,4.285714,4.285714
HEA,8.678772,8.40386,37.214286,9.071866,4.133739,11.857143,8.283698,12.760464,11.402291,10.367931,4.268571,13.384217,14.368435,14.368435
MAN,3.420849,0.285714,27.742857,4.285714,6.645861,5.880399,0.857143,12.380952,11.214286,13.591837,6.310616,10.498518,8.597037,8.597037
MIN,2.058245,1.142857,13.675555,2.451339,2.911034,1.514286,0.428571,0.285714,4.781764,1.0,2.392857,6.76369,1.127381,1.127381
OTH,1.0,6.82877,26.785714,7.496776,7.95647,11.511721,6.708608,22.0,4.285714,0.380952,12.857143,11.809127,11.218254,11.218254
PRO,9.685031,9.410119,34.857143,4.285714,0.428571,52.142857,4.285714,27.142857,1.0,17.285714,6.0,14.390476,16.380952,16.380952


# Export

In [86]:
# Flatten the matrix into one row per (buying sector, input sector) pair,
# label the columns, and write the table to the output folder.
inventory_duration_targets = (
    filled_mat_week
    .unstack()
    .reset_index()
    .rename(columns={
        "sector_code": "buying_sector",
        "supplier_sector_code": "input_sector",
        0: "inventory_duration_target",
    })
)
inventory_duration_targets.to_csv(
    os.path.join(output_folder, "inventory_duration_targets.csv"),
    index=False,
)