# Forward linkage calculation 

- This notebook contains code for calculating forward linkage indexes for 42 countries from the WIOD database, for the 2000-2014 period.
- The calculation of backward linkage indexes, weighted forward linkage indexes and weighted backward linkage indexes requires slightly different code and is not included in this notebook's chain of for loops.
- Feel free to reach out if you want the code for these other calculations or if you require the precise format of the data files. My email address: ebelingbarros@gmail.com
- If you want to rerun the code in this notebook, be careful with the path of the csv files. The csv files should be available in a "data" folder, one file for each country.
- The common error ```ValueError: operands could not be broadcast together with shapes (56,56) (0,56)``` can be resolved by deleting the additional csv files (named `<country>.csv.csv`) that are generated in the "data" folder every time the calculation is performed.

### Importing libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from numpy.linalg import inv
import glob

### For loops that generate calculations for all years and countries

In [None]:
# Forward linkage calculation for every country file and every year.
#
# For each "<country>.csv" file in the data folder, this cell computes one
# normalised linkage index per sector and year (2000-2014) and writes the
# result next to the input as "<country>.csv.csv".

# Sector codes, in the order used for the matrix columns and the output index.
codes = [
    'A01', 'A02', 'A03', 'B', 'C10-C12', 'C13-C15', 'C16', 'C17', 'C18',
    'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27', 'C28',
    'C29', 'C30', 'C31_C32', 'C33', 'D35', 'E36', 'E37-E39', 'F', 'G45',
    'G46', 'G47', 'H49', 'H50', 'H51', 'H52', 'H53', 'I', 'J58', 'J59_J60',
    'J61', 'J62_J63', 'K64', 'K65', 'K66', 'L68', 'M69_M70', 'M71', 'M72',
    'M73', 'M74_M75', 'N', 'O84', 'P85', 'Q', 'R_S', 'T', 'U',
]
n_sectors = len(codes)
first_year = ['1999']
year_list = list(range(2000, 2015))


def list_input_files(folder):
    """Return the csv files in *folder*, excluding previously generated results.

    Results are written into the same folder as "<name>.csv.csv", so a plain
    "*.csv" glob would pick them up on a second run and feed them back into
    the calculation (they contain no matching rows, which produces the
    "operands could not be broadcast together" ValueError).
    """
    return [f for f in glob.glob(folder + "/*.csv") if not f.endswith(".csv.csv")]


def forward_linkage_index(flow_tbl):
    """Return one normalised linkage index per column of a square matrix.

    Inverts (I - flow_tbl), sums the inverse along axis 0, divides each sum
    by the matrix size, and normalises by the mean of all entries of the
    inverse.
    """
    size = flow_tbl.shape[0]
    # presumably flow_tbl holds input coefficients, making this a
    # Leontief-type inverse -- confirm against the data files
    mx_A = np.linalg.inv(np.identity(size) - flow_tbl)
    total_avg = mx_A.sum() / (size ** 2)
    # NOTE(review): axis=0 adds up each column of the inverse; confirm that
    # this is the intended direction for a *forward* linkage index.
    return (np.sum(mx_A, axis=0) / size) / total_avg


# Specify data path and collect the input files
path = r'data'
all_files = list_input_files(path)

# First for loop: read one country file and set up its result frame
for filename in all_files:
    data = pd.read_csv(filename)

    # Target dataframe indexed by sector code. The empty '1999' column is kept
    # so that the written csv keeps the layout the aggregation step expects.
    forward_linkages = pd.DataFrame(index=pd.Index(codes, name='codes'), columns=first_year)

    # Column selection does not depend on the year, so do it once per file
    data1 = pd.DataFrame(data, columns=['Year', 'Code', 'Origin'] + codes)

    # Second for loop: perform the actual calculation year by year
    for j in year_list:
        # Data preparation and cleaning: domestic rows of the current year
        data2 = data1.loc[(data1['Year'] == j) & (data1['Origin'] == 'Domestic')]
        data3 = data2.drop(['Year', 'Code', 'Origin'], axis=1)
        data3 = data3.apply(pd.to_numeric, errors='coerce')

        # Matrix calculation steps
        flow_tbl = data3.to_numpy()
        if flow_tbl.shape != (n_sectors, n_sectors):
            # Fail with the offending file and year instead of an opaque
            # broadcasting error from the matrix subtraction.
            raise ValueError(
                f"{filename}: expected a {n_sectors}x{n_sectors} domestic table "
                f"for year {j}, got shape {flow_tbl.shape}"
            )
        forward = pd.DataFrame(
            {j: forward_linkage_index(flow_tbl)},
            index=pd.Index(codes, name='codes'),
        )

        # Append this year's column to the running result
        forward_linkages = pd.merge(forward_linkages, forward, on="codes")
        # Country name = file name without the leading "data/" and trailing
        # ".csv"; assigned here so the output column order stays unchanged.
        forward_linkages['Country'] = filename[5:-4]

    # Write the finished table once per country
    forward_linkages.to_csv(filename + '.csv')

### Creating dataframe for forward linkages

In [None]:
# Locate the per-country result files ("<name>.csv.csv") in the data folder
path = r'data'
filenames = glob.glob(path + "/*.csv.csv")

# Read every result file and stack them into a single DataFrame
dfs = [pd.read_csv(result_file) for result_file in filenames]
df_final = pd.concat(dfs, ignore_index=True)

# Drop the '1999' column, then keep the identifiers followed by the yearly
# columns 2000-2014 (read back from csv, so the labels are strings)
df_final.drop(columns=['1999'], inplace=True)
year_columns = [str(year) for year in range(2000, 2015)]
df_final = df_final[['codes', 'Country'] + year_columns]
df_final