In [None]:
import numpy as np
import pandas as pd

# Folder that holds the CSV inputs read by the 2009 cells below.
# NOTE(review): this is a user-specific Dropbox location; adjust it when running elsewhere.
base_path = "~/Dropbox/GVC2023/output/data"


### $\alpha$: shape $(N, J)$

In [None]:
# Read the alpha table and keep only the rows whose ttype is 'sa'.
df = pd.read_csv(f"{base_path}/alpha.csv")
df_sa = df.loc[df['ttype'] == 'sa']

# Positional columns 0, 2 and 6 serve as row key, column key and value
# (presumably country, industry and alpha -- TODO confirm against the CSV header).
df_selected = df_sa.iloc[:, [0, 2, 6]]
row_key, col_key, value_key = df_selected.columns

# Order the rows by row key, then by column key.
df_sorted = df_selected.sort_values(by=[row_key, col_key])

# Spread into a (row key) x (column key) table and hand it over as a NumPy array.
df_pivot = df_sorted.pivot(index=row_key, columns=col_key, values=value_key)
alpha = df_pivot.to_numpy()


In [None]:
# Row and column counts of alpha; the later cells use them to reshape flat columns into 3-D arrays.
N,J = alpha.shape

### $\beta$: shape $(N, J)$


The $(n, j)$ element denotes the value-added share of goods from sector $j$ for country $n$.

In [None]:
# Read the beta_s table and keep only the rows whose ttype is 'sa'.
df = pd.read_csv(f"{base_path}/beta_s.csv")
df_sa = df.loc[df['ttype'] == 'sa']

# Positional columns 0, 1 and 3 serve as row key, column key and value
# (presumably country, industry and beta -- TODO confirm against the CSV header).
df_selected = df_sa.iloc[:, [0, 1, 3]]
row_key, col_key, value_key = df_selected.columns

# Order the rows by row key, then by column key.
df_sorted = df_selected.sort_values(by=[row_key, col_key])

# Spread into a (row key) x (column key) table and convert to a NumPy array.
df_pivot = df_sorted.pivot(index=row_key, columns=col_key, values=value_key)
beta = df_pivot.to_numpy()

### $\gamma$: shape $(N, J, J)$

The $(n, j, k)$ element refers to country $n$, using sector $j$, and producing sector $k$.

In [None]:
# Read the beta_sr table and keep only the rows whose ttype is 'sa'.
df = pd.read_csv(f"{base_path}/beta_sr.csv")
df_sa = df.loc[df['ttype'] == 'sa']

# Positional columns 0, 3, 2 and 4, in that order: the first three are the sort keys,
# which fixes the axis order of the reshaped array below.
# NOTE(review): confirm against the CSV header which of columns 3 and 2 is the using sector
# and which is the producing sector.
df_selected = df_sa.iloc[:, [0, 3, 2, 4]]
sort_keys = list(df_selected.columns[:3])
df_sorted = df_selected.sort_values(by=sort_keys)

# The sorted 'beta_sr' column is laid out row-major, so it folds directly into (N, J, J).
values = df_sorted['beta_sr'].to_numpy()
gamma = values.reshape((N, J, J))



### $\theta$: shape $(J, )$

Trade elasticity

In [None]:
# Hard-coded trade elasticities: the leading entries are listed individually and the
# trailing entries all repeat 7.31.
# NOTE(review): the length of this array is not checked against J in this cell -- confirm it matches.
theta = np.array([6.26, 8.05, 7.31, 6.31, 9.12, 11.37, 6.10, 6.31, 6.22, 4.78, 7.78, 7.43, 9.69, 7.13, 8.01, 
                  7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31])  



### $\pi^f$: shape $(N, N, J)$

The expenditure share array: the $(n, i, j)$ element denotes country $n$'s share of expenditure on sector-$j$ goods that is sourced from country $i$.

In [None]:
df = pd.read_csv(f"{base_path}/pi_sf_nit.csv")

# Base year: keep the 'sa' records of 2009 only.
is_base_record = (df['ttype'] == 'sa') & (df['Year'] == 2009)
df_sa = df.loc[is_base_record]

# Positional columns 0, 1 and 3 are the sort keys; column 6 carries the value.
df_selected = df_sa.iloc[:, [0, 1, 3, 6]]
sort_keys = list(df_selected.columns[:3])
df_sorted = df_selected.sort_values(by=sort_keys)

# The sorted 'pi_sf_nit' column is row-major in the three keys, so it folds into (N, N, J).
values = df_sorted['pi_sf_nit'].to_numpy()
pi_f = values.reshape((N, N, J))


### $\pi^m$: shape $(N, N, J)$

The expenditure share array: the $(n, i, j)$ element denotes country $n$'s share of expenditure on sector-$j$ goods that is sourced from country $i$.

In [None]:
df = pd.read_csv(f"{base_path}/pi_sm_nit.csv")

# Base year: keep the 'sa' records of 2009 only.
is_base_record = (df['ttype'] == 'sa') & (df['Year'] == 2009)
df_sa = df.loc[is_base_record]

# Positional columns 0, 1 and 3 are the sort keys; column 6 carries the value.
df_selected = df_sa.iloc[:, [0, 1, 3, 6]]
sort_keys = list(df_selected.columns[:3])
df_sorted = df_selected.sort_values(by=sort_keys)

# The sorted 'pi_sm_nit' column is row-major in the three keys, so it folds into (N, N, J).
values = df_sorted['pi_sm_nit'].to_numpy()
pi_m = values.reshape((N, N, J))

### Trade Deficit $D_n$

In [None]:
df = pd.read_csv(f"{base_path}/Dn.csv")

# Keep the 'sa' records of the 2009 base year.
is_base_record = (df['ttype'] == 'sa') & (df['Year'] == 2009)
df_sa = df.loc[is_base_record]

# Positional column 0 is the sort key (presumably the country -- TODO confirm);
# positional column 3 is kept alongside it.
df_selected = df_sa.iloc[:, [0, 3]]
sort_key = df_selected.columns[0]
df_sorted = df_selected.sort_values(by=[sort_key])

# One 'Dn' entry per remaining row, in sorted key order.
D = df_sorted['Dn'].to_numpy()


### VA: shape (N, )

note: here I assume that the total value added is 2 times the value added in tradable sectors

In [None]:
df = pd.read_csv(f"{base_path}/value_added_tradables.csv")

# Only a year filter is applied here (2009); there is no ttype filter on this file.
df_sa = df.loc[df['Year'] == 2009]

# Positional column 0 is the grouping key (presumably the country -- TODO confirm);
# positional column 3 is the amount that gets summed.
df_selected = df_sa.iloc[:, [0, 3]]
group_key, amount_key = df_selected.columns
df_sorted = df_selected.sort_values(by=[group_key])

# Total per group, then double it: total value added is assumed to be twice the
# value added of the tradable sectors.
df_grouped = df_sorted.groupby(group_key)[amount_key].sum().reset_index()
VA = df_grouped['value_added'].to_numpy() * 2


### $\tilde{\tau}$: shape $(N, N, J)$
The tariff array: the $(n, i, j)$ element denotes the gross tariff $(1+\tau)$ that country $n$ applies to sector-$j$ goods from country $i$.

Note: I could not find the tariff data, so a placeholder tariff array is constructed here.

In [None]:
# Placeholder tariffs: every (n, i, j) entry is set to the same value, 1.1.
tilde_tau = np.full((N, N, J), 1.1)

In [None]:
# Store the dimensions and all arrays built above in a single 'real_data.npz' archive,
# each one under the keyword name used here.
np.savez('real_data.npz', N = N, J = J, alpha = alpha, beta = beta, gamma = gamma, theta = theta, pi_f = pi_f, pi_m = pi_m, D = D, VA = VA, tilde_tau = tilde_tau)

# Turn to 2017 data

In [51]:
import numpy as np
import pandas as pd
# Root folder of the 2017 inputs; this rebinds the base_path used by the 2009 cells.
# NOTE(review): user-specific Desktop location; adjust it when running elsewhere.
base_path = "~/Desktop/Carbon_Emission_Analysis_2017"

In [52]:
# Country and sector labels are taken from the first column of the two Eora list files.
country_list_df = pd.read_csv(f"{base_path}/3_Result/results_Eora/country_list.csv")
sector_list_df = pd.read_csv(f"{base_path}/3_Result/results_Eora/sector_list.csv")

country_list = country_list_df.iloc[:, 0].tolist()
sector_list = sector_list_df.iloc[:, 0].tolist()

# Model dimensions: number of countries and number of sectors.
N, J = len(country_list), len(sector_list)

# The first ten sectors of the list are kept separately as the tradable ones.
tradable_sector_list = sector_list[:10]

print(f"We have {N} countries and {J} sectors.")

We have 36 countries and 23 sectors.


### $\alpha$: shape $(N, J)$

The $(n, j)$ element denotes the consumption share of goods from sector $j$ for country $n$

In [53]:
# Consumption shares: alpha[n, j] is country n's consumption share in sector j.
# The first CSV column is dropped before converting to an array.
alpha_df = pd.read_csv(f"{base_path}/3_Result/results_parameters/Alpha_nj_2017.csv").iloc[:, 1:]
alpha = alpha_df.to_numpy()

# Sanity check 1: each row of alpha should add up to 1.
# Note that np.allclose also applies its default relative tolerance, so the pass/fail
# threshold is slightly looser than the 1e-6 used to list offending rows below.
sum_by_n = alpha.sum(axis=1)
check_sum = np.allclose(sum_by_n, 1, atol=1e-6)
if not check_sum:
    offending_rows = np.where(np.abs(sum_by_n - 1) > 1e-6)[0]
    print("There are some country where the sum over sectors is not 1 ❌")
    print("The issue occurs at the following n indices:", offending_rows)
    print("The corresponding sums are:", sum_by_n[offending_rows])
else:
    print("For each country, the sum over secotrs equals 1 ✅")

# Sanity check 2: every entry of alpha must lie in [0, 1].
check_range = np.all((alpha >= 0) & (alpha <= 1))
if not check_range:
    outside_unit_interval = (alpha < 0) | (alpha > 1)
    print("There are values in alpha that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(outside_unit_interval))
    print("The values are:", alpha[outside_unit_interval])
else:
    print("Every value in alpha is between 0 and 1 ✅")

For each country, the sum over secotrs equals 1 ✅
Every value in alpha is between 0 and 1 ✅


### $\beta$: shape $(N, J)$

The $(n, j)$ element denotes the value-added share of goods from sector $j$ for country $n$.

In [54]:
# Value-added shares: beta[n, j] is country n's value-added share in sector j.
# The values come from Gamma_nj_2017.csv; its first column is dropped.
# NOTE(review): the file is named "Gamma" while the array is called beta -- confirm this is the intended file.
beta_df = pd.read_csv(f"{base_path}/3_Result/results_parameters/Gamma_nj_2017.csv").iloc[:, 1:]
beta = beta_df.to_numpy()

# Sanity check: every entry of beta must lie in [0, 1].
check_range = np.all((beta >= 0) & (beta <= 1))
if not check_range:
    outside_unit_interval = (beta < 0) | (beta > 1)
    print("There are values in beta that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(outside_unit_interval))
    print("The values are:", beta[outside_unit_interval])
else:
    print("Every value in beta is between 0 and 1 ✅")

Every value in beta is between 0 and 1 ✅


### $\gamma$: shape $(N, J, J)$

The $(n, j, k)$ element denotes the sector $k$'s share in producing goods in sector $j$, country $n$ 

In [55]:
# Sectoral linkage parameters, read as a flat table (first CSV column dropped) and
# folded into an (N, J, J) array without any transposition.
# As used by check 2 below, the middle axis is the one matched with beta[n, j] and the
# last axis is the one summed over.
# NOTE(review): i.e. the array is treated as gamma[n, j, k] with k the input sector --
# confirm this matches the layout of the CSV.
gamma_df = pd.read_csv(f"{base_path}/3_Result/results_parameters/Gamma_njk_2017.csv").iloc[:, 1:]
gamma_np = gamma_df.to_numpy()
gamma_temp = gamma_np.reshape((N, J, J))
gamma = gamma_temp

# Check 1: every entry of gamma must lie in [0, 1].
invalid_values = (gamma < 0) | (gamma > 1)
if not np.any(invalid_values):
    print("Every value in gamma is between 0 and 1 ✅")
else:
    print("There are values in gamma that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(invalid_values))
    print("The values are:", gamma[invalid_values])

# Check 2: summing gamma over its last axis and adding beta must give 1 everywhere.
temp = gamma.sum(axis=2) + beta
is_valid = np.allclose(temp, 1, atol = 1e-5)
if not is_valid:
    violates = ~np.isclose(temp, 1, atol=1e-5)
    print("Condition not satisfied: sum(k) gamma[n, k, j] + beta[n, j] ≠ 1 ❌")
    print("Positions where the condition fails:", np.where(violates))
    print("Values that do not satisfy the condition:", temp[violates])
else:
    print("Condition satisfied: sum(k) gamma[n, k, j] + beta[n, j] = 1 ✅")


Every value in gamma is between 0 and 1 ✅
Condition satisfied: sum(k) gamma[n, k, j] + beta[n, j] = 1 ✅


### $\theta$: shape $(J, )$

In [56]:

# Hard-coded trade elasticities, one per sector: the first 10 entries are listed
# individually and the remaining 13 all repeat 7.31.
# NOTE(review): the 10 leading values presumably belong to the 10 tradable sectors -- confirm.
theta = np.array([6.26, 8.05, 7.31, 6.31, 10.25, 5.85, 7.78, 8.56, 7.13, 8.01, 
                  7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31, 7.31])  

# Guard against a mismatch between the literal above and the sector count.
assert len(theta) == J

### $\pi^f$: shape $(N, N, J)$

The final-goods expenditure share array: the $(n, i, j)$ element denotes country $n$'s share of final expenditure on sector-$j$ goods that is sourced from country $i$.

In [57]:
# Initial final-goods trade shares.
# pi_f[n, i, j]: country n's import share from country i in sector j.
pi_df_temp = pd.read_csv(f"{base_path}/3_Result/results_parameters/trade_share_final_2017.csv")

# Drop the first CSV column. The explicit copy makes pi_df an independent frame, so the
# column assignments below do not write into a slice of pi_df_temp (which would raise
# SettingWithCopyWarning).
pi_df = pi_df_temp.iloc[:, 1:].copy()
pi_df['Exporter_Sector'] = pi_df['Exporter'] + '_' + pi_df['Sector']

# Ordered categoricals built from order of first appearance keep the pivot rows and
# columns in file order instead of alphabetical order.
pi_df['Importer'] = pd.Categorical(pi_df['Importer'], categories=pi_df['Importer'].unique(), ordered=True)
pi_df['Exporter_Sector'] = pd.Categorical(pi_df['Exporter_Sector'], categories=pi_df['Exporter_Sector'].unique(), ordered=True)

# One row per importer, one column per exporter/sector pair.
pi_matrix = pi_df.pivot_table(index='Importer', columns='Exporter_Sector', values='Share', aggfunc='first', observed=False)

# Fold the exporter/sector columns into two axes.
# NOTE(review): this assumes the file lists sectors contiguously within each exporter -- confirm.
pi_np = pi_matrix.to_numpy()
pi_f = pi_np.reshape((N, N, J))

# Check: for every importer and sector the shares must add up to 1 across exporters.
temp = np.sum(pi_f, axis=1)
is_valid = np.allclose(temp, 1, atol=1e-5)

if is_valid:
    print("Condition satisfied: sum(i) pi[n, i, j] = 1 ✅")
else:
    print("Condition not satisfied: sum(i) pi[n, i, j] ≠ 1 ❌")
    print("Positions where the condition fails:", np.where(~np.isclose(temp, 1, atol=1e-5)))
    print("Values that do not satisfy the condition:", temp[~np.isclose(temp, 1, atol=1e-5)])


Condition satisfied: sum(i) pi[n, i, j] = 1 ✅


### $\pi^m$: shape $(N, N, J)$

The intermediate-goods expenditure share array: the $(n, i, j)$ element denotes country $n$'s share of intermediate expenditure on sector-$j$ goods that is sourced from country $i$.

In [58]:
# Initial intermediate-goods trade shares.
# pi_m[n, i, j]: country n's import share from country i in sector j.
pi_df_temp = pd.read_csv(f"{base_path}/3_Result/results_parameters/trade_share_inter_2017.csv")

# Drop the first CSV column. The explicit copy makes pi_df an independent frame, so the
# column assignments below do not write into a slice of pi_df_temp (which would raise
# SettingWithCopyWarning).
pi_df = pi_df_temp.iloc[:, 1:].copy()
pi_df['Exporter_Sector'] = pi_df['Exporter'] + '_' + pi_df['Sector']

# Ordered categoricals built from order of first appearance keep the pivot rows and
# columns in file order instead of alphabetical order.
pi_df['Importer'] = pd.Categorical(pi_df['Importer'], categories=pi_df['Importer'].unique(), ordered=True)
pi_df['Exporter_Sector'] = pd.Categorical(pi_df['Exporter_Sector'], categories=pi_df['Exporter_Sector'].unique(), ordered=True)

# One row per importer, one column per exporter/sector pair.
pi_matrix = pi_df.pivot_table(index='Importer', columns='Exporter_Sector', values='Share', aggfunc='first', observed=False)

# Fold the exporter/sector columns into two axes.
# NOTE(review): this assumes the file lists sectors contiguously within each exporter -- confirm.
pi_np = pi_matrix.to_numpy()
pi_m = pi_np.reshape((N, N, J))

# Check: for every importer and sector the shares must add up to 1 across exporters.
temp = np.sum(pi_m, axis=1)
is_valid = np.allclose(temp, 1, atol=1e-5)

if is_valid:
    print("Condition satisfied: sum(i) pi[n, i, j] = 1 ✅")
else:
    print("Condition not satisfied: sum(i) pi[n, i, j] ≠ 1 ❌")
    print("Positions where the condition fails:", np.where(~np.isclose(temp, 1, atol=1e-5)))
    print("Values that do not satisfy the condition:", temp[~np.isclose(temp, 1, atol=1e-5)])


Condition satisfied: sum(i) pi[n, i, j] = 1 ✅


### $\tilde{\tau}$: shape $(N, N, J)$
The tariff array: the $(n, i, j)$ element denotes the gross tariff $(1+\tau)$ that country $n$ applies to sector-$j$ goods from country $i$.

In [59]:
# Load the tariff data.
tariff_raw = pd.read_csv(f"{base_path}/3_Result/results_parameters/tariff_2017.csv")

# Drop the first CSV column. The explicit copy makes tariff_df an independent frame, so
# the column assignments below do not write into a slice of tariff_raw (which would
# raise SettingWithCopyWarning).
tariff_df = tariff_raw.iloc[:, 1:].copy()
tariff_df['Exporter_Sector'] = tariff_df['Exporter'] + '_' + tariff_df['Sector']

# Ordered categoricals built from order of first appearance keep the pivot rows and
# columns in file order instead of alphabetical order.
tariff_df['Importer'] = pd.Categorical(tariff_df['Importer'], categories=tariff_df['Importer'].unique(), ordered=True)
tariff_df['Exporter_Sector'] = pd.Categorical(tariff_df['Exporter_Sector'], categories=tariff_df['Exporter_Sector'].unique(), ordered=True)

# One row per importer, one column per exporter/sector pair.
tariff_matrix = tariff_df.pivot_table(index='Importer', columns='Exporter_Sector', values='Tariff', aggfunc='first', observed=False)

# Fold the exporter/sector columns into two axes.
# NOTE(review): this assumes the file lists sectors contiguously within each exporter -- confirm.
tariff_np = tariff_matrix.to_numpy()
tariff_base = tariff_np.reshape((N, N, J))

# Construct tilde_tau as one plus the loaded tariff.
# tilde_tau[n, i, j]: country n's tariff on goods j from country i (n: importer, i: exporter).
tilde_tau = tariff_base + 1

In [60]:
# X[n, j]: country n's initial expenditure on sector j.
# The first CSV column is dropped before converting to an array.
X_df = pd.read_csv(f"{base_path}/3_Result/results_parameters/X_nj_2017.csv").iloc[:, 1:]
X = X_df.to_numpy()

### VA: shape (N, )

In [61]:
# Initial trade shares from trade_share_2017.csv; the VA and D cells below read this array.
# pi[n, i, j]: country n's import share from country i in sector j.
pi_df_temp = pd.read_csv(f"{base_path}/3_Result/results_parameters/trade_share_2017.csv")

# Drop the first CSV column. The explicit copy makes pi_df an independent frame, so the
# column assignments below do not write into a slice of pi_df_temp (which would raise
# SettingWithCopyWarning).
pi_df = pi_df_temp.iloc[:, 1:].copy()
pi_df['Exporter_Sector'] = pi_df['Exporter'] + '_' + pi_df['Sector']

# Ordered categoricals built from order of first appearance keep the pivot rows and
# columns in file order instead of alphabetical order.
pi_df['Importer'] = pd.Categorical(pi_df['Importer'], categories=pi_df['Importer'].unique(), ordered=True)
pi_df['Exporter_Sector'] = pd.Categorical(pi_df['Exporter_Sector'], categories=pi_df['Exporter_Sector'].unique(), ordered=True)

# One row per importer, one column per exporter/sector pair.
pi_matrix = pi_df.pivot_table(index='Importer', columns='Exporter_Sector', values='Share', aggfunc='first', observed=False)

# Fold the exporter/sector columns into two axes.
# NOTE(review): this assumes the file lists sectors contiguously within each exporter -- confirm.
pi_np = pi_matrix.to_numpy()
pi = pi_np.reshape((N, N, J))

# Check: for every importer and sector the shares must add up to 1 across exporters.
temp = np.sum(pi, axis=1)
is_valid = np.allclose(temp, 1, atol=1e-5)

if is_valid:
    print("Condition satisfied: sum(i) pi[n, i, j] = 1 ✅")
else:
    print("Condition not satisfied: sum(i) pi[n, i, j] ≠ 1 ❌")
    print("Positions where the condition fails:", np.where(~np.isclose(temp, 1, atol=1e-5)))
    print("Values that do not satisfy the condition:", temp[~np.isclose(temp, 1, atol=1e-5)])

Condition satisfied: sum(i) pi[n, i, j] = 1 ✅


In [62]:
# Value added per country, derived from the expenditures X:
#   VA[n] = sum_j beta[n, j] * sum_i X[i, j] * pi[i, n, j] / tilde_tau[i, n, j]
# The loops accumulate in the order i (innermost), then j, for each country n.
tariff_adjusted_share = pi / tilde_tau  # element-wise pi[i, n, j] / tilde_tau[i, n, j]

VA = np.zeros(N)
for n in range(N):
    for j in range(J):
        # Sum over all buyers i of their spending on sector j that goes to country n.
        sales_nj = 0
        for i in range(N):
            sales_nj += X[i, j] * tariff_adjusted_share[i, n, j]
        VA[n] += beta[n, j] * sales_nj

# Every country's value added must be strictly positive.
check_positive = np.all(VA > 0)
if not check_positive:
    print("There are values in VA that are less than or equal to 0 ❌")
    print("These values are at positions:", np.where(VA <= 0))
    print("The values are:", VA[VA <= 0])
else:
    print("Every country's value added is greater than 0 ✅")

Every country's value added is greater than 0 ✅


## D: trade deficit

In [63]:
# Calculate the trade balance D[n] of every country from the expenditures X.
# For each (n, j, i):
#   IM: country n's tariff-adjusted spending on sector-j goods from country i,
#   EX: country i's tariff-adjusted spending on sector-j goods from country n.
# NOTE(review): D[n] accumulates EX - IM (exports minus imports) although the section is
# titled "trade deficit" -- confirm the sign convention expected by the code that reads D.
D = np.zeros(N)

for n in range(N):
    for j in range(J):
        for i in range (N):
            IM = X[n,j] * pi[n,i,j] / tilde_tau[n,i,j]
            EX = X[i,j] * pi[i,n,j] / tilde_tau[i,n,j]

            # The i == n terms cancel because IM and EX coincide there.
            D[n] += EX - IM


# Vectorised forms of the two totals, kept for reference (not executed):
# EX = np.einsum('inj,inj,ij->n', pi, 1 / tilde_tau, X)  # shape: (N,)
# IM = np.einsum('nij,nij,nj->n', pi, 1 / tilde_tau, X)  # shape: (N,)


In [64]:
# Store the labels, dimensions and all 2017 arrays built above in a single
# 'real_data_2017.npz' archive, each one under the keyword name used here.
# X and pi are intermediate inputs and are not written to the archive.
np.savez('real_data_2017.npz', country_list = country_list, sector_list = sector_list, N = N, J = J, alpha = alpha, beta = beta, gamma = gamma, theta = theta, pi_f = pi_f, pi_m = pi_m, D = D, VA = VA, tilde_tau = tilde_tau)