Run this file after running "1_Clean_Eora.ipynb"

This notebook generates the parameters used for estimating trade costs and running the model.

# Set up

In [31]:
import os
import pandas as pd 
import numpy as np
from equilibrium import equilibrium


In [32]:
# Set the working directory to the project root inside the user's Dropbox.
# Relative paths used further down (e.g. "3_Result/parameters/...") resolve against it.
wd = os.path.expanduser("~/Dropbox/Tariff_Project")
os.chdir(wd)
print(f"Current working directory: {os.getcwd()}")

# Input folder: cleaned Eora tables (the country/sector lists and the T/FD/VA
# matrices are read from here).
data_path = os.path.join(wd, "3_Result/eora_clean")


# Output folder: every calibrated parameter file is written here.
save_path = os.path.join(wd, "3_Result/parameters")


# Echo the input folder so that a wrong location is noticed immediately.
print(f"Cleaned Eora data folder: {data_path}")

Current working directory: /Users/yaolangzhong/Dropbox/Tariff_Project
Cleaned Eora data folder: /Users/yaolangzhong/Dropbox/Tariff_Project/3_Result/eora_clean


# Part I ： Run this part after running "1_Clean_Eora.ipynb"

In [33]:
# ---------------------------------------------------------------
#  Country / sector names and the resulting problem dimensions
# ---------------------------------------------------------------
# Both list files hold the names in a single column labelled "0".
country_list, sector_list = (
    pd.read_csv(os.path.join(data_path, file_name), header=0)["0"].astype(str).tolist()
    for file_name in ("country_list.csv", "sector_list.csv")
)

# Row/column labels of the Eora tables, kept as a DataFrame for reference.
label_list = pd.read_csv(os.path.join(data_path, "labels.csv"), header=0)

# N = number of countries, J = number of sectors.
N, J = len(country_list), len(sector_list)

print(f"{N = }, {J = }")


N = 37, J = 23


In [34]:
# -----------------------------------------------------------------
#  Input-output table (io), final demand (fd) and value added (va)
# -----------------------------------------------------------------
def _load_table(file_name):
    """Read one CSV from data_path as a float array, with missing cells set to 0."""
    frame = pd.read_csv(os.path.join(data_path, file_name), header=0)
    return frame.fillna(0).to_numpy(float)


io = _load_table("T_final_2017.csv")
fd = _load_table("FD_final_2017.csv")
va = _load_table("VA_final_2017.csv")

print(f"io shape: {io.shape}, fd shape: {fd.shape}, va shape: {va.shape}")

io shape: (851, 851), fd shape: (851, 222), va shape: (6, 851)


## alpha(N,J): Calibrate $\alpha_n^{j}$ (preference parameter) in the household utility function


In [35]:
# Each country owns fd_n = 6 consecutive final-demand columns, in this order:
#   0 Household final consumption
#   1 Non-profit institutions serving households
#   2 Government final consumption
#   3 Gross fixed capital formation
#   4 Changes in inventories
#   5 Acquisitions less disposals of valuables
# Accounts 4 and 5 are left out so that the shares better represent household preferences.
fd_n = 6
keep_fd_n = [0, 1, 2, 3]
fd_n_2 = 4                      # number of kept accounts; must equal len(keep_fd_n)

# Step 1: pick the kept accounts of every country, country by country.
cols_to_select = [c_idx * fd_n + a for c_idx in range(N) for a in keep_fd_n]
fd_select = fd[:, cols_to_select]                                       # (N*J, 4*N)

# Step 2: add up the kept accounts within each destination country.
fd_sum = fd_select.reshape(fd_select.shape[0], N, fd_n_2).sum(axis=2)   # (N*J, N)

# Step 3: add up over source countries, leaving sector-by-destination totals.
# fd_final[j, n] is country n's total final consumption of sector j, whatever its origin.
fd_sum_reshaped = fd_sum.reshape(N, J, N)
fd_final = fd_sum_reshaped.sum(axis=0)                                  # (J, N)

# ----------- correct above - checked by hand --------------

# Step 4: expenditure shares = sector consumption / total consumption of the country.
total_fd = fd_final.sum(axis=0, keepdims=True)                          # (1, N)
Alpha_jn = fd_final / total_fd                                          # (J, N)
Alpha_nj = Alpha_jn.T                                                   # (N, J)

# Step 5: keep every share strictly inside (0, 1), then renormalise each country to sum to 1.
Alpha_nj = np.clip(Alpha_nj, 0.00001, 0.99999)
Alpha_nj = Alpha_nj / Alpha_nj.sum(axis=1, keepdims=True)


In [36]:
# Sanity checks on the calibrated preference shares.

alpha = Alpha_nj.copy()  # name under which the shares enter the model simulation

# Check 1: the shares of every country add up to one (small numerical tolerance allowed).
sum_by_n = alpha.sum(axis=1)
check_sum = np.allclose(sum_by_n, 1, atol=1e-6)
if not check_sum:
    offending_n = np.where(np.abs(sum_by_n - 1) > 1e-6)[0]
    print("There are some country where the sum over sectors is not 1 ❌")
    print("The issue occurs at the following n indices:", offending_n)
    print("The corresponding sums are:", sum_by_n[offending_n])
else:
    print("For each country, the sum over secotrs equals 1 ✅")

# Check 2: every share lies in [0, 1].
check_range = np.all((alpha >= 0) & (alpha <= 1))
if not check_range:
    outside_unit = (alpha < 0) | (alpha > 1)
    print("There are values in alpha that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(outside_unit))
    print("The values are:", alpha[outside_unit])
else:
    print("Every value in alpha is between 0 and 1 ✅")



For each country, the sum over secotrs equals 1 ✅
Every value in alpha is between 0 and 1 ✅


In [37]:
# Save alpha (countries in rows, sectors in columns) to CSV for inspection.
alpha_df = pd.DataFrame(Alpha_nj,
                        index=country_list,
                        columns=sector_list)

# This is the first write into save_path: create the folder if it is missing,
# otherwise to_csv fails on a fresh checkout.
os.makedirs(save_path, exist_ok=True)
alpha_df.to_csv(os.path.join(save_path, "Alpha_nj_2017.csv"))

print("Alpha_nj_2017.csv saved successfully.")

Alpha_nj_2017.csv saved successfully.


## GO(N,J): Calculate Gross Output of Intermediate Goods, Final Goods, and All Goods


In [38]:
# Gross output of each country-sector = intermediate sales + final sales,
# both obtained as row sums of the corresponding table.
io_GO = io.sum(axis=1)         # (N*J,) intermediate output: row sums of the IO table
# NOTE(review): an earlier comment here called `fd.sum(axis=1)` the incorrect version,
# yet that is exactly the active line; the alternative that was left commented out was
# `fd_select.sum(axis=1)` (only the four accounts kept for alpha). Confirm which
# final-demand definition is intended for gross output.
fd_GO = fd.sum(axis=1)  # (N*J,) final output: row sums over ALL final-demand accounts
GO_temp    = io_GO + fd_GO         # (N*J,)

GO = GO_temp.reshape(N, J, order="C")  # (N, J): rows are countries, columns are sectors

# Written with country labels as the index column and sector names as the header.
go_file = os.path.join(save_path, "GO_2017.csv")
pd.DataFrame(GO, index=country_list, columns=sector_list).to_csv(go_file)

## $\beta(N,J)$: Calibrate the value-added share $\gamma_n^j$ in the production function (stored as `Gamma_nj` here, named `beta` in the model code)

In [39]:
# Value-added accounts kept: 1 Compensation of employees, 2 Taxes on production,
# 4 Net operating surplus, 5 Net mixed income, 6 Consumption of fixed capital.
# Account 3 (Subsidies on production) is dropped: for some countries it is a large
# negative number that would turn total value added negative.

va_total = np.delete(va, 2, axis=0).sum(axis=0)      # (N*J,) row index 2 is account 3

# Value-added share = value added / gross output, one entry per country-sector.
Gamma_vec = va_total / GO_temp                       # (N*J,)
Gamma_nj = Gamma_vec.reshape(N, J, order="C")        # (N, J): rows countries, columns sectors

# Shares above 1 are pulled back to 0.99.
gt_one = Gamma_nj > 1
if not gt_one.any():
    print("No values are greater than 1.")
else:
    print(f"There are {gt_one.sum()} values greater than 1. They have been replaced with 0.99.")
    Gamma_nj[gt_one] = 0.99

# Negative shares are lifted to 0.01.
lt_zero = Gamma_nj < 0
if not lt_zero.any():
    print("No values are less than 0.")
else:
    print(f"There are {lt_zero.sum()} values less than 0. They have been replaced with 0.01.")
    Gamma_nj[lt_zero] = 0.01

# Gamma_nj may still be adjusted while calibrating Gamma_nj_k, so it is NOT saved here;
# it is written to disk together with Gamma_nj_k.

There are 23 values greater than 1. They have been replaced with 0.99.
No values are less than 0.


## $\gamma(N,J,J)$: Calibrate $\gamma_n^{j,k}$

Note: `gamma(n, j, k)` is the share of inputs from sector k in the production of sector j in country n.
In this code the middle index (j) is therefore the using (destination) sector and the last index (k) is the supplying (source) sector.

In [40]:
# Calibrate the input shares Gamma_nj_k: for every using country-sector (n, j), the share
# of its intermediate inputs that comes from each supplying sector k, rescaled so that
# the shares of a row add up to 1 - Gamma_nj (the non-value-added part of output).

# Step 1: add up supplying countries within each supplying sector.
# Rows of io are ordered country-major (row = n*J + j), so reshaping to (N, J, N*J) and
# summing over axis 0 merges sector k of every source country (rows k, k+J, k+2J, ...).

io_sector_sum = io.reshape(N, J, -1).sum(axis=0)      # (J, N*J): supplying sector x using country-sector

# Step 2: total intermediate input of every using country-sector (column sums of io).
total_input = io.sum(axis=0)                          # (N*J,)

# Step 3: input shares. Columns whose total input is zero produce 0/0 = NaN; the
# warnings are silenced here and the NaNs are repaired in Step 4.
with np.errstate(divide="ignore", invalid="ignore"):
    Gamma_k_nj = io_sector_sum / total_input          # (J, N*J) 
# Transposed VIEW of Gamma_k_nj: the in-place edits below also change Gamma_k_nj.
Gamma_nj_k = Gamma_k_nj.T                             # (N*J, J): row = using (n, j), column = supplying sector k

# Step 4: repair rows containing NaN. The value-added share of such a country-sector is
# set to 0.9999 and its NaN input shares to 0.0001 (they are rescaled in Step 6).
nan_mask = np.isnan(Gamma_nj_k)
if nan_mask.any():
    rows_with_nan = np.where(nan_mask.any(axis=1))[0]   
    # Flat row index r = n*J + j  ->  country index n and sector index j.
    n_idx = rows_with_nan // J
    j_idx = rows_with_nan %  J
    Gamma_nj[n_idx, j_idx] = 0.9999
    Gamma_nj_k[nan_mask] = 0.0001
    print("NA values have been handled.")
else:
    print("No NA values found in Gamma_nj_k.")

# Author's note: NaN appears because the inputs of "KOR - Recycling" and "ESP - Others"
# are zero across all countries and sectors.

# Step 5: repair shares outside [0, 1].
# Negative shares -> 0.0001, and the row's value-added share is set to 0.9999.
lt_zero = Gamma_nj_k < 0
if lt_zero.any():
    rows = np.where(lt_zero.any(axis=1))[0]
    n_idx = rows // J
    j_idx = rows %  J
    Gamma_nj_k[lt_zero] = 0.0001
    Gamma_nj[n_idx, j_idx] = 0.9999
    handled = True
else:
    handled = False

# Shares above 1 -> 0.9999, and the row's value-added share is set to 0.0001.
gt_one = Gamma_nj_k > 1
if gt_one.any():
    rows = np.where(gt_one.any(axis=1))[0]
    n_idx = rows // J
    j_idx = rows %  J
    Gamma_nj_k[gt_one] = 0.9999
    Gamma_nj[n_idx, j_idx] = 0.0001
    handled = True

# `handled` is True if either out-of-range repair above was applied.
print("Values less than 0 or greater than 1 have been handled." if handled
    else "No values less than 0 or greater than 1 found in Gamma_nj_k.")

# Step 6: rescale every row so that sum_k Gamma_nj_k[(n, j), k] = 1 - Gamma_nj[n, j].
row_sum = Gamma_nj_k.sum(axis=1)             # (N*J,) current sum of each row
row_rest = 1.0 - Gamma_nj.ravel(order="C")   # (N*J,) target sum of each row


scale = row_rest / row_sum  
Gamma_nj_k *= scale[:, None] # (N*J, J), scaled in place



NA values have been handled.
No values less than 0 or greater than 1 found in Gamma_nj_k.


In [41]:
# Sanity check on the value-added shares.

beta = Gamma_nj.copy()  # name under which the value-added shares enter the model simulation

# Every share must lie in [0, 1].
check_range = np.all((beta >= 0) & (beta <= 1))
if not check_range:
    outside_unit = (beta < 0) | (beta > 1)
    print("There are values in beta that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(outside_unit))
    print("The values are:", beta[outside_unit])
else:
    print("Every value in beta is between 0 and 1 ✅")



Every value in beta is between 0 and 1 ✅


In [42]:
# Sanity checks on the input shares.

# gamma[n, j, k]: in country n, share of inputs from supplying sector k in the production
# of using sector j (rows of Gamma_nj_k are using country-sectors, columns are supplying sectors).
gamma = Gamma_nj_k.reshape((N, J, J))

# Check 1: every share lies in [0, 1].
invalid_values = (gamma < 0) | (gamma > 1)

if np.any(invalid_values):
    print("There are values in gamma that are not between 0 and 1 ❌")
    print("These values are at positions:", np.where(invalid_values))
    print("The values are:", gamma[invalid_values])
else:
    print("Every value in gamma is between 0 and 1 ✅")

# Check 2: input shares plus the value-added share exhaust output,
#          sum(k) gamma[n, j, k] + beta[n, j] = 1.
temp = np.sum(gamma, axis=2) + beta
is_valid = np.allclose(temp, 1, atol=1e-5)

if is_valid:
    print("Condition satisfied: sum(k) gamma[n, j, k] + beta[n, j] = 1 ✅")
else:
    # The message uses the same index order as the sum above; it used to print
    # gamma[n, k, j], which is not what is being checked.
    violations = ~np.isclose(temp, 1, atol=1e-5)
    print("Condition not satisfied: sum(k) gamma[n, j, k] + beta[n, j] ≠ 1 ❌")
    print("Positions where the condition fails:", np.where(violations))
    print("Values that do not satisfy the condition:", temp[violations])

Every value in gamma is between 0 and 1 ✅
Condition satisfied: sum(k) gamma[n, j, k] + beta[n, j] = 1 ✅


In [43]:
# Write both share tables to CSV.
# Gamma_nj_k rows are labelled "<country>_<sector>" in country-major order, matching
# the row layout of the array; columns are the supplying sectors.
njk_row_labels = [f"{country}_{sector}"
                  for country in country_list
                  for sector in sector_list]

gamma_njk_df = pd.DataFrame(Gamma_nj_k, index=njk_row_labels, columns=sector_list)
gamma_nj_df = pd.DataFrame(Gamma_nj, index=country_list, columns=sector_list)

for frame, file_name in ((gamma_njk_df, "Gamma_njk_2017.csv"),
                         (gamma_nj_df, "Gamma_nj_2017.csv")):
    frame.to_csv(os.path.join(save_path, file_name))

print("Gamma_njk_2017.csv and Gamma_nj_2017.csv saved successfully.")

Gamma_njk_2017.csv and Gamma_nj_2017.csv saved successfully.


## VA: shape (N, )

In [44]:
# Country-level value added: sum the country-sector totals over sectors.
va_matrix = va_total.reshape(N, J)      # (N, J): rows countries, columns sectors

VA = va_matrix.sum(axis=1)              # (N,)

df_va = pd.DataFrame({
    "Country": country_list,
    "ValueAdded": VA
})

# Write through save_path (not a path relative to the current directory) so the output
# lands in the same folder as the other parameter files wherever the kernel's cwd is.
df_va.to_csv(os.path.join(save_path, "VA.csv"), index=False, encoding="utf-8")


# Every country's value added must be strictly positive.
check_positive = np.all(VA > 0)
if check_positive:
    print("Every country's value added is greater than 0 ✅")
else:
    non_positive = VA <= 0
    print("There are values in VA that are less than or equal to 0 ❌")
    print("These values are at positions:", np.where(non_positive))
    print("The values are:", VA[non_positive])

Every country's value added is greater than 0 ✅


# Part II: Run this part after running "3_Estimate_TradeCost.ipynb"


## $\theta_n$

As decided in "3_Estimate_TradeCost.ipynb".

In [45]:
# Trade elasticities, taken from: Bolhuis, M. A., Chen, M. J., & Kett, B. R. (2023).
# "Fragmentation in global trade: Accounting for commodities." International Monetary Fund.

##############  Sector merging case ######################
#
#  1 Agriculture, Fishing: 2.91
#  2 Mining and Quarrying: 3.41
#  3 Food & Beverages: 4.17
#  4 Textiles and Wearing Apparel: 4.71
#  5 Wood and Paper: (8.8 + 8.21) / 2 = 8.505
#  6 Petroleum, Chemical and Non-Metallic Mineral Products: (3.67 + 10.56 + 6.75 + 4.79) / 4 = 6.4425
#  7 Metal Products: (7.39 + 4.22) / 2 = 5.805
#  8 Electrical and Machinery: (5.01 + 5.14 + 4.11) / 3 = 4.753
#  9 Transport Equipment: (8.92 + 8.99) / 2 = 8.955
# 10 Other Manufacturing, Recycling: 4.06
# ---------------------------------------------------------------------
# Services sectors: 8.35
#
# NOTE(review): the table above lists 3.41 for Mining and Quarrying, but the array below
# uses 3.14. One of the two looks like a digit transposition; confirm against the source
# paper before changing either.

goods_theta = [2.91, 3.14, 4.17, 4.71, 8.505, 6.4425, 5.805, 4.753, 8.955, 4.06]
services_theta = [8.35] * 13    # the remaining 13 sectors all use the services elasticity

theta = np.array(goods_theta + services_theta)



## $\tilde{\tau}$: shape $(N, N, J)$
The tariff array: entry $(n, i, j)$ is one plus the tariff rate, $1+\tau_{ni}^{j}$, that country $n$ (importer) imposes on goods of sector $j$ from country $i$ (exporter).

In [46]:
# Load the tariff data constructed in "3_Estimate_TradeCost.ipynb".
# The "tariff" column is the one applied in the model.

tariff_all = pd.read_csv(os.path.join(save_path, "All_Tariff_2017.csv"))
# Drop the first column, keep the four needed ones, and take an explicit copy so the
# column assignments below act on an independent frame instead of a slice of a slice
# (which triggers SettingWithCopyWarning).
tariff_df = tariff_all.iloc[:, 1:][['Importer', 'Exporter', 'Sector', "tariff"]].copy()

tariff_df['Exporter_Sector'] = tariff_df['Exporter'] + '_' + tariff_df['Sector']
# Ordered categoricals in order of first appearance make the pivot keep the file's row
# order rather than sorting labels alphabetically.
# NOTE(review): the reshape below assumes that order is importer-major, then exporter,
# then sector, and matches country_list / sector_list. Verify against the file.
tariff_df['Importer'] = pd.Categorical(
    tariff_df['Importer'],
    categories=tariff_df['Importer'].unique(),
    ordered=True
)
tariff_df['Exporter_Sector'] = pd.Categorical(
    tariff_df['Exporter_Sector'],
    categories=tariff_df['Exporter_Sector'].unique(),
    ordered=True
)

# Pivot to an importer x (exporter, sector) table.
tariff_matrix = tariff_df.pivot_table(
    index='Importer',
    columns='Exporter_Sector',
    values='tariff',
    aggfunc='first',
    observed=False
)
tariff_np = tariff_matrix.to_numpy()

# Fail loudly on an unexpected layout: a table with the right number of cells but the
# wrong shape would otherwise be reshaped into a silently scrambled array.
if tariff_np.shape != (N, N * J):
    raise ValueError(
        f"Tariff table has shape {tariff_np.shape}, expected {(N, N * J)} "
        "(importers x exporter-sector pairs)."
    )
tariff_base = tariff_np.reshape((N, N, J))

# tilde_tau[n, i, j] = 1 + tariff of importer n on sector-j goods from exporter i.
tilde_tau = tariff_base + 1


## Xf(N,J) and Xm(N,J): Calculate Expenditure for final goods and intermediate goods

This is not necessary for model simulation; just for check

In [47]:
# The observed flows are in basic prices. Expenditure in purchaser prices is obtained by
# multiplying every bilateral flow by tilde_tau = 1 + tariff.

# tilde_tau is (importer, exporter, sector); reorder it to match the flow layout below.
tau_esI = np.transpose(tilde_tau, (1, 2, 0))         # (exporter, sector, importer)

# Intermediate goods: add up the using sectors of every importing country.
Mm_temp = io.reshape(N, J, N, J).sum(axis=3)         # (exporter, sector, importer)
Mm = Mm_temp.transpose(2, 0, 1)                      # (importer, exporter, sector)
Xm_bilateral = Mm_temp * tau_esI                     # (exporter, sector, importer)
Xm = Xm_bilateral.sum(axis=0).T                      # (N, J): importer x sector

# Final goods: add up all fd_n final-demand accounts of every importing country.
Mf_temp = fd.reshape(N, J, N, fd_n).sum(axis=3)      # (exporter, sector, importer)
Mf = Mf_temp.transpose(2, 0, 1)                      # (importer, exporter, sector)
Xf_bilateral = Mf_temp * tau_esI                     # (exporter, sector, importer)
Xf = Xf_bilateral.sum(axis=0).T                      # (N, J): importer x sector

M = Mm + Mf                                          # (importer, exporter, sector), basic prices
X = Xf + Xm                                          # (N, J), purchaser prices

# Save. Files go through save_path (not a path relative to the current directory) so they
# land next to the other parameter files wherever the kernel's cwd is.
# NOTE(review): index=False drops the country labels from the CSVs, unlike the alpha /
# GO / gamma files; kept as is because downstream readers may rely on this layout.
df_X = pd.DataFrame(X, index=country_list, columns=sector_list)
df_Xm = pd.DataFrame(Xm, index=country_list, columns=sector_list)
df_Xf = pd.DataFrame(Xf, index=country_list, columns=sector_list)

for frame, file_name in ((df_X, "X.csv"), (df_Xm, "Xm.csv"), (df_Xf, "Xf.csv")):
    frame.to_csv(os.path.join(save_path, file_name), index=False, encoding="utf-8")



##  $D_n$: Trade Deficit (Import - Export)

In [48]:
# Trade deficit of every country, D_n = imports - exports, from the basic-price flows M.
# M has shape (N, N, J), indexed (importer, exporter, sector). Domestic flows (n, n)
# enter both sums and cancel in the difference.
imports = M.sum(axis=1)   # (N, J): sum over exporters -> what each country buys
exports = M.sum(axis=0)   # (N, J): sum over importers -> what each country sells

D_nj = imports - exports  # (N, J): sectoral deficit D_n^j

D = D_nj.sum(axis=1)      # (N,): aggregate deficit D_n

df = pd.DataFrame({
    'Country': country_list,
    'IM  - EX': D
})

# Write through save_path (not a path relative to the current directory) so the output
# lands in the same folder as the other parameter files.
df.to_csv(os.path.join(save_path, "D.csv"), index=False, encoding="utf-8")


## pif,pim,pit(N,N,J): Calibrate intermediate goods trade share $\pi_{ni}^{j,I}$, final goods trade share $\pi_{ni}^{j,F}$, and total trade share $\pi_{ni}^{j}$

In `pi(n, i, j)`, n is the importer, i is the exporter, and j is the sector.

In [49]:
# Bilateral expenditure in purchaser prices, reordered to (importer, exporter, sector).
Xf_nij = Xf_bilateral.transpose(2, 0, 1)
Xm_nij = Xm_bilateral.transpose(2, 0, 1)
X_nij = Xf_nij + Xm_nij


def _trade_share(flows):
    """Return each exporter's share of every importer-sector total.

    `flows` is indexed (importer, exporter, sector); shares are taken along the exporter
    axis. Importer-sector cells whose total is zero get a share of exactly 0.
    """
    total = flows.sum(axis=1, keepdims=True)          # (N, 1, J)
    share = np.zeros_like(flows, dtype=float)
    # `out=` matters: without it, entries excluded by `where` are left uninitialized
    # (arbitrary memory) and nan_to_num would not clean them up.
    np.divide(flows, total, out=share, where=total != 0)
    return np.nan_to_num(share)


pi = _trade_share(X_nij)      # total trade share
pim = _trade_share(Xm_nij)    # intermediate-goods trade share
pif = _trade_share(Xf_nij)    # final-goods trade share

pi_sets = {
    "final": pif,
    "intermediate": pim,
    "all": pi
}

# Check: for every importer n and sector j the shares over exporters add up to one.
# The loop variable is deliberately NOT called `pi`, so the total-share array keeps its
# name regardless of the order of the dictionary.
tol = 1e-5
for name, share in pi_sets.items():
    temp = np.sum(share, axis=1)
    is_valid = np.allclose(temp, 1, atol=tol)

    if is_valid:
        print(f"{name:>12} ✅ Consition satisfied: ∑₍i₎ π[{name}][n,i,j] = 1")
    else:
        bad = ~np.isclose(temp, 1, atol=tol)
        print(f"{name:>12} ❌ Consition not satisfied：there are {bad.sum()} positions not equal to 1")
        print("  Position (n,j)：", np.argwhere(bad))
        print("  Actual sum：", temp[bad])


       final ✅ Consition satisfied: ∑₍i₎ π[final][n,i,j] = 1
intermediate ✅ Consition satisfied: ∑₍i₎ π[intermediate][n,i,j] = 1
         all ✅ Consition satisfied: ∑₍i₎ π[all][n,i,j] = 1


# Check

In [50]:
# NOTE(review): `equilibrium` is already imported in the first cell, and the star import
# hides which names come from cp_functions. Consider moving these to the top import cell
# and importing names explicitly.
from equilibrium import equilibrium
from cp_functions import *

# Baseline inputs for the solver: arrays of ones for kappa and for the initial guesses.
kappa_base = np.ones((N,N,J))

w_hat_initial = np.ones(N)
P_hat_initial = np.ones((N, J)) 

# Sign-flipped deficit (D is imports - exports).
# NOTE(review): presumably the solver expects exports - imports; confirm.
D_2 = -D

# Copy of gamma with its last two axes swapped: (n, j, k) -> (n, k, j).
g_tem = gamma.copy()  # gamma is the variable name used in model simulation
gamma_TRY = g_tem.transpose(0, 2, 1)  

In [51]:
# Run the solver on the calibrated baseline. It is called with the axis-swapped
# gamma_TRY (not gamma), the total trade share `pi`, and the sign-flipped deficit D_2.
# NOTE(review): argument order is taken as given; confirm against the signature of
# `equilibrium` in the project module.
w_base, P_base, X_base, pi_base = equilibrium(gamma_TRY, beta, theta, tilde_tau, kappa_base, pi, alpha, VA, D_2, N, J, X, w_hat_initial, P_hat_initial)


Round 1: w_hat_min = 0.9847631708725898, w_hat_max = 1.007157075187992, min_X_prime = 14376.477628654764, max_X_prime = 9824589673.931076, wfmax = 0.0152368291274102, Pfmax = 2.220446049250313e-16 
Round 2: w_hat_min = 0.9724873247434175, w_hat_max = 1.0134391559220177, min_X_prime = 14353.687740003581, max_X_prime = 9871657459.359821, wfmax = 0.012275846129172319, Pfmax = 0.01217136238906924 
Round 3: w_hat_min = 0.9640731215134909, w_hat_max = 1.0186328347280125, min_X_prime = 14333.40180954995, max_X_prime = 9905582482.76643, wfmax = 0.008414203229926587, Pfmax = 0.010048545465693226 
Round 4: w_hat_min = 0.9591435304528648, w_hat_max = 1.0227826960832436, min_X_prime = 14315.303584797188, max_X_prime = 9930487919.592762, wfmax = 0.005056638535977487, Pfmax = 0.008079063772684525 
Round 5: w_hat_min = 0.9567804701159743, w_hat_max = 1.0260348727344408, min_X_prime = 14300.662305652924, max_X_prime = 9949131859.288157, wfmax = 0.0032521766511972583, Pfmax = 0.006150424871183557 
Roun

In [52]:
# Display the first solver output (one value per country).
w_base

array([0.97033871, 0.96911607, 0.97387002, 0.99820079, 0.95593071,
       0.97606713, 0.98658432, 0.97576458, 0.96835557, 0.99024819,
       0.98648731, 0.97833887, 0.96259532, 0.96782096, 0.98152818,
       1.01006971, 0.9860503 , 0.95696931, 0.97516203, 0.97440822,
       1.03528361, 0.98272038, 0.97339196, 0.97339416, 0.97102776,
       0.99308295, 0.96569517, 0.98838707, 0.9764247 , 0.97497409,
       0.97841698, 0.99760393, 0.97766159, 0.98855539, 1.03775788,
       0.97631686, 1.00836504])

In [23]:
# Load a reference parameter archive and list the name, shape and dtype of each array.
# The file name is relative, so it is resolved against the current working directory.
# NOTE(review): this cell and the next two carry execution counts (23, 25, 27) that are
# out of sequence with their neighbours; re-run the notebook top to bottom to confirm
# they still work on a fresh kernel.
real_data = np.load('model_data_2017.npz')

for name in real_data.files:
    arr = real_data[name]
    print(f"{name}: shape={arr.shape}, dtype={arr.dtype}")



N: shape=(), dtype=int64
J: shape=(), dtype=int64
country_list: shape=(37,), dtype=<U3
sector_list: shape=(23,), dtype=<U36
alpha: shape=(37, 23), dtype=float64
beta: shape=(37, 23), dtype=float64
gamma: shape=(37, 23, 23), dtype=float64
theta: shape=(23,), dtype=float64
pif: shape=(37, 37, 23), dtype=float64
pim: shape=(37, 37, 23), dtype=float64
pit: shape=(37, 37, 23), dtype=float64
Xf: shape=(37, 23), dtype=float64
Xm: shape=(37, 23), dtype=float64
X: shape=(37, 23), dtype=float64
tilde_tau: shape=(37, 37, 23), dtype=float64
D: shape=(37,), dtype=float64
VA: shape=(37,), dtype=float64


In [25]:
# Unpack the reference archive into *_R variables for comparison with the values
# calibrated above.
def _as_text(entry):
    """Decode a bytes label to str; return any other entry unchanged."""
    return entry.decode("utf-8") if isinstance(entry, bytes) else entry


N_R, J_R = int(real_data['N']), int(real_data['J'])
country_list_R = list(map(_as_text, real_data["country_list"].tolist()))
sector_list_R = list(map(_as_text, real_data["sector_list"].tolist()))

theta_R = real_data['theta']
# NOTE(review): an earlier comment described X[n, j] as country n's total OUTPUT in
# sector j, whereas this notebook builds X as EXPENDITURE; confirm which the archive holds.
X_R = real_data['X']
alpha_R = real_data['alpha']
beta_R = real_data['beta']
gamma_R = real_data['gamma']
pi_R = real_data['pit']       # total trade share is stored under the key "pit"


In [27]:


# Compare the parameters calibrated in this notebook (left) with the reference archive
# (right), element by element. Note that gamma is compared in its axis-swapped form.
pairs_to_check = {
    "N":     (N,      N_R),
    "J":     (J,      J_R),
    "alpha": (alpha,  alpha_R),
    "beta":  (beta,   beta_R),
    "gamma": (gamma_TRY,  gamma_R),
    "theta": (theta,  theta_R),
}

ABS_TOL = 1e-3          # compare by absolute error only; the threshold is 1e-3
total_mismatch = 0
report = []

for name, (lhs, rhs) in pairs_to_check.items():
    a, b = np.asarray(lhs), np.asarray(rhs)

    # Different shapes: count every element as a mismatch.
    if a.shape != b.shape:
        report.append(f"[{name}] 形状不一致 {a.shape} vs {b.shape} → 计为全部不一致")
        mism = a.size
    else:
        diff = np.abs(a - b)
        mask_equal = diff <= ABS_TOL          # absolute error within the threshold counts as equal
        mism = diff.size - mask_equal.sum()

        if mism == 0:
            report.append(f"[{name}] ✔︎ 全部一致")
        else:
            # Report the number of mismatches plus the largest error and where it occurs.
            pos_max = np.unravel_index(diff.argmax(), a.shape)
            max_err = diff[pos_max]
            report.append(
                f"[{name}] ❌ 不一致 {mism} 个；最大误差 {max_err:.6g} @ {pos_max}"
            )

    total_mismatch += mism

# One line per parameter, followed by the total number of mismatching elements.
print("\n".join(report))
print(f"\n=== 总计不一致元素数: {total_mismatch} ===")


[N] ✔︎ 全部一致
[J] ✔︎ 全部一致
[alpha] ✔︎ 全部一致
[beta] ✔︎ 全部一致
[gamma] ❌ 不一致 15882 个；最大误差 0.461876 @ (np.int64(32), np.int64(6), np.int64(9))
[theta] ✔︎ 全部一致

=== 总计不一致元素数: 15882 ===


In [53]:
# Switch to the folder that receives the exported parameter archive.
# The home directory is expanded from "~" (as in the setup cell) instead of being
# hard-coded to one user's account, so the notebook also runs on other machines.
# Note: this rebinds `wd` and changes the working directory; relative paths used by
# earlier cells no longer resolve to the Tariff_Project folder after this point.
wd = os.path.expanduser(
    "~/Dropbox/Carbon_Emission_Analysis_2017/volatility_analysis/Furusawa2023/data"
)
os.chdir(wd)
os.getcwd()

'/Users/yaolangzhong/Dropbox/Carbon_Emission_Analysis_2017/volatility_analysis/Furusawa2023/data'

In [54]:
# Bundle all calibrated parameters into one archive in the current working directory.
# NOTE(review): the reference archive 'model_data_2017.npz' stores the sector count,
# the total trade share and value added under the keys "J", "pit" and "VA"; here they
# are written as "S", "pi" and "V". Confirm the key names the consuming code expects.
# NOTE(review): `gamma` is saved in (n, j, k) layout, while the equilibrium call in this
# notebook uses the axis-swapped gamma_TRY; confirm which layout the consumer needs.
# NOTE(review): `pi` must still be the total trade-share array at this point; verify it
# has not been rebound by an intermediate loop.
np.savez('data_2017.npz',
        N = N,
        S = J,
        country_list = country_list, 
        sector_list = sector_list, 
        alpha = alpha, 
        beta = beta, 
        gamma = gamma, 
        theta = theta, 
        pif = pif,
        pim = pim,
        pi = pi,
        Xf = Xf,
        Xm = Xm,
        X = X,
        tilde_tau = tilde_tau,
        D = D, 
        V = VA)