# Gravity Equation Estimation

In [1]:
import numpy as np
import os
import pandas as pd
import string as str
import math
import sys
import time

from scipy import optimize, special
import gurobipy as grb

from sklearn.preprocessing import LabelEncoder
from IPython.display import display, HTML

In [2]:
# Locate the data root: the part of the current working directory that
# precedes the "veteran_students_mec_optim/Pauline" folder.
# The marker and the CSV path are assembled with os.path.join so the native
# separator is used; on Windows this yields exactly the strings that were
# previously hard-coded with "\\", and on POSIX systems it now works as well.
_repo_marker = os.path.join("veteran_students_mec_optim", "Pauline")
thePath = os.getcwd().split(_repo_marker)[0]

# os.path.join only inserts a separator when thePath lacks a trailing one, so
# the resulting path is still well-formed when the marker is not part of the
# working directory (thePath is then the whole cwd, without trailing separator).
tradedata = pd.read_csv(
    os.path.join(
        thePath,
        "data_mec_optim",
        "gravity_wtodata",
        "1_TraditionalGravity_from_WTO_book.csv",
    ),
    sep=',',
)

In [3]:
# Keep only the pair/year identifiers, the trade flow and the gravity covariates.
# .copy() makes the subset an independent frame: `tradedata` is modified in
# place further down (sorting, .loc assignment), and doing that on a column
# subset of another frame can raise pandas' SettingWithCopyWarning.
tradedata = tradedata[['exporter', 'importer', 'year', 'trade', 'DIST', 'ln_DIST', 'CNTG', 'LANG', 'CLNY']].copy()

In [4]:
# Order the panel by year, then exporter, then importer, and renumber the rows
# 0..N-1 so that positional order coincides with this sort order.
tradedata = (
    tradedata
    .sort_values(['year', 'exporter', 'importer'])
    .reset_index(drop=True)
)
# Show the first rows as the cell output.
tradedata.head()

Unnamed: 0,exporter,importer,year,trade,DIST,ln_DIST,CNTG,LANG,CLNY
0,ARG,ARG,1986,61288.590263,533.90824,6.280224,0,0,0
1,ARG,AUS,1986,27.764874,12044.574134,9.39637,0,0,0
2,ARG,AUT,1986,3.559843,11751.146521,9.371706,0,0,0
3,ARG,BEL,1986,96.102567,11305.285764,9.333026,0,0,0
4,ARG,BGR,1986,3.129231,12115.572046,9.402246,0,0,0


In [5]:
# Rows where a country is both importer and exporter: set every distance /
# proximity covariate to 0 for them.
is_internal = tradedata['importer'].eq(tradedata['exporter'])
tradedata.loc[is_internal, ['DIST', 'ln_DIST', 'CNTG', 'LANG', 'CLNY']] = 0

In [6]:
# Problem dimensions.
nbt = len(pd.unique(tradedata['year']))      # number of distinct years
nbi = len(pd.unique(tradedata['importer']))  # number of distinct importers
nbk = 4                                      # length of the coefficient vector beta_k

In [7]:
# Build, for every year in the panel:
#   Xhatnit[t] : exporter-by-importer matrix of observed trade flows with the
#                diagonal set to 0,
#   Dnikt[t]   : covariate matrix with one row per row of that year's data and
#                columns (ln_DIST, CNTG, LANG, CLNY).
Xhatnit = []
Dnikt = []

years = tradedata['year'].unique()
for year in years:

    tradedata_year = tradedata[tradedata['year'] == year]

    # Take an explicit, writable copy of the pivot table's data. `.values` can
    # hand back the frame's own buffer, which is read-only when pandas
    # Copy-on-Write is active, and np.fill_diagonal below writes in place.
    flows = tradedata_year.pivot(index='exporter', columns='importer', values='trade').to_numpy(copy=True)

    # Zero the diagonal entries (same position on the exporter and importer
    # axes) so they do not enter the flow totals computed from this matrix.
    np.fill_diagonal(flows, 0)
    Xhatnit.append(flows)

    # Rows are kept in the order of tradedata_year.
    Dnikt.append(tradedata_year[['ln_DIST', 'CNTG', 'LANG', 'CLNY']].values)

In [8]:
# Marginals of the observed flow matrices, one column per year:
#   Xnt[n, t] = sum over columns of row n of Xhatnit[t]
#   Yit[i, t] = sum over rows of column i of Xhatnit[t]
Xnt = np.zeros((nbi, nbt))
Yit = np.zeros_like(Xnt)

for t in range(nbt):
    flows_t = Xhatnit[t]
    Xnt[:, t] = np.sum(flows_t, axis=1)
    Yit[:, t] = np.sum(flows_t, axis=0)

In [9]:
# Average, over the nbt years, of the total observed flow.
flows_all_years = sum(Xhatnit)  # element-wise sum of the yearly matrices
totalmass_t = flows_all_years.sum(axis=(0, 1)) / nbt

# Observed flows expressed as shares of that average total; stacking the list
# gives one array indexed [year, row, column].
pihat_nit = np.asarray(Xhatnit) / totalmass_t

In [10]:
# Standardise the covariates: for each column take the mean and the sample
# standard deviation (ddof=1) within every year, then average those yearly
# statistics across years.
yearly_means = [mat.mean(axis=0) for mat in Dnikt]
yearly_sds = [mat.std(axis=0, ddof=1) for mat in Dnikt]

meanD_k = np.asmatrix(yearly_means).mean(axis=0)
sdD_k = np.asmatrix(yearly_sds).mean(axis=0)

# NOTE: this overwrites Dnikt with its standardised version (each element
# becomes an np.matrix because meanD_k / sdD_k are matrices). Re-running the
# cell would standardise the already standardised data again.
Dnikt = [(mat - meanD_k) / sdD_k for mat in Dnikt]

In [11]:
# Row and column marginals rescaled by the average total flow.
p_nt = np.divide(Xnt, totalmass_t)
q_nt = np.divide(Yit, totalmass_t)

# Integer column / row vectors of ones, used to broadcast vectors to
# (nbi, nbi) matrices through matrix products.
IX = np.ones((nbi, 1), dtype=int)
tIY = np.ones((1, nbi), dtype=int)

In [12]:
# f_nit[t][n, i] = p_nt[n, t]  (column vector times a row of ones)
f_nit = [p_nt[:, t].reshape(nbi, 1).dot(tIY) for t in range(nbt)]

# g_nit[t][n, i] = q_nt[i, t]  (column of ones times a row vector)
g_nit = [IX.dot(q_nt[:, t].reshape(1, nbi)) for t in range(nbt)]

In [17]:
# Scale parameter: divides Phi inside the exponentials and multiplies the logs.
sigma = 1

# Inner IPFP loop: iteration cap and tolerance on the marginal error.
maxiterIpfp, tolIpfp = 1000, 1e-12

# Outer descent loop: iteration cap, tolerance on the change of the objective
# value, and step size applied to the gradient.
maxiter, tolDescent = 500, 1e-12
t_s = 0.03

# Loop state for the outer descent.
iterCount, contIter = 0, True

In [16]:
# Starting values for the estimation.
v_it = np.zeros((nbi, nbt))          # one column of potentials v per year
beta_k = np.zeros(nbk, dtype=int)    # coefficients start at 0

# Accumulators that are reset at the end of every outer iteration.
thegrad = np.zeros(nbk, dtype=int)
pi_nit = []

# Previous objective value; -inf so the first convergence test cannot pass.
theval_old = float('-inf')

In [18]:
# Gradient descent on the coefficients beta_k (in standardised-covariate units).
#
# Each outer iteration:
#   1. for every year t, builds Phi = D_t . beta_k, runs an IPFP loop to find
#      the potentials u, v, and forms the fitted flows pi_nit[t];
#   2. accumulates the gradient  sum_t (pi_nit[t] - pihat_nit[t]) . D_t ;
#   3. moves beta_k by -t_s * gradient and evaluates `theval`, whose change
#      between iterations is the stopping criterion.
#
# The matrix algebra below relies on Dnikt[t] being np.matrix objects (they are
# produced with np.asmatrix statistics), which keeps u, vnext, thegrad and
# beta_k two-dimensional.
ptm = time.time()
while(contIter):

    for t in range(nbt):

        D_ij_k = Dnikt[t]

        # C-order (row-major) reshape: entry [n, i] of Phi comes from row
        # n*nbi + i of D_ij_k.
        Phi = D_ij_k.dot(beta_k.reshape(nbk,1)).reshape(nbi,nbi)

        contIpfp = True
        iterIpfp = 0

        # Warm start: reuse the v found for this year in the previous outer iteration.
        v = v_it[:, t].reshape(1,nbi)
        f = f_nit[t]
        g = g_nit[t]

        K = np.exp(Phi/sigma)
        # Zero diagonal, matching the zeroed diagonal of the observed flows.
        np.fill_diagonal(K,0)

        fK = np.multiply(f,K)
        gK = np.multiply(g,K)

        while(contIpfp):

            iterIpfp = iterIpfp + 1

            # Alternate the updates of u (row potentials) and v (column potentials).
            u = sigma * np.log(np.sum(np.multiply(gK,np.exp((-IX.dot(v))/sigma)), axis = 1)).flatten()
            vnext = sigma * np.log(np.sum(np.multiply(fK,np.exp((-u.T.dot(tIY))/sigma)), axis = 0))
            # Largest deviation of the row sums from 1 after the v update.
            error = np.max(np.abs(np.sum(np.multiply(gK,np.exp((-IX.dot(vnext) - u.T.dot(tIY))/sigma)), axis = 1) - 1))

            if (error < tolIpfp or iterIpfp >= maxiterIpfp):
                contIpfp = False
            v = vnext

        v_it[:,t] = np.asarray(v)[0]

        fgK = np.multiply(f,gK)
        pi_nit.append(np.multiply(fgK,np.exp((-IX.dot(v) - u.T.dot(tIY))/sigma)))

        # Flatten in C order so that element [n, i] is paired with row
        # n*nbi + i of D_ij_k, i.e. the same layout used to build Phi above.
        # (Fortran order would pair it with the row of the transposed pair,
        # which is only equivalent when every covariate is symmetric.)
        thegrad = thegrad + (pi_nit[t]-pihat_nit[t]).flatten().dot(D_ij_k)

    beta_k = beta_k - t_s * thegrad

    # Stack all years once and keep the strictly positive entries (log is
    # undefined at the zeroed diagonal).
    pi_all = np.concatenate(pi_nit).ravel()
    nonzero_pi_nit = pi_all[np.where(pi_all>0, True, False)]
    # .item() extracts the scalar from the 1x1 results without relying on the
    # deprecated float() conversion of non-0-d arrays.
    theval = np.sum(np.multiply(thegrad,beta_k), axis = 1).item() - sigma * np.sum(np.multiply(nonzero_pi_nit, np.log(nonzero_pi_nit)),axis=(0,1)).item()

    iterCount = iterCount + 1

    # Stop after at most `maxiter` iterations (same >= convention as the IPFP
    # loop) or once the objective value has stabilised.
    if (iterCount >= maxiter or np.abs(theval - theval_old) < tolDescent):
        contIter = False

    # Reset the per-iteration accumulators.
    theval_old = theval
    thegrad = np.repeat(0, nbk)
    pi_nit = []

diff = time.time() - ptm
print('Time elapsed = ', diff, 's.')

Time elapsed =  18.883745193481445 s.


In [19]:
# Undo the covariate scaling: divide each coefficient by the standard deviation
# used to standardise its covariate, then print the result.
# NOTE: beta_k is overwritten, so running this cell twice rescales it twice.
beta_k = np.divide(beta_k, sdD_k)
print(beta_k)

[[-0.84092368  0.43744866  0.2474767  -0.22249036]]
