<a href="https://colab.research.google.com/github/YucanLei/github-starter-course/blob/main/a_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import scipy.stats as sts
import time

# Baseline: simulate S independent paths of length T of the recursion
#     z_t = rho * z_{t-1} + (1 - rho) * mu + e_t
# using plain Python loops. The timer starts here, so the printed figure covers
# the parameter setup and the random draws as well as the loops.
start = time.time()

# Set model parameters
rho = 0.5  # weight on the previous value z_{t-1}
mu = 3.0  # level entering the recursion through the (1 - rho) * mu term
sigma = 1.0  # scale passed to the normal shock draws
z_0 = mu  # starting value used for every simulated path

# Set simulation parameters, draw all idiosyncratic random shocks,
# and create the empty output container
S = 1000  # Set the number of lives to simulate
T = 4160  # Set the number of periods for each simulation
np.random.seed(25)  # seed NumPy's global RNG before drawing the shocks
eps_mat = sts.norm.rvs(loc=0, scale=sigma, size=(T, S))  # one shock per (period, life)
z_mat = np.zeros((T, S))  # filled in below; row = period, column = life

# Each life (column) is simulated on its own, one period at a time.
for s_ind in range(S):
    z_tm1 = z_0  # every life starts from z_0
    for t_ind in range(T):
        e_t = eps_mat[t_ind, s_ind]
        z_t = rho * z_tm1 + (1 - rho) * mu + e_t
        z_mat[t_ind, s_ind] = z_t
        z_tm1 = z_t  # carry this period's value into the next iteration

end = time.time()
print(end - start)  # seconds elapsed since `start`

13.078466892242432


In [6]:
from numba import jit

# Set model parameters (same values as the pure-Python baseline cell)
rho = 0.5  # weight on the previous value z_{t-1}
mu = 3.0  # level entering the recursion through the (1 - rho) * mu term
sigma = 1.0  # scale passed to the normal shock draws
z_0 = mu  # starting value; go_fast below reads this name as a global, not as an argument

# Set simulation parameters and draw all idiosyncratic random shocks.
# No output container is created here: go_fast allocates z_mat itself.
S = 1000  # Set the number of lives to simulate
T = 4160  # Set the number of periods for each simulation
np.random.seed(25)  # re-seed NumPy's global RNG before drawing the shocks
eps_mat = sts.norm.rvs(loc=0, scale=sigma, size=(T, S))  # one shock per (period, life)

@jit(nopython=True)
def go_fast(eps_mat, rho, mu, T, S):
    '''
    Simulate S independent paths of length T of the recursion

        z_t = rho * z_{t-1} + (1 - rho) * mu + e_t

    compiled lazily by Numba in nopython mode (compilation happens on the
    first call for a given set of argument types).

    Parameters
    ----------
    eps_mat : 2-D float array with at least T rows and S columns;
        eps_mat[t, s] is the shock e_t applied to path s in period t.
    rho : weight on the previous value z_{t-1}.
    mu : level entering through the (1 - rho) * mu term.
    T : number of periods (rows of the result).
    S : number of simulated paths (columns of the result).

    Returns
    -------
    z_mat : (T, S) float array with z_mat[t, s] the value of path s in period t.

    Raises
    ------
    ValueError : if T or S exceeds the corresponding dimension of eps_mat.

    Notes
    -----
    The starting value of every path is the module-level name z_0, which is
    not an argument. Numba treats globals as compile-time constants, so z_0
    must be defined before the first call and later re-assignments of z_0 are
    not picked up by an already compiled function.
    '''
    # nopython mode does not bounds-check array indexing by default, so a T or S
    # larger than eps_mat would silently read memory outside the array.
    if T > eps_mat.shape[0] or S > eps_mat.shape[1]:
        raise ValueError("T and S must not exceed the dimensions of eps_mat")

    z_mat = np.zeros((T, S))

    for s_ind in range(S):
        z_tm1 = z_0  # every path starts from the global z_0
        for t_ind in range(T):
            e_t = eps_mat[t_ind, s_ind]
            z_t = rho * z_tm1 + (1 - rho) * mu + e_t
            z_mat[t_ind, s_ind] = z_t
            z_tm1 = z_t  # carry this period's value into the next iteration

    return z_mat

# Time one call of go_fast. The function in this cell is compiled lazily, so when
# this is its first call the figure includes Numba's compilation time as well.
# T and S are passed instead of repeating the literals so the requested size
# cannot drift away from the shape eps_mat was drawn with.
start = time.perf_counter()  # monotonic, high-resolution clock meant for intervals
go_fast(eps_mat, rho, mu, T, S)
end = time.perf_counter()
print(end - start)

0.19421863555908203


In [15]:
from numba import jit, float64, int32
@jit(float64[:, :](float64[:, :], float64, float64, int32, int32), nopython=True)
def go_fast(eps_mat, rho, mu, T, S):
    '''
    Simulate S independent paths of length T of the recursion

        z_t = rho * z_{t-1} + (1 - rho) * mu + e_t

    compiled by Numba in nopython mode. Because an explicit signature is given
    to the decorator, compilation happens when this definition is executed
    rather than on the first call.

    Parameters
    ----------
    eps_mat : 2-D float64 array with at least T rows and S columns;
        eps_mat[t, s] is the shock e_t applied to path s in period t.
    rho : weight on the previous value z_{t-1} (float64).
    mu : level entering through the (1 - rho) * mu term (float64).
    T : number of periods (rows of the result), declared int32.
    S : number of simulated paths (columns of the result), declared int32.

    Returns
    -------
    z_mat : (T, S) float64 array with z_mat[t, s] the value of path s in period t.

    Raises
    ------
    ValueError : if T or S exceeds the corresponding dimension of eps_mat.

    Notes
    -----
    The starting value of every path is the module-level name z_0, which is
    not an argument. Numba treats globals as compile-time constants, so z_0
    must already be defined when this cell runs and later re-assignments of
    z_0 are not picked up.
    '''
    # nopython mode does not bounds-check array indexing by default, so a T or S
    # larger than eps_mat would silently read memory outside the array.
    if T > eps_mat.shape[0] or S > eps_mat.shape[1]:
        raise ValueError("T and S must not exceed the dimensions of eps_mat")

    z_mat = np.zeros((T, S))

    for s_ind in range(S):
        z_tm1 = z_0  # every path starts from the global z_0
        for t_ind in range(T):
            e_t = eps_mat[t_ind, s_ind]
            z_t = rho * z_tm1 + (1 - rho) * mu + e_t
            z_mat[t_ind, s_ind] = z_t
            z_tm1 = z_t  # carry this period's value into the next iteration

    return z_mat

# Time one call of go_fast. The definition in this cell carries an explicit
# signature, so it was compiled when it was defined and this figure measures
# execution only.
# T and S are passed instead of repeating the literals so the requested size
# cannot drift away from the shape eps_mat was drawn with.
start = time.perf_counter()  # monotonic, high-resolution clock meant for intervals
go_fast(eps_mat, rho, mu, T, S)
end = time.perf_counter()
print(end - start)

# Time only the allocation of a (4160, 1000) array of zeros.
t0 = time.time()
z_mat = np.zeros((4160, 1000))
t1 = time.time()
print(t1 - t0)

# Time drawing a fresh shock matrix together with the scalar parameter assignments.
# NOTE(review): np.random.seed is not called again here and eps_mat is overwritten,
# so any cell run after this one works with these new draws, not the seeded ones.
# NOTE(review): T, S and sigma are read on the rvs line before T and S are
# re-assigned below, so their values come from a previously executed cell.
t0 = time.time()
eps_mat = sts.norm.rvs(loc=0, scale=sigma, size=(T, S))
rho = 0.5
mu = 3.0
T = 4160
S = 1000
t1 = time.time()
print(t1 - t0)



0.06810688972473145
0.00495147705078125
0.14003348350524902


In [16]:
# Benchmark go_fast over repeated runs with IPython's %timeit (mean ± std. dev. is reported).
%timeit go_fast(eps_mat, rho, mu, 4160, 1000)

68.4 ms ± 681 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [22]:
from numba import jit, float64, int32
from numba.pycc import CC

# Ahead-of-time build: register go_fast with a compiler object for an extension
# module named "test_aot", then compile that module so it can be imported later.
cc = CC("test_aot")


@cc.export("go_fast", "f8[:,:](f8[:, :], f8, f8, i4, i4)")
def go_fast(eps_mat, rho, mu, T, S):
    '''
    Simulate S independent paths of length T of the recursion

        z_t = rho * z_{t-1} + (1 - rho) * mu + e_t

    and return them as a (T, S) array (row = period, column = path).

    eps_mat[t, s] supplies the shock e_t for path s in period t. Every path
    starts from the module-level name z_0, which is read as a global rather
    than passed in, so z_0 must be defined when the module is compiled.
    '''
    paths = np.zeros((T, S))
    # Constant term of the recursion; it does not change across iterations.
    drift = (1 - rho) * mu

    for col in range(S):
        prev = z_0  # each path restarts from the global starting value
        for row in range(T):
            prev = rho * prev + drift + eps_mat[row, col]
            paths[row, col] = prev

    return paths


# Build the extension module containing the exported function.
cc.compile()

# start = time.time()
# go_fast(eps_mat, rho, mu, 4160, 1000)
# end = time.time()
# print(end - start)

# t0 = time.time()
# z_mat = np.zeros((4160, 1000))
# t1 = time.time()
# print(t1 - t0)

# t0 = time.time()
# eps_mat = sts.norm.rvs(loc=0, scale=sigma, size=(T, S))
# rho = 0.5
# mu = 3.0
# T = 4160
# S = 1000
# t1 = time.time()
# print(t1 - t0)




2.6718430519104004
0.005044698715209961
0.1328725814819336


In [23]:
ls

[0m[01;34msample_data[0m/  [01;32mtest_aot.cpython-310-x86_64-linux-gnu.so[0m*


In [26]:
import test_aot

# Benchmark the go_fast exported by the compiled test_aot module over repeated runs.
%timeit test_aot.go_fast(eps_mat, rho, mu, 4160, 1000)

92 ms ± 5.74 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [28]:
# Second measurement of the same test_aot.go_fast call, to see run-to-run variation.
%timeit test_aot.go_fast(eps_mat, rho, mu, 4160, 1000)

68.8 ms ± 4.08 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# Single timed call; as the cell's last expression, the returned array is also displayed.
# NOTE(review): `go_fast` here is whichever definition was executed last in the kernel — confirm which one is intended.
%time go_fast(eps_mat, rho, mu, 4160, 1000)

CPU times: user 105 ms, sys: 1.93 ms, total: 107 ms
Wall time: 109 ms


array([[3.22827309, 4.0268903 , 2.16041515, ..., 3.5980616 , 4.2172622 ,
        4.27401251],
       [2.72415509, 2.76100373, 1.84533647, ..., 4.56294503, 4.15767785,
        2.40206165],
       [0.88323972, 3.60232523, 1.86360091, ..., 3.68876144, 3.25430265,
        3.55861037],
       ...,
       [5.74366912, 2.74903461, 3.7073953 , ..., 1.87117226, 1.64628771,
        2.43676356],
       [5.44468565, 3.16764667, 4.44006409, ..., 2.86687192, 3.29363719,
        2.6143125 ],
       [3.72185889, 4.1198887 , 4.67028049, ..., 2.06330704, 3.21297341,
        2.6348468 ]])