# Lecture 6--Simulation

## 1. Programming: Graphviz

## 2. Data Science: ARMA processes

### 2.1 Foundation
#### Definition
#### Simulation
### 2.2 State-space representation
#### Format
#### ARMA to state-space
#### Code
### 2.3 Kalman filter
#### Estimation via CSS
#### Estimation via maximum likelihood
#### Code
#### Forecasting
### 2.4 Stationarity tests
#### Dickey-Fuller test
#### KPSS test
#### Code

In [3]:
import numpy as np
import pandas as pd
def kpss(series,diffs=0,trend=False,lagshort=True):
    """Run a KPSS stationarity test on a single time series.

    The series is trimmed to the span between its first and last finite
    observation, differenced ``diffs`` times, and regressed on a constant
    (plus a linear time trend when ``trend`` is true).  The statistic is the
    scaled sum of squared partial sums of the regression residuals divided
    by a Bartlett-weighted long-run variance estimate.

    Parameters
    ----------
    series : one-column DataFrame, Series or 1-D array-like
        Observations in time order.  Leading and trailing non-finite values
        are dropped; non-finite values inside that span are not removed.
    diffs : int
        Number of times the series is differenced before testing.
    trend : bool
        If true, test stationarity around a linear trend; otherwise around
        a constant level.
    lagshort : bool
        Truncation lag is ``int(4*(n/100)**0.25)`` when true and
        ``int(12*(n/100)**0.25)`` otherwise.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``method``, ``stat``, ``lags`` and ``p``.  The
        p-value is linearly interpolated from the tabulated critical values
        and therefore always lies in ``[0.01, 0.1]``.

    Raises
    ------
    ValueError
        If the input has more than one column, contains no finite
        observation, or is too short for the chosen truncation lag.
    """
    values = np.asarray(series, dtype=float)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    if values.ndim != 2 or values.shape[1] != 1:
        raise ValueError("series must be one-dimensional or have exactly one column")

    # Keep everything between the first and the last fully finite row.
    finite_rows = np.where(np.all(np.isfinite(values), axis=1))[0]
    if finite_rows.size == 0:
        raise ValueError("series contains no finite observations")
    start, end = finite_rows[0], finite_rows[-1]

    # Difference along time (axis 0); the default last axis would wipe out
    # a single-column array.
    y = np.diff(values[start:end + 1, :], n=diffs, axis=0)
    n = y.shape[0]

    lags = int((4 if lagshort else 12) * (n / 100) ** 0.25)
    if n <= lags:
        raise ValueError("not enough observations for the KPSS test")

    # Deterministic regressors and the matching critical values.
    x = np.ones((n, 1))
    if trend:
        x = np.hstack((x, np.arange(n).reshape(-1, 1)))
        table = np.array([0.119, 0.146, 0.176, 0.216])
    else:
        table = np.array([0.347, 0.463, 0.574, 0.739])
    tablep = np.array([0.1, 0.05, 0.025, 0.01])

    # OLS residuals and their partial sums.
    res = y - x @ np.linalg.solve(x.T @ x, x.T @ y)
    s = res.cumsum(0)
    eta = (s.T @ s)[0, 0] / n ** 2

    # Long-run variance: residual variance plus twice the Bartlett-weighted
    # autocovariances up to `lags`.
    # NOTE(review): each autocovariance is averaged over its n-i overlapping
    # terms (kept from the original); some implementations divide by n
    # instead -- confirm which is intended.
    sig = (res.T @ res)[0, 0] / n
    for i in range(1, lags + 1):
        weight = 1 - i / (lags + 1)
        autocov = (res[i:, :].T @ res[:n - i, :])[0, 0] / (n - i)
        sig += 2 * weight * autocov

    stat = float(eta / sig)
    # np.interp clamps outside the table, so p is bounded by 0.01 and 0.1.
    p = float(np.interp(stat, table, tablep))
    return pd.DataFrame([['kpss', stat, lags, p]],
                        columns=['method', 'stat', 'lags', 'p'])
def adf(series,diffs=0,lags=None,explosive=False):
    """Run an augmented Dickey-Fuller test with constant and linear trend.

    The series is trimmed to the span between its first and last finite
    observation and differenced ``diffs`` times.  The first difference is
    then regressed on the lagged level, a constant, a linear trend and
    ``lags`` lagged differences; the statistic is the t-ratio of the
    lagged-level coefficient.

    Parameters
    ----------
    series : one-column DataFrame, Series or 1-D array-like
        Observations in time order.  Leading and trailing non-finite values
        are dropped; non-finite values inside that span are not removed.
    diffs : int
        Number of times the series is differenced before testing.
    lags : int or None
        Number of lagged differences in the regression.  Defaults to
        ``int((n-1)**(1/3))`` where ``n`` is the number of observations
        after differencing.
    explosive : bool
        If true, report the p-value for the explosive alternative
        (``1 - p``) instead of the stationary alternative.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``method``, ``stat``, ``lags`` and ``p``.  The
        p-value is linearly interpolated from tabulated critical values and
        is therefore bounded by the table's outermost levels (0.01, 0.99).

    Raises
    ------
    ValueError
        If the input has more than one column, contains no finite
        observation, or leaves too few rows to estimate the regression.
    """
    values = np.asarray(series, dtype=float)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    if values.ndim != 2 or values.shape[1] != 1:
        raise ValueError("series must be one-dimensional or have exactly one column")

    # Keep everything between the first and the last fully finite row.
    finite_rows = np.where(np.all(np.isfinite(values), axis=1))[0]
    if finite_rows.size == 0:
        raise ValueError("series contains no finite observations")
    start, end = finite_rows[0], finite_rows[-1]

    # Difference along time (axis 0); the default last axis would wipe out
    # a single-column array.
    y = np.diff(values[start:end + 1, :], n=diffs, axis=0)
    n = y.shape[0]
    if lags is None:
        # max() keeps the cube root real when the series is empty.
        lags = int(max(n - 1, 0) ** (1 / 3))

    # One row is lost to the first difference and `lags` rows to the lagged
    # differences; the regression has lags + 3 coefficients.
    rows = n - lags - 1
    if rows <= lags + 3:
        raise ValueError("not enough observations for the ADF regression")

    dy = np.diff(y, axis=0)
    level = y[lags:-1, :]
    target = dy[lags:, :]
    lagged = [dy[lags - i:-i, :] for i in range(1, lags + 1)]
    xmat = np.hstack((level, np.ones((rows, 1)),
                      np.arange(rows).reshape(-1, 1), *lagged))

    # OLS fit and the t-ratio of the lagged-level coefficient (column 0).
    xtx = xmat.T @ xmat
    b = np.linalg.solve(xtx, xmat.T @ target)
    e = target - xmat @ b
    vb = (e.T @ e)[0, 0] / rows * np.linalg.inv(xtx)
    stat = float(b[0, 0] / np.sqrt(vb[0, 0]))
    method = 'adf'

    # Absolute critical values: one row per probability in `tablep`, one
    # column per sample size in `tableT`.
    table = np.array([[4.38, 4.15, 4.04, 3.99, 3.98, 3.96],
                      [3.95, 3.80, 3.73, 3.69, 3.68, 3.66],
                      [3.60, 3.50, 3.45, 3.43, 3.42, 3.41],
                      [3.24, 3.18, 3.15, 3.13, 3.13, 3.12],
                      [1.14, 1.19, 1.22, 1.23, 1.24, 1.25],
                      [0.80, 0.87, 0.90, 0.92, 0.93, 0.94],
                      [0.50, 0.58, 0.62, 0.64, 0.65, 0.66],
                      [0.15, 0.24, 0.28, 0.31, 0.32, 0.33]])
    tableT = np.array([25, 50, 100, 250, 500, 10000])
    # The last level is 0.99; the previous 0.88 broke the increasing order
    # of the probabilities and distorted p-values in the upper tail.
    tablep = np.array([0.01, 0.025, 0.05, 0.10, 0.90, 0.95, 0.975, 0.99])

    # Interpolate each probability level to the effective sample size, then
    # flip the sign so the critical values increase with the probability.
    ntmp = min(rows, tableT.max())
    critical = -np.array([np.interp(ntmp, tableT, row) for row in table])
    p = float(np.interp(stat, critical, tablep))
    if explosive:
        p = 1 - p
    return pd.DataFrame([[method, stat, lags, p]],
                        columns=['method', 'stat', 'lags', 'p'])

## 3. Programming challenges

### 3.1 Spurious regression

### 3.2 OLS estimation of AR(1) process