In [1]:
import numpy as np

In [2]:
# Reading NASDAQ data for the year 2000: one index value per line.
# float() already ignores surrounding whitespace (including the trailing
# newline), so lines are parsed as-is. Blank lines, such as an empty line at
# the end of the file, are skipped instead of aborting with a ValueError.
with open('nasdaq00.txt', 'r') as f:
    indices2000 = [float(line) for line in f if line.strip()]

In the code, the following equations are used to compute the linear coefficients that maximize the log-likelihood ${\cal L}$:<br><br>
$${\cal L} = \sum_t \log P(x_t|x_{t-1},x_{t-2},x_{t-3})$$
$${\cal L} = \sum_t \log \left[\frac{1}{\sqrt{2\pi}}\exp\left[-\frac{1}{2}\biggl(x_t - a_1 x_{t-1} - a_2 x_{t-2} - a_3 x_{t-3}\biggr)^2\right]\right]$$
$${\cal L} = \sum_t \left[\log \frac{1}{\sqrt{2\pi}} + \log \left[\exp\left[-\frac{1}{2}\biggl(x_t - a_1 x_{t-1} - a_2 x_{t-2} - a_3 x_{t-3}\biggr)^2\right]\right]\right]$$
$${\cal L} = \sum_t \log \frac{1}{\sqrt{2\pi}} + \sum_t \log \left[\exp\left[-\frac{1}{2}\biggl(x_t - a_1 x_{t-1} - a_2 x_{t-2} - a_3 x_{t-3}\biggr)^2\right]\right]$$
$${\cal L} = \sum_t \log \frac{1}{\sqrt{2\pi}} - \sum_t \frac{1}{2}\biggl(x_t - a_1 x_{t-1} - a_2 x_{t-2} - a_3 x_{t-3}\biggr)^2$$
$$\text{To maximize } {\cal L},\ \text{set } \frac{\partial {\cal L}}{\partial a_i} = 0 \text{ for } i \in \{1, 2, 3\}$$
$$\frac{\partial {\cal L}}{\partial a_i} = \sum_t x_{t-i} \biggl(x_t - a_1 x_{t-1} - a_2 x_{t-2} - a_3 x_{t-3}\biggr) = 0$$
$$\sum_t \biggl(x_{t-i} x_t - a_1 x_{t-i} x_{t-1} - a_2 x_{t-i} x_{t-2} - a_3 x_{t-i} x_{t-3}\biggr) = 0$$
$$\sum_t x_{t-i} x_t - a_1 \sum_t x_{t-i} x_{t-1} - a_2 \sum_t x_{t-i} x_{t-2} - a_3 \sum_t x_{t-i} x_{t-3} = 0$$
$$a_1 \sum_t x_{t-i} x_{t-1} + a_2 \sum_t x_{t-i} x_{t-2} + a_3 \sum_t x_{t-i} x_{t-3} = \sum_t x_{t-i} x_t$$
<br>This can be written as the matrix equation shown below:<br><br>
$$X A = Y, \text{ where}$$
$$X = 
\begin{pmatrix}
\sum_t x_{t-1}^2 & \sum_t x_{t-1} x_{t-2} & \sum_t x_{t-1} x_{t-3}\\
\sum_t x_{t-2} x_{t-1} & \sum_t x_{t-2}^2 & \sum_t x_{t-2} x_{t-3}\\
\sum_t x_{t-3} x_{t-1} & \sum_t x_{t-3} x_{t-2} & \sum_t x_{t-3}^2\\
\end{pmatrix}
$$
$$A = 
\begin{pmatrix}
    a_1\\
    a_2\\
    a_3
\end{pmatrix}$$
$$Y = 
\begin{pmatrix}
    \sum_t x_t x_{t-1}\\
    \sum_t x_t x_{t-2}\\
    \sum_t x_t x_{t-3}
\end{pmatrix}$$

In [3]:
# Part A
# Fit the linear coefficients a_1..a_3 by solving the matrix equation X A = Y
# built from the year-2000 data.
N_LAGS = 3  # number of preceding values used to predict the current one

series2000 = np.array(indices2000, dtype=float)
if series2000.size <= N_LAGS:
    raise ValueError("need more than %d data points to fit the model" % N_LAGS)

# Row k-1 holds x_{t-k} for every t that has N_LAGS preceding values.
# The slice bounds follow the length of the series instead of a hard-coded
# 249, so every row has the same length as target2000 for any input size.
lagged2000 = np.array(
    [series2000[N_LAGS - k:series2000.size - k] for k in range(1, N_LAGS + 1)]
)
target2000 = series2000[N_LAGS:]

# X[i, j] = sum_t x_{t-(i+1)} * x_{t-(j+1)}   (3 x 3, symmetric)
# Y[0, i] = sum_t x_t * x_{t-(i+1)}           (1 x 3 row vector)
X = np.dot(lagged2000, lagged2000.T)
Y = np.dot(lagged2000, target2000)[np.newaxis, :]

# Solve X A^T = Y^T directly rather than multiplying by an explicitly formed
# inverse of X. The result is transposed back so that A keeps its (1, 3)
# row-vector shape.
A = np.linalg.solve(X, Y.T).T

print(A)

[[0.95067337 0.01560133 0.03189569]]


In [4]:
# Reading NASDAQ data for the year 2001: one index value per line.
# float() already ignores surrounding whitespace (including the trailing
# newline), so lines are parsed as-is. Blank lines, such as an empty line at
# the end of the file, are skipped instead of aborting with a ValueError.
with open('nasdaq01.txt', 'r') as f:
    indices2001 = [float(line) for line in f if line.strip()]

In [5]:
# Part B
def ar_rmse(series, coeffs):
    """Return the RMSE of one-step linear predictions over a series.

    Each value x_t with enough history is predicted as
    coeffs[0] * x_{t-1} + coeffs[1] * x_{t-2} + ... and compared with the
    actual x_t.

    Parameters
    ----------
    series : sequence of float
        The observed values, in time order.
    coeffs : array-like
        The linear coefficients; any shape is accepted and flattened, so a
        (1, k) row vector works as well as a length-k vector.

    Returns
    -------
    numpy.float64
        Square root of the mean squared prediction error.

    Raises
    ------
    ValueError
        If the series has no value with a full history of len(coeffs)
        predecessors.
    """
    values = np.array(series, dtype=float)
    weights = np.array(coeffs, dtype=float).ravel()
    n_lags = weights.size
    if values.size <= n_lags:
        raise ValueError("series must contain more than %d values" % n_lags)

    # Row k-1 holds x_{t-k}, aligned with the targets values[n_lags:].
    # Bounds are derived from the series length, so the same code serves
    # both years without per-year slice constants.
    lagged = np.array(
        [values[n_lags - k:values.size - k] for k in range(1, n_lags + 1)]
    )
    residuals = np.dot(weights, lagged) - values[n_lags:]
    return np.sqrt(np.mean(residuals ** 2))


# RMSE for year 2000
rmse2000 = ar_rmse(indices2000, A)

# RMSE for year 2001
rmse2001 = ar_rmse(indices2001, A)

print("RMSE on data from 2000 ", rmse2000)
print("RMSE on data from 2001 ", rmse2001)

RMSE on data from 2000  117.90844361778285
RMSE on data from 2001  54.636049675199
