## Review of AR(3)


In [3]:
import numpy as np
import statsmodels.api as sm

def linear_regression(design_matrix, target_vector):
    """Return least-squares coefficients for ``design_matrix @ coeffs ~= target_vector``.

    Args:
        design_matrix: 2-D array with one row per observation and one column
            per regressor.
        target_vector: array of observed targets, one per row of
            ``design_matrix``.

    Returns:
        The coefficient array, one entry per column of ``design_matrix``.
        When the columns are linearly dependent, the minimum-norm
        least-squares solution is returned.
    """
    # Solve the least-squares problem directly rather than forming and
    # inverting X^T X: the explicit inverse raises on rank-deficient designs
    # and amplifies floating-point error on nearly collinear lag columns.
    coefficients, _residuals, _rank, _singular_values = np.linalg.lstsq(
        design_matrix, target_vector, rcond=None
    )
    return coefficients

In [6]:
# Toy series 0..6: perfectly linear, so the fitted AR coefficients are easy to check by hand.
test_values = np.arange(7)

In [4]:
def train_ar(values, order, verbose=True):
    """Fit the coefficients of an AR(``order``) model by least squares.

    Every observation from index ``order`` onward is regressed on the
    ``order`` observations immediately before it. No intercept column is
    added to the design matrix.

    Args:
        values: sliceable 1-D sequence of observations, oldest first.
        order: number of lagged observations used for each prediction.
        verbose: when True, print the lagged design matrix before fitting.
            Defaults to True so existing calls keep their current output;
            pass False when fitting long series.

    Returns:
        The result of ``linear_regression`` on the lagged design matrix: one
        coefficient per lag, with the oldest lag first.

    Raises:
        ValueError: if ``order`` is negative, or if ``values`` does not hold
            more than ``order`` observations (no training row can be built).
    """
    if order < 0:
        raise ValueError(f"order must be non-negative, got {order}")

    # One training row per observation that has `order` predecessors.
    n_rows = len(values) - order
    if n_rows <= 0:
        raise ValueError(
            f"need more than {order} observations to fit an AR({order}) model, "
            f"got {len(values)}"
        )

    # Targets skip the first `order` observations: for AR(3) indices 0, 1, 2
    # are only ever used as inputs for predicting index 3.
    target_vector = np.array(values[order:])

    # Row i holds values[i : i + order], the lags that precede target i.
    # Shape: (len(values) - order, order).
    design_matrix = np.array([values[i:i + order] for i in range(n_rows)])
    if verbose:
        print(design_matrix)

    return linear_regression(design_matrix, target_vector)

In [12]:
# Fit an AR(2) model to the toy series and show the fitted coefficients.
coeffs_test = train_ar(test_values, order=2)
print(coeffs_test)

[[0 1]
 [1 2]
 [2 3]
 [3 4]
 [4 5]]
[-1.  2.]


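Now predict each new value by multiplying the fitted coefficients by the corresponding lagged values and summing the products.
That is how an AR(p) model works: $\hat{y}_t = \sum_{k=1}^{p} \phi_k \, y_{t-k}$ (this implementation has no intercept term).
The dot product does exactly this: it multiplies the two vectors element-wise and then adds the products up.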

In [15]:
import math
def predict(coeffs, data):
    """Compute one-step-ahead AR predictions aligned with ``data``.

    The prediction at position ``i`` is the dot product of ``coeffs`` with the
    ``len(coeffs)`` observed values immediately before ``i``. Observed values
    are always used as inputs; predictions are never fed back in.

    Args:
        coeffs: AR coefficients, oldest lag first.
        data: sliceable 1-D sequence of observed values, oldest first.

    Returns:
        A NumPy array with the same length as ``data``. The leading
        ``len(coeffs)`` entries (or all of them, if ``data`` is shorter than
        that) are NaN because they lack enough history to be predicted.
    """
    order = len(coeffs)

    # An AR(2) model cannot predict its first 2 points. Cap the placeholder
    # count at len(data) so the result never outgrows the input.
    predicted_values = [np.nan] * min(order, len(data))

    predicted_values.extend(
        np.dot(coeffs, data[i - order:i]) for i in range(order, len(data))
    )

    # Converting to an array does not round anything; NumPy only shortens the
    # values when the array is printed.
    return np.array(predicted_values)

# Run the fitted AR(2) model over a longer stretch (0..11) of the same linear sequence.
pred_test = predict(coeffs_test, list(range(12)))
print(pred_test)

[nan, nan, 2.0000000000000027, 2.999999999999991, 3.9999999999999796, 4.999999999999968, 5.999999999999957, 6.999999999999945, 7.9999999999999325, 8.999999999999922, 9.999999999999911, 10.9999999999999]
[nan nan  2.  3.  4.  5.  6.  7.  8.  9. 10. 11.]
