# **Vector AutoRegression (VAR) for FAANG Securities**
---

### Import Libraries 

In [44]:
import os

import pandas as pd
import numpy as np 
import matplotlib.pyplot

### Retrieve Data

In [45]:
ticker = 'FB'
#ticker = 'AAPL'
#ticker = 'AMZN'
#ticker = 'NFLX'
#ticker = 'GOOG'

In [46]:
files = os.listdir('data/stocks')
stocks = {}
for file in files:
    if file.split('.')[1] == 'csv':
        name = file.split('.')[0]
        stocks[name] = pd.read_csv('data/stocks/'+file, index_col='Date')
        stocks[name].index = pd.to_datetime(stocks[name].index)
print('List of stocks: ', end = ' ')
for i in stocks.keys():
    print(i.upper(), end=' ')

List of stocks:  AMZN FB NFLX GOOG AAPL 

In [47]:
stocks[ticker.lower()].head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Return,Change,Volatility,MA7,MA21,Momentum,RSI,MACD,Signal,Upper_Band,Lower_Band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2012-05-18,45.0,38.0,42.049999,38.23,573576400,38.23,-0.091,0.0,,,,,,0.0,0.0,,
2012-05-21,36.66,33.0,36.529999,34.029999,168192700,34.029999,-0.068,-4.200001,2.969849,,,,,-0.335043,-0.067009,,
2012-05-22,33.59,30.940001,32.610001,31.0,101786600,31.0,-0.049,-3.029999,3.621059,,,,,-0.835432,-0.220693,,
2012-05-23,32.5,31.360001,31.370001,32.0,73600000,32.0,0.02,1.0,3.151231,,,35.23,,-1.138182,-0.404191,,
2012-05-24,33.209999,31.77,32.950001,33.029999,50237200,33.029999,0.002,1.029999,2.713883,,,31.029999,,-1.280243,-0.579401,,


---
## Find Optimal Lag Structure 
---
- Akaike (AIC): `-2log(L(K)) + (2K/(N-p))`
- Schwarz (SIC): `-2log(L(K)) + ((2Klog(N-p))/(N-p))`
- Hannan-Quinn (HQIC): `-2log(L(K)) + ((2Klog(log(N-p)))/(N-p))`
> - `L(K)` = likelihood of *K*-variate VAR model 
> - `N` = sample size
> - `p` = estimated parameters 
- in python library *statsmodel*

In [67]:
optimal_lag = 100

var_df = stocks[ticker.lower()][['Close','Volume']]
print(var_df.head())

features = var_df.columns
print(features)

                Close     Volume
Date                            
2012-05-18  38.230000  573576400
2012-05-21  34.029999  168192700
2012-05-22  31.000000  101786600
2012-05-23  32.000000   73600000
2012-05-24  33.029999   50237200
Index(['Close', 'Volume'], dtype='object')


In [73]:
for i in range(1, optimal_lag + 1):
    for j in features:
        var_df[f"{j}_Lag_{i}"] = var_df[j].shift(i)

#print(var_df)
var_df = var_df.dropna()
#print(var_df)

### Transform Data Into Matrices

In [82]:
y_price = var_df['Close']
y_volume = var_df['Volume']
var_mx = var_df.drop(['Close','Volume'], axis=1)

# Intercept Column with Value 1 Throughout 
var_mx.insert(0, 'Intercept', 1)

# Transform into Matrix 
X = var_mx.to_numpy()
y_price = y_price.to_numpy()
y_volume = y_volume.to_numpy()

#print('Price: ', y_price)
#print('Demand: ', y_volume)
#print('Matrix: ', X)


In [83]:
def NormEquations(X,y):

    XtX = np.matmul(X.T, X)
    XtY = np.matmul(X.T, y)
    XtX_Inv = np.linalg.inv(XtX)

    b = np.matmul(XtX_Inv, XtY)

    return b

In [84]:
b_price = NormEquations(X, y_price)
b_demand = NormEquations(X, y_volume)

#print('Price: ', b_price)
#print('Demand: ', b_demand)

In [85]:
price_estimates = []
demand_estimates = []

for i in var_mx.index: 
    entry = var_mx.loc[i].to_numpy()
    price_hat = np.dot(b_price, entry)
    price_estimates.append(price_hat)

    demand_hat = np.dot(b_demand, entry)
    demand_estimates.append(demand_hat)

#print('Price Estimates: ', price_estimates)
#print('Demand Estimates: ', demand_estimates)
#print(var_mx)

In [95]:
N = len(y_price)
p = len(var_mx.columns)



def SumofSquares(y, X, b):
    SStot = np.matmul(y.T, y)
    SSreg = np.matmul(np.matmul(y.T, X), b)
    SSres = np.matmul((y - np.matmul(X, b)).T, (y - np.matmul(X,b)))
    return SStot, SSreg, SSres

def RSquared(SSres, SStot, y, n): 
    R = 1- SSres / (SStot - sum(y) ** 2 / n)
    return R

def MSE(SSres, n): 
    M = SSres/n
    return M

def Fstat(SSreg, SSres, n, p): 
    F = (SSreg/p) / (SSres / (n-p))
    return F

def Diagnostics(y, X, b, n, p): 
    SStot, SSreg, SSres = SumofSquares(y, X, b)
    R = RSquared(SSres, SStot, y,n)
    #print("R-Squared: {}".format(round(R,2)))
    M = MSE(SSres, n)
    #print('MSE: {}'.format(round(M,2)))
    F = Fstat(SSreg, SSres, n,p)
    #print('F-Statistic: {}'.format(round(F,2)))

    return print(' R-Squared : {} \n MSE: {} \n F-Statistic: {}'.format(round(R,2),round(M,2),round(F,2)))

In [104]:
print("Price Data:")
price_diag = Diagnostics(y_price, X, b_price, N, p)
print("Demand Data:")
demand_diag = Diagnostics(y_volume, X, b_demand, N, p)


Price Data:
 R-Squared : 1.0 
 MSE: 14.58 
 F-Statistic: 23350.03
Demand Data:
 R-Squared : 0.84 
 MSE: 245109450748366.2 
 F-Statistic: 57.17


In [105]:
ahead = 100
exPost_Price = []
exPost_Demand = []

data = var_mx.iloc[-1].to_list()

for i in range(ahead): 
    predictPrice = np.dot(data,b_price)
    predictDemand = np.dot(data,b_demand)

    data = data[:-2]
    data.insert(1, predictDemand)
    data.insert(1,predictPrice)

    exPost_Demand.append(predictDemand)
    exPost_Price.append(predictPrice)