# **Vector AutoRegression (VAR) for FAANG Securities**
---

### Import Libraries 

In [51]:
import os

import pandas as pd
import numpy as np 
import matplotlib.pyplot

### Retrieve Data

In [52]:
ticker = 'FB'
#ticker = 'AAPL'
#ticker = 'AMZN'
#ticker = 'NFLX'
#ticker = 'GOOG'

In [53]:
files = os.listdir('data/stocks')
stocks = {}
for file in files:
    if file.split('.')[1] == 'csv':
        name = file.split('.')[0]
        stocks[name] = pd.read_csv('data/stocks/'+file, index_col='Date')
        stocks[name].index = pd.to_datetime(stocks[name].index)
print('List of stocks: ', end = ' ')
for i in stocks.keys():
    print(i.upper(), end=' ')

List of stocks:  AMZN FB NFLX GOOG AAPL 

In [54]:
stocks[ticker.lower()].head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,Return,Change,Volatility,MA7,MA21,Momentum,RSI,MACD,Signal,Upper_Band,Lower_Band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2012-05-18,45.0,38.0,42.049999,38.23,573576400,38.23,-0.091,0.0,,,,,,0.0,0.0,,
2012-05-21,36.66,33.0,36.529999,34.029999,168192700,34.029999,-0.068,-4.200001,2.969849,,,,,-0.335043,-0.067009,,
2012-05-22,33.59,30.940001,32.610001,31.0,101786600,31.0,-0.049,-3.029999,3.621059,,,,,-0.835432,-0.220693,,
2012-05-23,32.5,31.360001,31.370001,32.0,73600000,32.0,0.02,1.0,3.151231,,,35.23,,-1.138182,-0.404191,,
2012-05-24,33.209999,31.77,32.950001,33.029999,50237200,33.029999,0.002,1.029999,2.713883,,,31.029999,,-1.280243,-0.579401,,


In [55]:
var_df = stocks[ticker.lower()][['Close','Volume']]
var_df.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-05-18,38.23,573576400
2012-05-21,34.029999,168192700
2012-05-22,31.0,101786600
2012-05-23,32.0,73600000
2012-05-24,33.029999,50237200


---
## Find Optimal Lag Structure 
---
- Akaike (AIC): `-2log(L(K)) + (2K/(N-p))`
- Schwarz (SIC): `-2log(L(K)) + ((2Klog(N-p))/(N-p))`
- Hannan-Quinn (HQIC): `-2log(L(K)) + ((2Klog(log(N-p)))/(N-p))`
> - `L(K)` = likelihood of *K*-variate VAR model 
> - `N` = sample size
> - `p` = estimated parameters 
- in python library *statsmodel*

In [56]:
optimal_lag = 100
features = var_df.columns

print(features)


Index(['Close', 'Volume'], dtype='object')


In [57]:
for i in range(1, optimal_lag + 1): 
    for j in features: 
        var_df[f"{j}_Lag{i}"] = var_df[j].shift(i)

#print(var_df)


var_df.dropna()
#print(var_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  var_df[f"{j}_Lag{i}"] = var_df[j].shift(i)


Unnamed: 0_level_0,Close,Volume,Close_Lag1,Volume_Lag1,Close_Lag2,Volume_Lag2,Close_Lag3,Volume_Lag3,Close_Lag4,Volume_Lag4,...,Close_Lag96,Volume_Lag96,Close_Lag97,Volume_Lag97,Close_Lag98,Volume_Lag98,Close_Lag99,Volume_Lag99,Close_Lag100,Volume_Lag100
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-10-10,19.639999,39321800,20.230000,27161800.0,20.400000,32236700.0,20.910000,40529300.0,21.950001,46892100.0,...,33.029999,50237200.0,32.000000,73600000.0,31.000000,101786600.0,34.029999,168192700.0,38.230000,573576400.0
2012-10-11,19.750000,21817300,19.639999,39321800.0,20.230000,27161800.0,20.400000,32236700.0,20.910000,40529300.0,...,31.910000,37149800.0,33.029999,50237200.0,32.000000,73600000.0,31.000000,101786600.0,34.029999,168192700.0
2012-10-12,19.520000,18809400,19.750000,21817300.0,19.639999,39321800.0,20.230000,27161800.0,20.400000,32236700.0,...,28.840000,78063400.0,31.910000,37149800.0,33.029999,50237200.0,32.000000,73600000.0,31.000000,101786600.0
2012-10-15,19.520000,20189700,19.520000,18809400.0,19.750000,21817300.0,19.639999,39321800.0,20.230000,27161800.0,...,28.190001,57267900.0,28.840000,78063400.0,31.910000,37149800.0,33.029999,50237200.0,32.000000,73600000.0
2012-10-16,19.480000,21834700,19.520000,20189700.0,19.520000,18809400.0,19.750000,21817300.0,19.639999,39321800.0,...,29.600000,111639200.0,28.190001,57267900.0,28.840000,78063400.0,31.910000,37149800.0,33.029999,50237200.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-01,224.850006,19533300,222.360001,24192300.0,227.850006,25588000.0,229.860001,31417900.0,223.589996,26224100.0,...,340.890015,25414300.0,327.739990,12376600.0,327.640015,21872600.0,335.369995,17556700.0,338.619995,18342500.0
2022-04-04,233.889999,28054800,224.850006,19533300.0,222.360001,24192300.0,227.850006,25588000.0,229.860001,31417900.0,...,347.559998,25076600.0,340.890015,25414300.0,327.739990,12376600.0,327.640015,21872600.0,335.369995,17556700.0
2022-04-05,231.839996,29727200,233.889999,28054800.0,224.850006,19533300.0,222.360001,24192300.0,227.850006,25588000.0,...,342.959991,18181100.0,347.559998,25076600.0,340.890015,25414300.0,327.739990,12376600.0,327.640015,21872600.0
2022-04-06,223.300003,28995100,231.839996,29727200.0,233.889999,28054800.0,224.850006,19533300.0,222.360001,24192300.0,...,340.769989,13602800.0,342.959991,18181100.0,347.559998,25076600.0,340.890015,25414300.0,327.739990,12376600.0


In [58]:
y_price = var_df['Close']
x_volume = var_df['Volume']
var_df = var_df.drop(['Close','Volume'], axis=1)

# Intercept Column with Value 1 Throughout 
var_df.insert(0, 'Intercept', 1)

# Transform into Matrix 
X = var_df.to_numpy()
y_price = y_price.to_numpy()
x_volume = x_volume.to_numpy()
