In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.api import VAR

In [None]:
# Columns that may appear in the export but are not used by this analysis.
unused_columns = [
    'date', 'Unnamed: 0', 'daychange',
    'anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise',
    'connectivity', 'avgdeg',
]

xyzX = pd.read_csv('xyz-output.csv')

# Keep every column whose name is not in the list above; listed names that
# are absent from the file are simply not matched.
xyzX = xyzX.loc[:, ~xyzX.columns.isin(unused_columns)]

# Work with the first 70 rows only.
xyzX = xyzX.head(70)
xyzX

In [None]:
from sklearn.preprocessing import MinMaxScaler

sns.set(rc={'figure.figsize': (11, 8)})

# Min-max scale every remaining column, then wrap the resulting array back
# into a DataFrame with the metric names used by the rest of the notebook.
# NOTE(review): the labels are assigned positionally — confirm they match the
# column order of the CSV.
scaler = MinMaxScaler()
xyzXs = pd.DataFrame(
    scaler.fit_transform(xyzX),
    columns=['size', 'edges', 'diameter', 'clustering', 'volume'],
)

# Quick visual check of two of the scaled series.
xyzXs[['clustering', 'volume']].plot()

In [None]:
# Augmented Dickey-Fuller test on each scaled metric; the second element of
# the returned tuple is reported as the p-value.
for metric in xyzXs.columns:
    # adfuller is documented for a 1-D series, so pass the column itself
    # (a Series) instead of a one-column DataFrame that must be squeezed.
    result = adfuller(xyzXs[metric])
    print('A-DF of metric',metric,'has p-value of',result[1])
    print()

In [None]:
# Build the VAR model on the first 55 observations; later cells compare the
# forecast against the rows from position 55 onward.
xyzXm = xyzXs.iloc[:55]

model = VAR(xyzXm)

# Report the lag orders selected when up to 5 lags are considered.
print(model.select_order(maxlags=5))

# Fit with lag order 1 (the recommended lag length for the current stock).
xyz_model = model.fit(maxlags=1)

xyz_model.summary()

In [None]:
# Overlay the model's fitted values for volume on the observed scaled volume.
sns.set(rc={'figure.figsize': (22, 16)})

ax = xyzXs[['volume']].plot()
ax.plot(xyz_model.fittedvalues['volume'], label='predicted_volume')
ax.legend(fontsize='28')
ax.set_title('#XYZ Volume over Time', fontsize=32)
ax.set_xlabel('day')
ax.set_ylabel('Volume (0,1)')

In [None]:
# Granger-causality tests with column 4 ('volume') as the caused variable and
# each of the five columns, in turn, as the causing variable.
volume_idx = 4
for causing_idx in range(5):
    print(xyz_model.test_causality(volume_idx, causing_idx))

In [None]:
# Reverse direction: column 4 ('volume') as the causing variable and each of
# the five columns, in turn, as the caused variable.
volume_idx = 4
for caused_idx in range(5):
    print(xyz_model.test_causality(caused_idx, volume_idx))

In [None]:
# In-sample mean absolute error of the fitted volume.
# The actual series has to start at the lag-order position to line up with
# the fitted values; take that offset from the model and the rows from the
# training frame so this stays aligned if the lag or the split changes.
lag_order = xyz_model.k_ar
print('XYZ', mean_absolute_error(xyzXm['volume'].iloc[lag_order:], xyz_model.fittedvalues['volume']))

In [None]:
# Seed the forecast with the last `k_ar` training rows, so the window always
# matches the lag order the model was fitted with.
lag_order = xyz_model.k_ar
forecast_input = xyzXm.values[-lag_order:]

# Forecast exactly the rows that were held out of training; deriving the
# horizon from the frames keeps the forecast array and its index the same
# length even if the input file has fewer rows than expected.
holdout_index = xyzXs.index[len(xyzXm):]
fc = xyz_model.forecast(y=forecast_input, steps=len(holdout_index))
df_forecast = pd.DataFrame(fc, index=holdout_index, columns=xyzXm.columns + '_2d')
df_forecast

In [None]:
# Out-of-sample mean absolute error: observed scaled volume from position 55
# onward versus the forecast volume column.
actual_volume = xyzXs[['volume']][55:]
predicted_volume = df_forecast['volume_2d']
print('XYZ', mean_absolute_error(actual_volume, predicted_volume))

In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Variance inflation factor for each scaled metric.
# Iterate over column positions, not row positions: the second argument of
# variance_inflation_factor is a column index into the design matrix, and
# indexing xyzXs.columns by a row number runs past the last column.
# NOTE(review): no constant column is added to the design matrix — confirm
# that is intended for these VIF values.
design_matrix = xyzXs.values
for i, column in enumerate(xyzXs.columns):
    print(column, variance_inflation_factor(design_matrix, i))