In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib.pyplot as plt
import sklearn
import quandl

In [3]:
from sklearn.linear_model import LinearRegression 
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

Load data using the **Quandl API**

In [4]:
# Download the 'WIKI/FB' dataset through the Quandl client.
# NOTE(review): this is a network call; presumably an API key / rate limit
# applies for repeated use -- confirm before re-running in bulk.
dataset_code = 'WIKI/FB'
df = quandl.get(dataset_code)
# Show the first five rows to see which columns came back.
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2012-05-18,42.05,45.0,38.0,38.2318,573576400.0,0.0,1.0,42.05,45.0,38.0,38.2318,573576400.0
2012-05-21,36.53,36.66,33.0,34.03,168192700.0,0.0,1.0,36.53,36.66,33.0,34.03,168192700.0
2012-05-22,32.61,33.59,30.94,31.0,101786600.0,0.0,1.0,32.61,33.59,30.94,31.0,101786600.0
2012-05-23,31.37,32.5,31.36,32.0,73600000.0,0.0,1.0,31.37,32.5,31.36,32.0,73600000.0
2012-05-24,32.95,33.21,31.77,33.03,50237200.0,0.0,1.0,32.95,33.21,31.77,33.03,50237200.0


In [5]:
# Keep only the adjusted closing price: this single column is the model's
# input feature (independent variable).
# `.copy()` gives an independent frame, so adding the 'Prediction' column
# later does not raise pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
# Display the first rows together with the (rows, columns) shape.
df.head(), df.shape

(            Adj. Close
 Date                  
 2012-05-18     38.2318
 2012-05-21     34.0300
 2012-05-22     31.0000
 2012-05-23     32.0000
 2012-05-24     33.0300,
 (1472, 1))

In [6]:
# Forecast horizon: how many rows (days) ahead the models should predict.
forecast_out = 30
# Build the target (dependent) variable with the "shift method":
# each row's 'Prediction' is the 'Adj. Close' value found `forecast_out`
# rows later, so the last `forecast_out` rows have no target and become NaN.
df['Prediction'] = df['Adj. Close'].shift(periods=-forecast_out)
# Show the head, the shape, and the tail (where the NaN targets are).
df.head(), df.shape, df.tail()

(            Adj. Close  Prediction
 Date                              
 2012-05-18     38.2318      30.771
 2012-05-21     34.0300      31.200
 2012-05-22     31.0000      31.470
 2012-05-23     32.0000      31.730
 2012-05-24     33.0300      32.170,
 (1472, 2),
             Adj. Close  Prediction
 Date                              
 2018-03-21      169.39         NaN
 2018-03-22      164.89         NaN
 2018-03-23      159.39         NaN
 2018-03-26      160.06         NaN
 2018-03-27      152.19         NaN)

In [7]:
#### Create the independent data set (X) ####
# Convert the feature columns (everything except 'Prediction') to a numpy array.
# The column is dropped by keyword: passing the axis positionally, as in
# df.drop(labels, 1), was deprecated in pandas 1.3 and fails on pandas 2.0+.
X = np.array(df.drop(columns=['Prediction']))
# Remove the last `forecast_out` rows: their target value is NaN, so they
# cannot be used for training or testing.
X = X[:-forecast_out]
X

array([[ 38.2318],
       [ 34.03  ],
       [ 31.    ],
       ...,
       [171.5499],
       [175.98  ],
       [176.41  ]])

In [8]:
#### Create the dependent data set (y) ####
# Take the shifted target column, discard the trailing `forecast_out` rows
# (the ones holding NaN), and convert what remains to a numpy array.
labelled_targets = df['Prediction'].iloc[:-forecast_out]
y = np.array(labelled_targets)
y

array([ 30.771,  31.2  ,  31.47 , ..., 159.39 , 160.06 , 152.19 ])

In [9]:
### Split the data : 80% training ---- 20% testing
# A fixed random_state makes the shuffle, and therefore every score and
# prediction below, reproducible across kernel restarts.
# NOTE(review): the rows are time-ordered prices, and a shuffled split mixes
# past and future observations between train and test; the reported scores
# are likely optimistic. Consider shuffle=False for a chronological hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [10]:
#### Create and train the Support Vector Regressor ####
# RBF kernel with regularisation strength C=1000 and kernel coefficient gamma=0.1.
svr_rbf = SVR(
    kernel='rbf',
    C=1000.0,
    gamma=0.1,
)
# Fit on the training split; the fitted estimator is the cell's output.
svr_rbf.fit(X=X_train, y=y_train)

SVR(C=1000.0, gamma=0.1)

In [11]:
### Evaluate the SVR on the held-out test split ###
# `score` returns the coefficient of determination R^2 of the predictions.
svm_confidence = svr_rbf.score(X=X_test, y=y_test)
print('SVM Confidence : ', svm_confidence)

SVM Confidence :  0.9814153931290955


In [12]:
### Create and train the Linear Regression model ###
lr = LinearRegression()
# Fit on the same training split used for the SVR; the fitted estimator is
# the cell's output.
lr.fit(X=X_train, y=y_train)

LinearRegression()

In [13]:
### Evaluate the Linear Regression model on the held-out test split ###
# `score` returns the coefficient of determination R^2 of the predictions.
lr_confidence = lr.score(X=X_test, y=y_test)
print('LinearRegression Confidence : ', lr_confidence)

LinearRegression Confidence :  0.9819369057039685


In [14]:
### Set x_forecast = last `forecast_out` rows of the 'Adj. Close' feature ###
# These are the rows whose target is NaN, i.e. the inputs for which the
# models have no known future price.
# The column is dropped by keyword: passing the axis positionally, as in
# df.drop(labels, 1), was deprecated in pandas 1.3 and fails on pandas 2.0+.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
x_forecast

array([[173.15],
       [179.52],
       [179.96],
       [177.36],
       [176.01],
       [177.91],
       [178.99],
       [183.29],
       [184.93],
       [181.46],
       [178.32],
       [175.94],
       [176.62],
       [180.4 ],
       [179.78],
       [183.71],
       [182.34],
       [185.23],
       [184.76],
       [181.88],
       [184.19],
       [183.86],
       [185.09],
       [172.56],
       [168.15],
       [169.39],
       [164.89],
       [159.39],
       [160.06],
       [152.19]])

In [15]:
### Linear Regression predictions for the next `forecast_out` days ###
# One predicted value per row of x_forecast; displayed as the cell output.
lr_prediction = lr.predict(X=x_forecast)
lr_prediction

array([176.4850928 , 182.88769429, 183.3299462 , 180.71663947,
       179.35973021, 181.26945436, 182.35498177, 186.67698906,
       188.32538253, 184.83762316, 181.68155273, 179.28937195,
       179.97285217, 183.77219811, 183.14902497, 187.0991386 ,
       185.72212698, 188.62691792, 188.15451248, 185.25977271,
       187.58159523, 187.2499063 , 188.48620141, 175.8920732 ,
       171.45950293, 172.70584922, 168.18281834, 162.65466948,
       163.32809853, 155.41782007])

In [16]:
### SVR predictions for the next `forecast_out` days ###
# One predicted value per row of x_forecast; displayed as the cell output.
svm_prediction = svr_rbf.predict(X=x_forecast)
svm_prediction

array([172.322646  , 181.31990686, 181.15232835, 175.24564161,
       175.03335007, 177.19953634, 180.82747079, 179.37058796,
       178.7684266 , 180.53095589, 178.84989246, 175.15029044,
       174.32365655, 180.79543301, 181.26319478, 178.71141563,
       180.5319197 , 179.35537863, 178.52324946, 180.61409589,
       178.27388676, 178.52381786, 179.05997381, 173.36611693,
       171.71005875, 171.91986117, 172.81097881, 167.71470478,
       166.30438692, 160.49423834])