In [1]:
# Stock Predictor via ML Models
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [6]:
# Fetch the "WIKI/FB" dataset from Quandl into the DataFrame `df`.
# NOTE(review): this is a network call and presumably needs Quandl access to be
# configured for the kernel -- confirm before relying on Restart & Run All.
fb_dataset_code = "WIKI/FB"
df = quandl.get(fb_dataset_code)
# To eyeball the most recent rows, evaluate `df.tail()` in a scratch cell.

In [11]:
# Keep only the adjusted closing price; it is the single feature used below.
# Take an explicit copy so that the column added to `df` in a later cell is
# written to an independent frame rather than to a selection of the downloaded
# data, which is the pattern that triggers pandas' SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
print(df.head())

            Adj. Close
Date                  
2012-05-18     38.2318
2012-05-21     34.0300
2012-05-22     31.0000
2012-05-23     32.0000
2012-05-24     33.0300


In [16]:
# Forecast horizon: how many rows ahead the target value is taken from.
forecast_out = 30
# Build the dependent variable: each row's 'Prediction' is the 'Adj. Close'
# found `forecast_out` rows further down, so the final `forecast_out` rows of
# the new column are NaN.
df['Prediction'] = df['Adj. Close'].shift(periods=-forecast_out)
print(df.head())

            Adj. Close  Prediction
Date                              
2012-05-18     38.2318      30.771
2012-05-21     34.0300      31.200
2012-05-22     31.0000      31.470
2012-05-23     32.0000      31.730
2012-05-24     33.0300      32.170


In [20]:
### Independent set for x
# Feature matrix: every column of df except the target, as a 2-D NumPy array.
# `columns=` is used because passing the axis to DataFrame.drop positionally
# (the bare `1`) was deprecated and is rejected with a TypeError by pandas 2.x.
x = np.array(df.drop(columns=['Prediction']))
# The last `forecast_out` rows have no known target value, so leave them out.
x = x[:-forecast_out]


In [21]:
### Dependent set for y
# Target vector: the 'Prediction' column without its final `forecast_out`
# entries (the NaN values left behind by the shift), converted to a NumPy array.
y = np.array(df['Prediction'].iloc[:-forecast_out])

In [22]:
# 80 20 Test Train Split
# random_state pins the shuffle so the split -- and every score and prediction
# derived from it -- is identical on each Restart & Run All.
# NOTE(review): the rows are time-ordered and train_test_split shuffles by
# default, so test rows end up interleaved with training rows; consider
# shuffle=False if a chronological hold-out is wanted.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [23]:
# Support vector regressor with an RBF kernel (C=1000, gamma=0.1), trained on
# the training split.
svr_rbf = SVR(C=1e3, gamma=0.1, kernel='rbf')
# fit() stays the cell's final expression so the estimator repr is displayed.
svr_rbf.fit(X=x_train, y=y_train)


SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [26]:
# Evaluate the SVR on the held-out split. `score` returns the coefficient of
# determination (R^2) of the predictions; 1.0 is the best possible value.
# The "confidence" label below is therefore an R^2 value, not a probability.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm, confidence: ", svm_confidence)

svm, confidence:  0.9754300884535297


In [28]:
# Linear regression baseline with default settings, trained on the same
# training split as the SVR.
lr = LinearRegression()
# fit() stays the cell's final expression so the estimator repr is displayed.
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [29]:
# Evaluate the linear regression on the held-out split. `score` returns the
# coefficient of determination (R^2) of the predictions; 1.0 is the best
# possible value. The "confidence" label below is that R^2 value.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr, confidence: ", lr_confidence)

lr, confidence:  0.9819267604958196


In [30]:
# Forecast inputs: the last `forecast_out` rows of every non-target column of
# df, i.e. the most recent prices, for which no target value exists yet.
# `columns=` is used because passing the axis to DataFrame.drop positionally
# (the bare `1`) was deprecated and is rejected with a TypeError by pandas 2.x.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[173.15]
 [179.52]
 [179.96]
 [177.36]
 [176.01]
 [177.91]
 [178.99]
 [183.29]
 [184.93]
 [181.46]
 [178.32]
 [175.94]
 [176.62]
 [180.4 ]
 [179.78]
 [183.71]
 [182.34]
 [185.23]
 [184.76]
 [181.88]
 [184.19]
 [183.86]
 [185.09]
 [172.56]
 [168.15]
 [169.39]
 [164.89]
 [159.39]
 [160.06]
 [152.19]]


In [32]:
# Run both fitted models over the forecast inputs; each returns one predicted
# value per row of x_forecast.
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)
# Linear regression predictions first ...
print(lr_prediction)
# ... followed by the support vector regressor predictions.
print(svm_prediction)

[176.99343094 183.41671059 183.86039082 181.23864402 179.87735241
 181.7932443  182.88227759 187.21824344 188.87196065 185.37293704
 182.20667361 179.80676692 180.49245455 184.30407104 183.67888527
 187.64175639 186.2602975  189.1744699  188.70053875 185.79644999
 188.12577118 187.79301101 189.03329892 176.39849609 171.95161018
 173.20198173 168.66434304 163.11834021 163.79394419 155.85811831]
[174.86446558 181.28935865 180.91808258 174.84733859 173.28752458
 177.24647037 181.16403269 185.06439323 176.0295288  184.69178277
 179.17004788 173.30150789 173.31884034 180.89967137 181.07091989
 181.78812228 188.20201246 176.54229599 176.09107304 186.84033161
 178.2509256  180.59246691 176.21102891 176.7122552  171.33277383
 172.26899165 171.56177019 167.55444215 166.30145436 161.31246311]
