In [1]:
import quandl
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
#Choose Stock
df = quandl.get("WIKI/MSFT")
print(df.head())

             Open   High    Low  Close     Volume  Ex-Dividend  Split Ratio  \
Date                                                                          
1997-05-16  22.38  23.75  20.50  20.75  1225000.0          0.0          1.0   
1997-05-19  20.50  21.25  19.50  20.50   508900.0          0.0          1.0   
1997-05-20  20.75  21.00  19.63  19.63   455600.0          0.0          1.0   
1997-05-21  19.25  19.75  16.50  17.13  1571100.0          0.0          1.0   
1997-05-22  17.25  17.38  15.75  16.75   981400.0          0.0          1.0   

            Adj. Open  Adj. High  Adj. Low  Adj. Close  Adj. Volume  
Date                                                                 
1997-05-16   1.865000   1.979167  1.708333    1.729167   14700000.0  
1997-05-19   1.708333   1.770833  1.625000    1.708333    6106800.0  
1997-05-20   1.729167   1.750000  1.635833    1.635833    5467200.0  
1997-05-21   1.604167   1.645833  1.375000    1.427500   18853200.0  
1997-05-22   1.437500   1.

In [16]:
#Choose Paramter
df = df[['Adj. Close']]

In [20]:
#Choose future predictions days
forecast_out = 30
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)

In [21]:
### Create the independent data set 
X = np.array(df.drop(['Prediction'],1))
X = X[:-forecast_out]
print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


In [22]:
### Create the dependent data set
y = np.array(df['Prediction'])
y = y[:-forecast_out]

In [23]:
# Split the data
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [8]:
# Create and train the Support Vector Machine (Regressor)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [24]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9922896002517492


In [25]:
# Create and train the Linear Regression  Model
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [11]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9901466788595591


In [12]:
# Set x_forecast equal to the last 30 rows of the original data set from Adj. Close column
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [14]:
# Print linear regression model predictions for the next 'n' days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

# Print support vector regressor model predictions for the next 'n' days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[1497.65829491 1536.43132232 1547.79583035 1533.92709833 1554.78855752
 1570.24895733 1572.20140316 1588.37274792 1611.66415327 1601.08486802
 1601.58359059 1581.42246523 1588.63802589 1613.42559896 1628.31299837
 1636.12278167 1643.40200904 1672.08386265 1692.77554394 1681.94159184
 1684.93392729 1675.72347633 1664.43324613 1636.04850384 1680.16953503
 1675.23536488 1635.167781   1583.66141126 1647.64645649 1585.24246793]
[1006.51639258 1549.98753713  670.69721871 1050.21651792  670.66404697
  670.66404697  670.66404697  670.66404697  670.66404697  670.66404697
  670.66404697  670.66404697  670.66404697  670.66404697  670.66404697
  670.66404697  670.66404697  670.66404697  670.66404697  670.66404697
  670.66404697  670.66404697  670.66404697  670.66404697  670.66404697
  670.66404697  670.66404697  670.66404697  670.66404697  670.66404697]
