In [1]:
# This program predicts stock prices by using machine learning models

#Install the dependencies
import quandl
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
#Get the stock data
df = quandl.get("FSE/EON_X", authtoken="eSQL3KzSTGXurQhGkt8A")
# Take a look at the data
print(df.head())

             Open   High    Low  Close Change  Traded Volume     Turnover  \
Date                                                                        
2003-01-20  41.55  41.99  41.39  41.39   None      2454991.0  102384282.0   
2003-01-21  41.70  42.02  40.33  40.40   None      3279902.0  135050011.0   
2003-01-22  40.40  40.95  38.32  38.75   None      4568929.0  179763264.0   
2003-01-23  38.89  39.50  38.32  38.70   None      3879946.0  151277585.0   
2003-01-24  38.77  38.92  36.66  36.93   None      5196923.0  196271501.0   

           Last Price of the Day Daily Traded Units Daily Turnover  
Date                                                                
2003-01-20                  None               None           None  
2003-01-21                  None               None           None  
2003-01-22                  None               None           None  
2003-01-23                  None               None           None  
2003-01-24                  None              

In [3]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Change', 'Traded Volume', 'Turnover',
       'Last Price of the Day', 'Daily Traded Units', 'Daily Turnover'],
      dtype='object')

In [4]:
df.drop(['Last Price of the Day', 'Daily Traded Units', 'Daily Turnover'], axis=1, inplace=True)

In [5]:
# Get the Adjusted Close Price
df = df['Adj. Close']]
#Take a look at the new data
print(df.head())

KeyError: "None of [Index(['Adj. Close'], dtype='object')] are in the [columns]"

In [17]:
# A variable for predicting 'n' days out into the future
forecast_out = 30 #'n=30' days
#Create another column (the target or dependent variable) shifted 'n' units up
df['Prediction'] = df[['Adj. Close']].shift(-forecast_out)
#print the new data set
print(df.tail())


            Adj. Close  Prediction
Date                              
2018-03-21     1581.86         NaN
2018-03-22     1544.10         NaN
2018-03-23     1495.56         NaN
2018-03-26     1555.86         NaN
2018-03-27     1497.05         NaN


In [18]:
### Create the independent data set (X)  #######
# Convert the dataframe to a numpy array
X = np.array(df.drop(['Prediction'],1))

#Remove the last 'n' rows
X = X[:-forecast_out]
print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1350.47      ]
 [1338.99      ]
 [1386.23      ]]


In [19]:
### Create the dependent data set (y)  #####
# Convert the dataframe to a numpy array (All of the values including the NaN's)
y = np.array(df['Prediction'])
# Get all of the y values except the last 'n' rows
y = y[:-forecast_out]
print(y)

[1.54166667e+00 1.51583333e+00 1.58833333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [0]:
# Split the data into 80% training and 20% testing
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [21]:
# Create and train the Support Vector Machine (Regressor)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [22]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
svm_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.9274190417518909


In [23]:
# Create and train the Linear Regression  Model
lr = LinearRegression()
# Train the model
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [24]:
# Testing Model: Score returns the coefficient of determination R^2 of the prediction. 
# The best possible score is 1.0
lr_confidence = lr.score(x_test, y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9874918531515935


In [25]:
# Set x_forecast equal to the last 30 rows of the original data set from Adj. Close column
x_forecast = np.array(df.drop(['Prediction'],1))[-forecast_out:]
print(x_forecast)

[[1414.51]
 [1451.05]
 [1461.76]
 [1448.69]
 [1468.35]
 [1482.92]
 [1484.76]
 [1500.  ]
 [1521.95]
 [1511.98]
 [1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [26]:
# Print linear regression model predictions for the next 'n' days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

# Print support vector regressor model predictions for the next 'n' days
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[1494.09445102 1532.76646354 1544.10136377 1530.26876377 1551.07587287
 1566.4959939  1568.44335304 1584.5725668  1607.80329135 1597.25156817
 1597.74899143 1577.64039159 1584.83715364 1609.56014796 1624.40876142
 1632.19819799 1639.45846087 1668.06559001 1688.70336352 1677.89763698
 1680.88217653 1671.69572145 1660.43490554 1632.12411367 1676.13019689
 1671.20888167 1631.24568536 1579.87350452 1643.69185031 1581.45044209]
[1048.26903008  660.32920232  659.32078508  687.16846237  659.32078508
  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508
  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508
  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508
  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508
  659.32078508  659.32078508  659.32078508  659.32078508  659.32078508]
