In [1]:
import quandl, warnings
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

In [2]:
## Silence every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides warnings emitted by pandas and
# scikit-learn in later cells -- confirm that this is intended.
warnings.simplefilter("ignore")

## This notebook predicts stock prices with machine-learning models.
# Download the price history for the 'WIKI/AAPL' dataset code from Quandl.

df = quandl.get('WIKI/AAPL')
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-03-21,175.04,175.09,171.26,171.27,35247358.0,0.0,1.0,175.04,175.09,171.26,171.27,35247358.0
2018-03-22,170.0,172.68,168.6,168.845,41051076.0,0.0,1.0,170.0,172.68,168.6,168.845,41051076.0
2018-03-23,168.39,169.92,164.94,164.94,40248954.0,0.0,1.0,168.39,169.92,164.94,164.94,40248954.0
2018-03-26,168.07,173.1,166.44,172.77,36272617.0,0.0,1.0,168.07,173.1,166.44,172.77,36272617.0
2018-03-27,173.68,175.15,166.92,168.34,38962839.0,0.0,1.0,173.68,175.15,166.92,168.34,38962839.0


In [3]:
# Show the column labels of the downloaded frame.
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Ex-Dividend', 'Split Ratio',
       'Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume'],
      dtype='object')

In [4]:
# Keep only the adjusted close price; it is the single feature
# (independent variable) used by the models below.
# `.copy()` makes the result an independent frame instead of a selection of
# the downloaded data, so adding columns to it later does not raise pandas'
# SettingWithCopyWarning.
df = df[['Adj. Close']].copy()
df.tail(5)

Unnamed: 0_level_0,Adj. Close
Date,Unnamed: 1_level_1
2018-03-21,171.27
2018-03-22,168.845
2018-03-23,164.94
2018-03-26,172.77
2018-03-27,168.34


In [5]:
# Number of rows (trading days) to look ahead.
forecast_out = 30
# Target (dependent variable): the adjusted close `forecast_out` rows later.
# The source column is selected as a Series (single brackets) and attached
# with `assign`, which returns a new frame rather than writing into `df` in
# place, so re-running this cell gives the same result.
# The last `forecast_out` rows have no later value to shift in and stay NaN.
df = df.assign(Prediction=df['Adj. Close'].shift(-forecast_out))
df.tail()

Unnamed: 0_level_0,Adj. Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-03-21,171.27,
2018-03-22,168.845,
2018-03-23,164.94,
2018-03-26,172.77,
2018-03-27,168.34,


In [6]:
## Feature matrix X (independent data set)
# Take every column except the target as a 2-D NumPy array and leave out the
# final `forecast_out` rows, whose target value is still NaN.
X = np.array(df.drop(columns=['Prediction']))[:-forecast_out]
print(X)

[[  0.42270592]
 [  0.40065169]
 [  0.37124607]
 ...
 [155.32      ]
 [155.97      ]
 [162.71      ]]


In [7]:
## Target vector Y (dependent data set)
# Take the shifted prices as a NumPy array and cut off the final
# `forecast_out` entries, which are the NaN values left by the shift.
Y = np.array(df['Prediction'])[:-forecast_out]
print(Y)
## X is a 2-D array with one column || Y is a 1-D array

[  0.47049006   0.45578725   0.43917307 ... 164.94       172.77
 168.34      ]


In [8]:
## Split the data set: 80 % training || 20 % testing
# `random_state` pins the shuffle so the split, and every score computed from
# it, is the same on each Restart & Run All.
# NOTE(review): a shuffled split of a price series places neighbouring days in
# both the training and the test set, so the test scores say little about
# forecasting genuinely unseen future prices. Consider `shuffle=False` or
# `TimeSeriesSplit` for a chronological evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [9]:
## Build and fit the Support Vector Machine (regressor) model
# 'rbf' selects the radial basis function kernel.
svr_rbf = SVR(C=1e3, gamma=0.1, kernel='rbf')
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [10]:
## Evaluate the Support Vector Machine (regressor) on the test split
# `score` returns the coefficient of determination (R^2) of the prediction;
# 1.0 is the best possible value.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print(f"SVM Confidence: {round(svm_confidence * 100, 2)} %")

SVM Confidence: 99.28 %


In [11]:
## Build and fit the Linear Regression model
lr = LinearRegression()
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [12]:
## Evaluate the Linear Regression model on the test split (R^2 score)
lr_confidence = lr.score(X=x_test, y=y_test)
print(f"Linear Regression Confidence: {round(lr_confidence * 100, 2)} %")

Linear Regression Confidence: 99.3 %


In [13]:
## x_forecast = the last `forecast_out` rows of the feature column(s),
## i.e. the rows that were left out of X because they have no target yet.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
x_forecast

array([[164.34 ],
       [167.37 ],
       [172.99 ],
       [172.43 ],
       [171.85 ],
       [171.07 ],
       [172.6  ],
       [175.555],
       [178.97 ],
       [178.39 ],
       [178.12 ],
       [175.   ],
       [176.21 ],
       [176.82 ],
       [176.67 ],
       [175.03 ],
       [176.94 ],
       [179.98 ],
       [181.72 ],
       [179.97 ],
       [178.44 ],
       [178.65 ],
       [178.02 ],
       [175.3  ],
       [175.24 ],
       [171.27 ],
       [168.845],
       [164.94 ],
       [172.77 ],
       [168.34 ]])

In [14]:
## Linear Regression predictions for the `forecast_out` rows in x_forecast
lr_prediction = lr.predict(X=x_forecast)
lr_prediction

array([167.85846159, 170.95083749, 176.68653141, 176.11500319,
       175.52306325, 174.72700609, 176.28850283, 179.30433478,
       182.78963633, 182.19769639, 181.92213814, 178.73790949,
       179.97281868, 180.5953762 , 180.44228829, 178.76852707,
       180.71784653, 183.8204283 , 185.59624813, 183.81022244,
       182.2487257 , 182.46304878, 181.82007953, 179.04408532,
       178.98285015, 174.93112331, 172.456202  , 168.47081325,
       176.46200247, 171.94080601])

In [15]:
## Support Vector Machine predictions for the `forecast_out` rows in x_forecast
svm_prediction = svr_rbf.predict(X=x_forecast)
svm_prediction

array([158.3087851 , 177.7029507 , 172.31550622, 171.98630399,
       171.48207897, 171.18624123, 172.11150213, 174.36740008,
       176.58295148, 176.69977675, 176.58646242, 173.44512276,
       175.38444508, 175.96359489, 175.85617532, 173.49043457,
       176.03650022, 172.66570421, 146.540151  , 172.74155067,
       176.71470795, 176.73933611, 176.53766428, 173.9281223 ,
       173.82711491, 171.16738844, 177.78693173, 156.65867999,
       172.21537494, 179.37869958])

In [16]:
# Show the last ten rows; `Prediction` is NaN here because the shifted target
# has no values for the final `forecast_out` rows.
df.tail(10)

Unnamed: 0_level_0,Adj. Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-03-14,178.44,
2018-03-15,178.65,
2018-03-16,178.02,
2018-03-19,175.3,
2018-03-20,175.24,
2018-03-21,171.27,
2018-03-22,168.845,
2018-03-23,164.94,
2018-03-26,172.77,
2018-03-27,168.34,
