In [1]:
import pandas, numpy
from datetime import datetime, timedelta
import pandas_datareader.data as web
from sklearn import linear_model, svm
from sklearn.model_selection import train_test_split

In [2]:
### Predicts stock prices using machine learning models.

## Download the last 365 days of AAPL quotes.
# Read the clock once so both ends of the window are derived from the same instant.
end_date = datetime.now()
start_date = end_date - timedelta(days = 365)

# NOTE(review): the 'yahoo' source depends on an external service — confirm it
# still works with the installed pandas-datareader version.
df = web.DataReader('AAPL', 'yahoo', start_date, end_date)
df.tail(10)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-02-11,323.899994,318.709991,323.600006,319.609985,23580800.0,319.609985
2020-02-12,327.220001,321.470001,321.470001,327.200012,28432600.0,327.200012
2020-02-13,326.220001,323.350006,324.190002,324.869995,23686900.0,324.869995
2020-02-14,325.980011,322.850006,324.73999,324.950012,20028400.0,324.950012
2020-02-18,319.75,314.609985,315.359985,319.0,38132800.0,319.0
2020-02-19,324.570007,320.0,320.0,323.619995,23496000.0,323.619995
2020-02-20,324.649994,318.209991,322.630005,320.299988,25141500.0,320.299988
2020-02-21,320.450012,310.5,318.619995,313.049988,32388500.0,313.049988
2020-02-24,304.179993,289.230011,297.26001,298.179993,55548800.0,298.179993
2020-02-25,302.529999,286.130005,300.950012,288.079987,57566400.0,288.079987


In [3]:
## Keep only the adjusted close price.
# .copy() gives an independent one-column frame, so adding a column to it later
# does not raise pandas' SettingWithCopyWarning.
df = df[['Adj Close']].copy()
df.tail()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2020-02-19,323.619995
2020-02-20,320.299988
2020-02-21,313.049988
2020-02-24,298.179993
2020-02-25,288.079987


In [4]:
## Forecast horizon: how many rows ahead of each observation the target lies.
forecast_out = 30

## Target (dependent variable): the adjusted close found `forecast_out` rows later.
## The last `forecast_out` rows have no later value, so their target is NaN.
adj_close = df['Adj Close']
df['Prediction'] = adj_close.shift(-forecast_out)
df.tail()

Unnamed: 0_level_0,Adj Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-19,323.619995,
2020-02-20,320.299988,
2020-02-21,313.049988,
2020-02-24,298.179993,
2020-02-25,288.079987,


In [5]:
## Build the independent dataset X.

## Everything except the target column, converted to a NumPy array.
feature_frame = df.drop(columns = ['Prediction'])

## Leave out the last N rows and display the result.
X = numpy.array(feature_frame)[ : -forecast_out]
X

array([[172.07766724],
       [172.61068726],
       [170.91291809],
       [172.7093811 ],
       [173.57803345],
       [173.26216125],
       [172.26522827],
       [170.27131653],
       [170.67602539],
       [176.58860779],
       [178.57266235],
       [179.36231995],
       [181.35621643],
       [183.71533203],
       [185.59080505],
       [184.12005615],
       [185.72898865],
       [192.56942749],
       [188.58164978],
       [186.30148315],
       [184.37667847],
       [186.03497314],
       [186.28173828],
       [187.49586487],
       [188.76921082],
       [191.51329041],
       [192.82611084],
       [193.16168213],
       [194.45475769],
       [197.51470947],
       [196.92247009],
       [198.02798462],
       [196.37957764],
       [196.3006134 ],
       [196.65596008],
       [196.67570496],
       [200.50558472],
       [201.22615051],
       [201.88749695],
       [204.79936218],
       [204.48350525],
       [202.62780762],
       [201.66046143],
       [201

In [6]:
## Build the dependent dataset y.

## Target column as a NumPy array.
target_values = numpy.array(df['Prediction'])

## Keep every value except the last N rows and display the result.
y = target_values[ : -forecast_out]
y

array([196.92247009, 198.02798462, 196.37957764, 196.3006134 ,
       196.65596008, 196.67570496, 200.50558472, 201.22615051,
       201.88749695, 204.79936218, 204.48350525, 202.62780762,
       201.66046143, 201.96646118, 198.07736206, 207.80010986,
       206.44779968, 209.01422119, 205.78646851, 200.23907471,
       200.27853394, 198.12670898, 195.38197327, 184.02648926,
       186.93966675, 189.17906189, 188.34672546, 187.27658081,
       181.42047119, 184.89846802, 181.11328125, 178.02174377,
       177.33804321, 176.6047821 , 175.76252747, 176.67414856,
       173.47360229, 171.71972656, 178.00192261, 180.87545776,
       183.53103638, 188.41607666, 190.82392883, 193.03358459,
       192.41925049, 192.3795929 , 190.98246765, 192.12197876,
       196.64039612, 196.06568909, 197.64118958, 196.96737671,
       196.76921082, 193.78666687, 197.97808838, 197.91864014,
       196.11521912, 199.71214294, 200.88136292, 202.54605103,
       202.36767578, 198.19607544, 199.40496826, 201.37

In [7]:
## Split the dataset into 80 % training and 20 % testing.
# A fixed random_state makes the shuffled partition, and every metric computed
# from it, repeatable across kernel restarts.
# NOTE(review): the split is shuffled, so test rows are interleaved in time with
# training rows — consider a chronological split for time-series data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Create & train the Support Vector Machine (regressor).
# Radial basis function (RBF) kernel.

svr_rbf = svm.SVR(kernel = 'rbf', C = 1e3, gamma = 0.1)
svr_rbf.fit(X_train, y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [8]:
## Testing the model.
# score() returns the coefficient of determination R^2 of the prediction.
svm_confidence = svr_rbf.score(X_test, y_test)

# Report the score as a percentage rounded to two decimals.
svm_score_pct = round(svm_confidence * 100, 2)
print("Score using SVM : {} %".format(svm_score_pct))

Score using SVM : 91.53 %


In [9]:
## Split the dataset into 80 % training and 20 % testing.
# A fixed random_state makes the shuffled partition, and every metric computed
# from it, repeatable across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## Create & train the linear regression model.

lr_model = linear_model.LinearRegression()
lr_model.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [10]:
## Testing the model.
# score() returns the coefficient of determination R^2 of the prediction.
lr_confidence = lr_model.score(X_test, y_test)

# Report the score as a percentage rounded to two decimals.
lr_score_pct = round(lr_confidence * 100, 2)
print("Score using Linear Regression : {} %".format(lr_score_pct))

Score using Linear Regression : 90.52 %


In [11]:
## Features of the last `forecast_out` rows, used as input for the forecast.
# `columns=` replaces the positional axis argument, which newer pandas versions
# no longer accept.
X_forecast = numpy.array(df.drop(columns = ['Prediction']))[-forecast_out : ]

X_forecast

array([[316.20953369],
       [311.93966675],
       [310.60284424],
       [314.49359131],
       [317.97537231],
       [315.82046509],
       [316.94781494],
       [318.47418213],
       [317.55633545],
       [308.21850586],
       [316.93780518],
       [323.572052  ],
       [323.10317993],
       [308.77719116],
       [307.92919922],
       [318.09506226],
       [320.68893433],
       [324.44000244],
       [320.02999878],
       [321.54998779],
       [319.60998535],
       [327.20001221],
       [324.86999512],
       [324.95001221],
       [319.        ],
       [323.61999512],
       [320.29998779],
       [313.04998779],
       [298.17999268],
       [288.07998657]])

In [12]:
## Print the linear regression model's predictions for the next N days.
lr_prediction = lr_model.predict(X_forecast)
print("\n Linear Regression Model Prediction for Next N Days: \n {}".format(lr_prediction))

## Print the support vector regressor's predictions for the next N days.
svm_prediction = svr_rbf.predict(X_forecast)
print("\n Support Vector Regressor Model Prediction for Next N Days: \n {}".format(svm_prediction))


 Lienar Regression Model Prediction for Next N Days: 
 [358.00445058 352.66937575 350.99905482 355.86043037 360.21081469
 357.51832065 358.92691157 360.83406308 359.68724029 348.01989321
 358.91440466 367.20370354 366.61786149 348.71795429 347.65841296
 360.3603638  363.60133167 368.28818242 362.77801083 364.67719299
 362.25321624 371.73673452 368.82544579 368.92542482 361.49105725
 367.26360707 363.11535427 354.05668967 335.47706232 322.85739782]

 Support Vector Regressor Model Prediction for Next N Days: 
 [261.02943942 258.90883058 272.29930305 258.94587211 261.49471528
 260.75005498 261.338172   261.51964231 261.45443407 304.10604571
 261.33544336 261.5394131  261.53941252 299.45095935 305.29937161
 261.50247624 261.53913759 261.53941331 261.53831661 261.53937377
 261.53689327 261.53941333 261.53941332 261.53941332 261.53152589
 261.53941313 261.53878347 256.67141764 321.52654415 301.22986218]
