<a href="https://colab.research.google.com/github/akshatk16/stockPrediction/blob/main/stockPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [74]:
# predict stock prices using ML

# import dependencies
import os

import numpy as np
import quandl
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [75]:
# Fetch the WIKI/AMZN dataset from Quandl and preview its first rows.
# The API key is read from the environment instead of being committed with the
# notebook; export QUANDL_API_KEY before starting the kernel.
quandl_api_key = os.environ.get("QUANDL_API_KEY")
if not quandl_api_key:
    # Fail early with an actionable message rather than an opaque API error.
    raise RuntimeError("Set the QUANDL_API_KEY environment variable before running this cell.")
quandl.ApiConfig.api_key = quandl_api_key
df = quandl.get("WIKI/AMZN")
print(df.head())

             Open   High    Low  ...  Adj. Low  Adj. Close  Adj. Volume
Date                             ...                                   
1997-05-16  22.38  23.75  20.50  ...  1.708333    1.729167   14700000.0
1997-05-19  20.50  21.25  19.50  ...  1.625000    1.708333    6106800.0
1997-05-20  20.75  21.00  19.63  ...  1.635833    1.635833    5467200.0
1997-05-21  19.25  19.75  16.50  ...  1.375000    1.427500   18853200.0
1997-05-22  17.25  17.38  15.75  ...  1.312500    1.395833   11776800.0

[5 rows x 12 columns]


In [76]:
# Keep only the 'Adj. Close' column. The explicit .copy() makes `df` an
# independent frame, so adding a column to it in a later cell does not write
# into a slice of the downloaded data (which can raise SettingWithCopyWarning).
df = df[['Adj. Close']].copy()
print(df.head())

            Adj. Close
Date                  
1997-05-16    1.729167
1997-05-19    1.708333
1997-05-20    1.635833
1997-05-21    1.427500
1997-05-22    1.395833


In [77]:
# Forecast horizon n: how many rows ahead the target value is taken from.
predict_for_days = 20

# Target column: the 'Adj. Close' value found n rows further down.
# Shifting upward leaves NaN in the final n rows, as the tail shows.
df['Prediction'] = df['Adj. Close'].shift(periods=-predict_for_days)
print(df.tail())

            Adj. Close  Prediction
Date                              
2018-03-21     1581.86         NaN
2018-03-22     1544.10         NaN
2018-03-23     1495.56         NaN
2018-03-26     1555.86         NaN
2018-03-27     1497.05         NaN


In [78]:
# Independent data set (X): every column except the target, as a 2-D array.
# `columns=` replaces the positional axis argument, which pandas deprecated
# in 1.3 and no longer accepts in 2.0.
X = np.array(df.drop(columns=['Prediction']))
# Remove the last n rows: their 'Prediction' value is NaN after the shift.
X = X[:-predict_for_days]
print(X)

[[   1.72916667]
 [   1.70833333]
 [   1.63583333]
 ...
 [1500.        ]
 [1521.95      ]
 [1511.98      ]]


In [79]:
# Dependent data set (y): the shifted prices without the trailing n rows,
# so that y lines up row-for-row with X.
y = np.array(df['Prediction'].iloc[:-predict_for_days])
print(y)

[1.57333333e+00 1.50500000e+00 1.51083333e+00 ... 1.49556000e+03
 1.55586000e+03 1.49705000e+03]


In [80]:
# Split into train/test (80% / 20%). Fixing random_state makes the shuffle,
# and therefore every score computed from this split, reproducible across
# kernel restarts.
# NOTE(review): a shuffled split on a time series lets the models train on rows
# that come after test rows, so the reported scores are likely optimistic —
# consider a chronological split (shuffle=False).
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [81]:
# Train a support vector regressor with an RBF kernel.
# `kernel` is passed by keyword: scikit-learn estimator constructors are
# keyword-only from version 1.0 on, where SVR('rbf', ...) raises TypeError.
svr_rbf = SVR(kernel='rbf', gamma=0.1, C=1000)
svr_rbf.fit(x_train, y_train)

SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [82]:
# Evaluate the SVR on the held-out split; for regressors, score() is the
# coefficient of determination (R^2), not a probability.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print(svm_confidence)

0.9541042222345923


In [83]:
# Fit a linear regression on the same training split.
# fit() returns the estimator itself, so construction and fitting are chained.
lr = LinearRegression().fit(x_train, y_train)
# Trailing expression keeps the fitted estimator's repr as the cell output.
lr

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [84]:
# Evaluate the linear model on the held-out split; score() is R^2 here too.
lr_confidence = lr.score(X=x_test, y=y_test)
print(lr_confidence)

0.99231442741043


In [85]:
# Inputs for the forecast: the last n rows of the feature column(s), i.e. the
# rows that were excluded from X because they have no known target.
# `columns=` replaces the positional axis argument, which pandas deprecated
# in 1.3 and no longer accepts in 2.0.
x_predict = np.array(df.drop(columns=['Prediction']))[-predict_for_days:]
print(x_predict)

[[1512.45]
 [1493.45]
 [1500.25]
 [1523.61]
 [1537.64]
 [1545.  ]
 [1551.86]
 [1578.89]
 [1598.39]
 [1588.18]
 [1591.  ]
 [1582.32]
 [1571.68]
 [1544.93]
 [1586.51]
 [1581.86]
 [1544.1 ]
 [1495.56]
 [1555.86]
 [1497.05]]


In [86]:
# Predict the next n days with both fitted models from the same inputs.
svr_prediction = svr_rbf.predict(x_predict)
lr_prediction = lr.predict(x_predict)

# Report the SVM forecast first, then the linear-regression forecast.
print('Using SVM: ', svr_prediction)
print('Using LR: ', lr_prediction)

Using SVM:  [1479.18852114  695.3711372  1490.40484545  684.01028737  684.00920112
  684.00920112  684.00920112  684.00920112  684.00920112  684.00920112
  684.00920112  684.00920112  684.00920112  684.00920112  684.00920112
  684.00920112  684.00920112  797.02165866  684.00920112 1023.8828054 ]
Using LR:  [1572.01370161 1552.23724787 1559.31513658 1583.62976602 1598.2331158
 1605.89388946 1613.03423013 1641.16883774 1661.46572447 1650.83848275
 1653.77372483 1644.73900807 1633.66419397 1605.82102884 1649.10023655
 1644.26020971 1604.95711007 1554.4334751  1617.19769407 1555.98436542]
