In [60]:
pip install svm

Note: you may need to restart the kernel to use updated packages.


### Machine Learning for predicting stock prices

In [61]:
import quandl
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR 
from sklearn.model_selection import train_test_split

In [89]:
# Get the stock data 
import yfinance as yf

df = yf.download("TSLA", start="2017-01-01", end="2020-07-20",
                   group_by="ticker")
df.tail()


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-07-13,1659.0,1794.98999,1471.109985,1497.060059,1497.060059,38985400
2020-07-14,1556.0,1590.0,1431.0,1516.800049,1516.800049,23418100
2020-07-15,1543.0,1550.0,1457.0,1546.01001,1546.01001,16367800
2020-07-16,1477.160034,1531.709961,1466.0,1500.640015,1500.640015,14300800
2020-07-17,1513.449951,1537.51001,1490.0,1500.839966,1500.839966,9304700


In [91]:
# get the adjusted close price 
df = df[['Adj Close']]
df.tail()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2020-07-13,1497.060059
2020-07-14,1516.800049
2020-07-15,1546.01001
2020-07-16,1500.640015
2020-07-17,1500.839966


In [93]:
# predict the future value of stock prices, 'n' days out into the future
forecast_out = 30
# create another column (target variable) shifted 'n' units up 
df['Prediction'] = df[['Adj Close']].shift(-forecast_out)
df.tail()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


Unnamed: 0_level_0,Adj Close,Prediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-07-13,1497.060059,
2020-07-14,1516.800049,
2020-07-15,1546.01001,
2020-07-16,1500.640015,
2020-07-17,1500.839966,


In [101]:
# independent variavle 
# convert df to a numpy array 
X = np.array(df.drop(['Prediction'],axis=1))
# Remove the last 'n' rows
X = X[:-forecast_out] # for every column get all the rows except the last 30 rows
X

array([[216.99000549],
       [226.99000549],
       [226.75      ],
       [229.00999451],
       [231.27999878],
       [229.86999512],
       [229.72999573],
       [229.58999634],
       [237.75      ],
       [235.58000183],
       [238.36000061],
       [243.75999451],
       [244.72999573],
       [248.91999817],
       [254.61000061],
       [254.47000122],
       [252.50999451],
       [252.94999695],
       [250.63000488],
       [251.92999268],
       [249.24000549],
       [251.55000305],
       [251.33000183],
       [257.76998901],
       [257.48001099],
       [262.07998657],
       [269.20001221],
       [269.23001099],
       [280.6000061 ],
       [280.98001099],
       [279.76000977],
       [268.95001221],
       [272.23001099],
       [277.39001465],
       [273.51000977],
       [255.99000549],
       [257.        ],
       [246.22999573],
       [249.99000549],
       [250.02000427],
       [250.47999573],
       [251.57000732],
       [251.21000671],
       [248

In [95]:
### Dependent dataset (y) ####
# convert the dataframe to a numpy array (ALl of the values including the NaN's)
y=np.array(df['Prediction'])
# Get all of the y values except the last 'n' rows
y = y[:-forecast_out]
y

array([ 279.76000977,  268.95001221,  272.23001099,  277.39001465,
        273.51000977,  255.99000549,  257.        ,  246.22999573,
        249.99000549,  250.02000427,  250.47999573,  251.57000732,
        251.21000671,  248.58999634,  246.86999512,  244.8999939 ,
        243.69000244,  246.16999817,  258.        ,  255.72999573,
        262.04998779,  261.5       ,  261.92001343,  250.67999268,
        255.00999451,  254.77999878,  263.16000366,  270.22000122,
        277.45001221,  277.38000488,  277.92001343,  278.29998779,
        298.51998901,  303.70001221,  295.        ,  298.70001221,
        302.54000854,  312.39001465,  308.70999146,  296.83999634,
        304.        ,  301.44000244,  300.25      ,  305.51998901,
        302.51000977,  305.6000061 ,  308.02999878,  313.79000854,
        310.17001343,  308.63000488,  314.07000732,  322.82998657,
        318.89001465,  311.01998901,  295.45999146,  308.3500061 ,
        307.19000244,  321.26000977,  325.22000122,  323.10000

In [96]:
x_train, x_test, y_train, y_test =train_test_split(X, y, test_size=0.2)
svr_rbf = SVR(kernel = 'rbf', C=1e3, gamma=0.1) #create and train the support vector machine (regressor)
svr_rbf.fit(x_train, y_train)
svr_confidence = svr_rbf.score(x_test, y_test)
print("svm confidence:", svr_confidence)

svm confidence: 0.5878508896421082


In [97]:
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [98]:
lr_confidence = lr.score(x_test, y_test) #r2
print("lr confidence:", lr_confidence)

lr confidence: 0.6923860850669271


In [70]:
# set x_forecast equal to the last 30 rows of the original dataset from Adj. Close 
x_forecast = np.array(df.drop(['Prediction'], axis=1))[-forecast_out:]
print(x_forecast)

[[323.66 ]
 [322.31 ]
 [334.065]
 [335.49 ]
 [334.77 ]
 [333.3  ]
 [346.17 ]
 [352.05 ]
 [357.42 ]
 [350.99 ]
 [343.06 ]
 [330.93 ]
 [335.12 ]
 [333.35 ]
 [328.2  ]
 [332.3  ]
 [329.1  ]
 [327.17 ]
 [345.51 ]
 [341.84 ]
 [326.63 ]
 [325.6  ]
 [321.35 ]
 [313.56 ]
 [310.55 ]
 [316.53 ]
 [309.1  ]
 [301.54 ]
 [304.18 ]
 [279.18 ]]


In [99]:
# print the predictions for the next 'n' days
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

[351.00688302 349.36571809 363.6560098  365.38835056 364.5130626
 362.72601634 378.37178872 385.51997377 392.04816318 384.23135537
 374.59103098 359.84486012 364.9385498  362.78680022 356.52605991
 361.51033861 357.62016987 355.27391185 377.56944142 373.10790415
 354.61744588 353.36529782 348.19866747 338.72853795 335.06934798
 342.33910081 333.30661528 324.11609164 327.32548085 296.9335376 ]
[339.93655386 302.00324499 314.00166485 340.37392473 325.90027539
 309.92379863 324.42371665 327.99219313 304.80021377 325.7862162
 344.04026139 347.51652293 333.06379585 309.83733039 347.10729067
 321.1531494  353.85044456 338.20070831 320.29487385 339.5736886
 335.75459158 340.80905777 294.00050021 354.14320404 316.26777095
 318.21089605 316.80398984 333.3651206  318.03631581 306.7499781 ]
