In [1]:
#Import the dependencies
import quandl
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
import FinanceDataManager
import Preprocessing

In [2]:
# Pull TSLA prices for 2014-01-01 .. 2019-10-01 through the project's data manager
data_manager = FinanceDataManager.FinanceDataManager()

# Keep only the close price and the date, then relabel 'Close' as 'Adj. Close'.
# NOTE(review): this is a pure rename -- the values are whatever get_prices
# returns under 'Close'; confirm whether they are actually adjusted prices.
df = (
    data_manager
    .get_prices(stock_name="TSLA", start="2014-01-01", end="2019-10-01")
    [['Close', 'Date']]
    .rename(columns={'Close': 'Adj. Close'})
)

# Peek at the most recent rows
print(df.tail())

      Adj. Close       Date
1442  223.210007 2019-09-24
1443  228.699997 2019-09-25
1444  242.559998 2019-09-26
1445  242.130005 2019-09-27
1446  240.869995 2019-09-30


In [3]:

# Narrow the frame down to the single 'Adj. Close' column (still a DataFrame, not a Series)
df = df.loc[:, ['Adj. Close']]
# Show the last few rows of the reduced frame
print(df.tail())

      Adj. Close
1442  223.210007
1443  228.699997
1444  242.559998
1445  242.130005
1446  240.869995


In [4]:
# Number of rows (trading days) to predict into the future
forecast_out = 5  # n = 5 rows ahead
# Build the target column: each row's 'Prediction' is the 'Adj. Close' value
# found 'n' rows later. The last 'n' rows have no future value and become NaN.
# A Series (single brackets) is shifted rather than a one-column DataFrame, and
# assign() returns a new frame instead of mutating the existing one in place.
df = df.assign(Prediction=df['Adj. Close'].shift(-forecast_out))
# Print the tail to confirm the trailing NaN targets
print(df.tail())

      Adj. Close  Prediction
1442  223.210007         NaN
1443  228.699997         NaN
1444  242.559998         NaN
1445  242.130005         NaN
1446  240.869995         NaN


In [5]:
### Create the independent data set (X)  #######
# Convert every column except the target into a numpy array.
# The column is dropped by keyword: the positional-axis form drop(labels, 1)
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
X = np.array(df.drop(columns=['Prediction']))

# Remove the last 'n' rows (they have no known target value)
X = X[:-forecast_out]
print(X)

[[150.42999268]
 [150.1000061 ]
 [149.55999756]
 ...
 [246.6000061 ]
 [240.61999512]
 [241.22999573]]


In [6]:
### Build the dependent data set (y)  #####
# Copy the 'Prediction' column (trailing NaN values included) into a numpy
# array and trim off the final 'n' entries in the same expression
y = np.array(df['Prediction'])[:-forecast_out]
print(y)

[151.27999878 147.52999878 145.72000122 ... 242.55999756 242.13000488
 240.86999512]


In [7]:
# Split the data into 80% training and 20% testing.
# random_state pins the shuffle so the scores below are reproducible on
# Restart & Run All; without it every run produces a different split.
# NOTE(review): the rows are date-ordered prices and train_test_split shuffles
# by default, so test rows are interleaved in time with training rows --
# consider shuffle=False for a chronological hold-out.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# Support vector regressor with an RBF kernel (C=1000, gamma=0.1)
svr_rbf = SVR(
    kernel='rbf',
    C=1e3,
    gamma=0.1,
)
# Fit on the training split; as the last expression, the fitted estimator is displayed
svr_rbf.fit(X=x_train, y=y_train)

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [9]:

# Evaluate the SVR on the held-out split. score() yields the coefficient of
# determination (R^2) of the predictions; 1.0 is the best possible value.
svm_confidence = svr_rbf.score(X=x_test, y=y_test)
print("svm confidence: ", svm_confidence)

svm confidence:  0.880557550545836


In [10]:
# Instantiate the linear regression model with its default settings
lr = LinearRegression()
# Fit on the same training split used for the SVR; the fitted estimator is displayed
lr.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
# Evaluate the linear model on the held-out split. score() yields the
# coefficient of determination (R^2); 1.0 is the best possible value.
lr_confidence = lr.score(X=x_test, y=y_test)
print("lr confidence: ", lr_confidence)

lr confidence:  0.9119054395626625


In [12]:

# Set x_forecast to the last 'n' (forecast_out) rows of the feature columns,
# i.e. the rows whose 'Prediction' target is still unknown (NaN).
# The target is dropped by keyword: the positional-axis form drop(labels, 1)
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
x_forecast = np.array(df.drop(columns=['Prediction']))[-forecast_out:]
print(x_forecast)

[[223.21000671]
 [228.69999695]
 [242.55999756]
 [242.13000488]
 [240.86999512]]


In [13]:
# Run both fitted models on the forecast inputs (one prediction per row of x_forecast)
lr_prediction = lr.predict(x_forecast)
svm_prediction = svr_rbf.predict(x_forecast)

# Show the linear regression predictions first, then the support vector regressor's
print(lr_prediction)
print(svm_prediction)

[225.46120432 230.66284529 243.79488089 243.3874726  242.19364198]
[223.30043947 228.83185522 242.28051671 240.21818521 231.96190418]
