# Import Packages

In [1]:
import pandas as pd
import yfinance as yf
from yahoofinancials import YahooFinancials
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor


from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
plt.style.use('bmh') #Plotting graph 

import numpy as np

# Original Data Dataframe

In [2]:
def get_originaldata(ticker_name,start_date,end_date):
    
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)
   
    return df

In [3]:
get_originaldata('TSLA', '2019-01-01' , '2019-12-31')

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-06-29,3.800000,5.000000,3.508000,4.778000,4.778000,93831500
1,2010-06-30,5.158000,6.084000,4.660000,4.766000,4.766000,85935500
2,2010-07-01,5.000000,5.184000,4.054000,4.392000,4.392000,41094000
3,2010-07-02,4.600000,4.620000,3.742000,3.840000,3.840000,25699000
4,2010-07-06,4.000000,4.000000,3.166000,3.222000,3.222000,34334500
...,...,...,...,...,...,...,...
2590,2020-10-12,442.000000,448.739990,438.579987,442.299988,442.299988,38791100
2591,2020-10-13,443.350006,448.890015,436.600006,446.649994,446.649994,34463700
2592,2020-10-14,449.779999,465.899994,447.350006,461.299988,461.299988,48045400
2593,2020-10-15,450.309998,456.570007,442.500000,448.880005,448.880005,35672400


# Prediction Dataframe (With importing packages and graph code is commented)

In [4]:
def get_prediction(ticker_name,start_date,end_date,future_period):
    
    # Data Preparation
    import pandas as pd
    import yfinance as yf
    from yahoofinancials import YahooFinancials
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor

    from sklearn.linear_model import LinearRegression
    import matplotlib.pyplot as plt
    plt.style.use('bmh')

    import numpy as np

    #Download the data from Yfinance
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)
    
    #Visualise the close price
#     plt.figure(figsize=(16,8))
#     plt.title('Stock')
#     plt.xlabel('Days')
#     plt.ylabel('Close Price USD($)')
#     plt.plot(df['Close'])
#     plt.show()
    
    #Get the Date
    df_days = df.loc[:, 'Date']

    #Get the close price
    df = df [['Close']]
    
    # Linear Regression Prediction

    #Create a variable to predict 'x' days out into the future
    future_days = 25 #Change accordingly to how many days to predict
    future_period = future_days
    

    #Create a new column (target) shifted 'x' units/days up
    df['Predicted Values'] = df[['Close']].shift(-future_days)

    #Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
    X = np.array(df.drop(['Predicted Values'], 1))[:-future_days]

    #Create the target data set(y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days 
    y = np.array(df['Predicted Values'])[:-future_days]

    #Split the data into 75% training and 25% testing
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

    #Create the decision tree regressor model
    tree = DecisionTreeRegressor().fit(x_train, y_train)
    #Create the linear regression model
    lr = LinearRegression().fit(x_train, y_train)

    #Getting predictions for the current selected dates (For Performance ranking purpose)
    tree_predictions = tree.predict(x_test)
    lr_predictions = lr.predict(x_test)

    #Get the last 'x' rows of the feature data set
    x_future = df.drop(['Predicted Values'], 1)[:-future_days]
    x_future = x_future.tail(future_days)
    x_future = np.array(x_future)
    x_future

    #Show the model tree prediction
    tree_prediction = tree.predict(x_future)
    #Show the model linear regression prediction
    lr_prediction = lr.predict(x_future)
    
    #Add new columns
    df['Date'] = df_days
    df = df[['Date', 'Close', 'Predicted Values']]
    
    #Save Dataframe to csv file
    name = ticker_name + " Prediction"
    filename = "%s.csv" % name
    df.to_csv(filename)
    
    return df

In [5]:
get_prediction('TSLA', '2019-01-01' , '2019-12-31', 25)

Unnamed: 0,Date,Close,Predicted Values
0,2010-06-29,4.778000,4.252
1,2010-06-30,4.766000,4.090
2,2010-07-01,4.392000,3.918
3,2010-07-02,3.840000,3.920
4,2010-07-06,3.222000,3.806
...,...,...,...
2590,2020-10-12,442.299988,
2591,2020-10-13,446.649994,
2592,2020-10-14,461.299988,
2593,2020-10-15,448.880005,


# SVR Prediction Dataframe

In [6]:
def get_prediction(ticker_name,start_date,end_date,future_period):

    # SVR (RBF, Linear and Polynomial) Prediction
    #Import the libraries
    from sklearn.svm import SVR
    import matplotlib.pyplot as plt

    plt.style.use('fivethirtyeight')

    #Download the data from Yfinance
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)

    #Get and print the last row of data 
    actual_price = df.tail(1)
    actual_price

    #Prepare the data for training the SVR models
    #Get all of the data except for the last row
    df = df.head(len(df)-1)

    #Create empty lists to store the independent and dependent data
    days = list()
    adj_close_prices = list()

    #Get the dates and adjusted close prices
    df_days = df.loc[:, 'Date']
    df_adj_close = df.loc[:, 'Close']

    #Create the independent data set
    for day in df_days:
        days.append([int(day.split('-')[1])])

    #Create the dependent data set
    for adj_close_price in df_adj_close:
        adj_close_prices.append(float(adj_close_price))

    #Create the 3 Support Vector Regression Models

    #Create and train a SVR model using a linear kernel
    lin_svr = SVR(kernel='linear', C=1000.0)
    lin_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a polynomial kernel
    poly_svr = SVR(kernel='poly', C=1000.0, degree = 2)
    poly_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a rbf kernel
    rbf_svr = SVR(kernel='rbf', C=1000.0, gamma = 0.15)
    rbf_svr.fit(days, adj_close_prices)

    #plot the models on a graph to see which has the best fit to the oginal data 
#     plt.figure(figsize=(16,8))
#     plt.scatter(days, adj_close_prices, color = 'red', label = 'Data')
#     plt.plot(days, rbf_svr.predict(days), color = 'green', label = 'RBF Model')
#     plt.plot(days, poly_svr.predict(days), color = 'orange', label = 'Polynomial Model')
#     plt.plot(days, lin_svr.predict(days), color = 'blue', label = 'Linear Model')
#     plt.legend()
#     plt.show()
    
    #Get the Date
    df_days = df.loc[:, 'Date']

    # Create DataFrame
    Predicted = {'Date': df_days, 'RBF SVR Prediction': rbf_svr.predict(days) , 
            'Linear SVR Prediction': poly_svr.predict(days), 
            'Polymomial SVR Prediction': lin_svr.predict(days)     
           }
    
    df = pd.DataFrame(Predicted, columns = ['Date', 'RBF SVR Prediction', 'Linear SVR Prediction','Polymomial SVR Prediction'])
    
    #Save Dataframe to csv file
    name = ticker_name + " SVR"
    filename = "%s.csv" % name
    df.to_csv(filename)
    
    return df

In [7]:
get_prediction('TSLA', '2019-01-01' , '2019-12-31', 25)

Unnamed: 0,Date,RBF SVR Prediction,Linear SVR Prediction,Polymomial SVR Prediction
0,2010-06-29,44.472312,44.081566,43.935332
1,2010-06-30,44.472312,44.081566,43.935332
2,2010-07-01,45.151721,44.064557,44.011666
3,2010-07-02,45.151721,44.064557,44.011666
4,2010-07-06,45.151721,44.064557,44.011666
...,...,...,...,...
2589,2020-10-09,43.216019,43.997829,44.240666
2590,2020-10-12,43.216019,43.997829,44.240666
2591,2020-10-13,43.216019,43.997829,44.240666
2592,2020-10-14,43.216019,43.997829,44.240666


# Combination of Linear and SVR Dataframe

In [8]:
def get_prediction(ticker_name,start_date,end_date,future_period):
    
    #Download the data from Yfinance
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)
        
    # SVR (RBF, Linear and Polynomial) Prediction
    #Import the libraries
    from sklearn.svm import SVR
 
    #Download the data from Yfinance
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)
    
    #Get the last row of data 
    actual_price = df.tail(1)

    #Prepare the data for training the SVR models
    #Get all of the data except for the last row
    df = df.head(len(df)-1)

    #Create empty lists to store the independent and dependent data
    days = list()
    adj_close_prices = list()

    #Get the dates and adjusted close prices
    df_days = df.loc[:, 'Date']
    df_adj_close = df.loc[:, 'Close']

    #Create the independent data set
    for day in df_days:
        days.append([int(day.split('-')[1])])

    #Create the dependent data set
    for adj_close_price in df_adj_close:
        adj_close_prices.append(float(adj_close_price))

    #Create the 3 Support Vector Regression Models

    #Create and train a SVR model using a linear kernel
    lin_svr = SVR(kernel='linear', C=1000.0)
    lin_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a polynomial kernel
    poly_svr = SVR(kernel='poly', C=1000.0, degree = 2)
    poly_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a rbf kernel
    rbf_svr = SVR(kernel='rbf', C=1000.0, gamma = 0.15)
    rbf_svr.fit(days, adj_close_prices)
    
    #Get the close price
    df = df [['Close']]
    
    # Linear Regression Prediction

    #Create a variable to predict 'x' days out into the future
    future_days = 25 #Change accordingly to how many days to predict
    future_period = future_days
    #Create a new column (target) shifted 'x' units/days up
    df['Predicted Values'] = df[['Close']].shift(-future_days)

    #Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
    X = np.array(df.drop(['Predicted Values'], 1))[:-future_days]

    #Create the target data set(y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days 
    y = np.array(df['Predicted Values'])[:-future_days]

    #Split the data into 75% training and 25% testing
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

    #Create the decision tree regressor model
    tree = DecisionTreeRegressor().fit(x_train, y_train)
    #Create the linear regression model
    lr = LinearRegression().fit(x_train, y_train)

    #Getting predictions for the current selected dates (For Performance ranking purpose)
    tree_predictions = tree.predict(x_test)
    lr_predictions = lr.predict(x_test)

    #Get the last 'x' rows of the feature data set
    x_future = df.drop(['Predicted Values'], 1)[:-future_days]
    x_future = x_future.tail(future_days)
    x_future = np.array(x_future)
    x_future

    #Show the model tree prediction
    tree_prediction = tree.predict(x_future)
    #Show the model linear regression prediction
    lr_prediction = lr.predict(x_future)
    
    #Save Dataframe to csv file
    name = ticker_name + " Prediction"
    filename = "%s.csv" % name
    df.to_csv(filename)
    
    # Add new columns

    df['Date'] = df_days
    df['RBF SVR Prediction'] = rbf_svr.predict(days)
    df['Linear SVR Prediction'] = poly_svr.predict(days)
    df['Polymomial SVR Prediction'] = lin_svr.predict(days)  

    
    df = df[['Date', 'Close', 'Predicted Values', 'RBF SVR Prediction', 'Linear SVR Prediction', 'Polymomial SVR Prediction']]
    #Save Dataframe to csv file
    name = ticker_name + " Prediction"
    filename = "%s.csv" % name
    df.to_csv(filename)
    
    return df

In [9]:
get_prediction('TSLA', '2019-01-01' , '2019-12-31', 25)

Unnamed: 0,Date,Close,Predicted Values,RBF SVR Prediction,Linear SVR Prediction,Polymomial SVR Prediction
0,2010-06-29,4.778000,4.252,44.472312,44.081566,43.935332
1,2010-06-30,4.766000,4.090,44.472312,44.081566,43.935332
2,2010-07-01,4.392000,3.918,45.151721,44.064557,44.011666
3,2010-07-02,3.840000,3.920,45.151721,44.064557,44.011666
4,2010-07-06,3.222000,3.806,45.151721,44.064557,44.011666
...,...,...,...,...,...,...
2589,2020-10-09,434.000000,,43.216019,43.997829,44.240666
2590,2020-10-12,442.299988,,43.216019,43.997829,44.240666
2591,2020-10-13,446.649994,,43.216019,43.997829,44.240666
2592,2020-10-14,461.299988,,43.216019,43.997829,44.240666


# Ranking Performance

In [12]:
def get_performance(ticker_name,start_date,end_date,future_period):
    
    # Data Preparation
    import pandas as pd
    import yfinance as yf
    from yahoofinancials import YahooFinancials
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.tree import DecisionTreeRegressor

    from sklearn.linear_model import LinearRegression
    import matplotlib.pyplot as plt
    plt.style.use('bmh')

    import numpy as np

    #Download the data from Yfinance
    ticker = ticker_name
    df = yf.download(ticker, start_date= start_date, end_date = end_date, progress=False)

    name = ticker
    filename = "%s.csv" % name
    df.to_csv(filename)
    df = pd.read_csv(filename)
    
    
    #SVR Prediction
    from sklearn.svm import SVR
    #Get and print the last row of data 
    actual_price = df.tail(1)
    actual_price

    #Prepare the data for training the SVR models
    #Get all of the data except for the last row
    df = df.head(len(df)-1)
    #Print the new data set


    #Create empty lists to store the independent and dependent data
    days = list()
    adj_close_prices = list()

    #Get the dates and adjusted close prices
    df_days = df.loc[:, 'Date']
    df_adj_close = df.loc[:, 'Close']

    #Create the independent data set
    for day in df_days:
        days.append([int(day.split('-')[1])])

    #Create the dependent data set
    for adj_close_price in df_adj_close:
        adj_close_prices.append(float(adj_close_price))

    #Create the 3 Support Vector Regression Models

    #Create and train a SVR model using a linear kernel
    lin_svr = SVR(kernel='linear', C=1000.0)
    lin_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a polynomial kernel
    poly_svr = SVR(kernel='poly', C=1000.0, degree = 2)
    poly_svr.fit(days, adj_close_prices)

    #Create and train a SVR model using a rbf kernel
    rbf_svr = SVR(kernel='rbf', C=1000.0, gamma = 0.15)
    rbf_svr.fit(days, adj_close_prices)

    #Show the predicted price for the given day
    day = [[31]]

    rbf_svr.predict(day)
    lin_svr.predict(day)
    poly_svr.predict(day)

    #Print the actual price of the stock on day 31
    #print('The actual price:', actual_price['Close'])
    
    
    #Get the Date
    df_days = df.loc[:, 'Date']

    #Get the close price
    df = df [['Close']]
    
    # Linear Regression Prediction

    #Create a variable to predict 'x' days out into the future
    future_days = 25 #Change accordingly to how many days to predict
    future_period = future_days
    

    #Create a new column (target) shifted 'x' units/days up
    df['Predicted Values'] = df[['Close']].shift(-future_days)

    #Create the feature data set (X) and convert it to a numpy array and remove the last 'x' rows/days
    X = np.array(df.drop(['Predicted Values'], 1))[:-future_days]

    #Create the target data set(y) and convert it to a numpy array and get all of the target values except the last 'x' rows/days 
    y = np.array(df['Predicted Values'])[:-future_days]

    #Split the data into 75% training and 25% testing
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

    #Create the decision tree regressor model
    tree = DecisionTreeRegressor().fit(x_train, y_train)
    #Create the linear regression model
    lr = LinearRegression().fit(x_train, y_train)

    #Getting predictions for the current selected dates (For Performance ranking purpose)
    tree_predictions = tree.predict(x_test)
    lr_predictions = lr.predict(x_test)

    #Get the last 'x' rows of the feature data set
    x_future = df.drop(['Predicted Values'], 1)[:-future_days]
    x_future = x_future.tail(future_days)
    x_future = np.array(x_future)
    x_future

    #Show the model tree prediction
    tree_prediction = tree.predict(x_future)
    #Show the model linear regression prediction
    lr_prediction = lr.predict(x_future)
    
    df = df[['Close', 'Predicted Values']]

    
    from math import sqrt
    from sklearn.metrics import mean_squared_error
    
    #Get the performance ranking
    #Root Mean Square Error
    
    #Linear Regression
    tree_predictions_rsme = round(sqrt(mean_squared_error(y_test, tree_predictions)),2)
    lr_predictions_rsme = round(sqrt(mean_squared_error(y_test, lr_predictions)),2)
    
    
    rbf_svr_rsme = round(sqrt(mean_squared_error(actual_price['Close'], rbf_svr.predict(day))),2)
    lin_svr_rsme = round(sqrt(mean_squared_error(actual_price['Close'], lin_svr.predict(day))),2)
    poly_svr_rsme = round(sqrt(mean_squared_error(actual_price['Close'], poly_svr.predict(day))),2)
       
    #Create DataFrame
    Performance = {'Tree Prediction': [tree_predictions_rsme], 
            'Linear Prediction': [lr_predictions_rsme], 
            'RBF SVR Prediction': [rbf_svr_rsme] , 
            'Linear SVR Prediction': [lin_svr_rsme], 
            'Polymomial SVR Prediction': [poly_svr_rsme], 
           
           }
    
    df = pd.DataFrame(Performance, columns = ['Tree Prediction', 'Linear Prediction', 'RBF SVR Prediction', 'Linear SVR Prediction',
                                              'Polymomial SVR Prediction'])
    
    #Save Dataframe to CSV file
    filename = "%s.csv" % "Ticker Performance"
    df.to_csv(filename)                           
    
    return df

In [13]:
get_performance('TSLA', '2019-01-01' , '2019-12-31', 25)

Unnamed: 0,Tree Prediction,Linear Prediction,RBF SVR Prediction,Linear SVR Prediction,Polymomial SVR Prediction
0,17.34,18.27,398.72,393.83,396.8
