In [9]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go
from datetime import datetime

In [10]:
# data preparation function
def prep(df, fore_col, fore_out, test_size):
    """Build train/test splits and forecasting inputs from a single column.

    The only feature is the standardized value of ``df[fore_col]``; the target
    for each row is the value of the same column ``fore_out`` rows later.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain ``fore_col``.
    fore_col : str
        Column used both as the feature and (shifted) as the target.
    fore_out : int
        Number of rows ahead that the target lies. The same number of trailing
        rows is returned as ``X_latest``. Must satisfy
        ``1 <= fore_out < len(df)``.
    test_size : float or int
        Passed unchanged to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, Y_train, Y_test, X_latest]`` where ``X_latest``
        holds the scaled feature values of the last ``fore_out`` rows, i.e.
        the rows that have no known target yet.

    Raises
    ------
    ValueError
        If ``fore_out`` is outside ``1 .. len(df) - 1``, or if missing values
        in ``fore_col`` leave features and targets with different lengths.
    """
    column = df[fore_col]
    n_rows = len(column)

    # Guard the slicing below: with fore_out == 0, X[:-0] is empty and X[-0:]
    # is the whole array; negative or too-large values are equally meaningless.
    if fore_out < 1:
        raise ValueError(f"fore_out must be a positive integer, got {fore_out!r}")
    if fore_out >= n_rows:
        raise ValueError(
            f"fore_out ({fore_out!r}) must be smaller than the number of rows ({n_rows})"
        )

    # Target: the column shifted up by fore_out rows. The last fore_out entries
    # become NaN and are dropped. shift() returns a new Series, so df is untouched.
    label = column.shift(-fore_out).dropna()
    y = np.array(label)  # known outcomes present in the data

    # Feature array (2-D, one column), standardized.
    # NOTE(review): scaling is computed over all rows before the train/test
    # split, so the test rows influence the scaling statistics.
    X = preprocessing.scale(np.array(df[[fore_col]]))

    # Rows with no known target yet; used later for the actual forecast.
    X_latest = X[-fore_out:]
    X = X[:-fore_out]

    # dropna() also removes interior NaNs, which would silently misalign y with X.
    if len(X) != len(y):
        raise ValueError(
            f"column {fore_col!r} contains missing values: "
            f"{len(X)} feature rows but {len(y)} target values"
        )

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, y, test_size=test_size, random_state=0
    )
    return [X_train, X_test, Y_train, Y_test, X_latest]

In [11]:
# Load the price history from CSV and keep only the rows whose symbol is AAPL.
# The path is a raw string (r'...') so its backslashes are taken literally
# instead of being interpreted as escape sequences.
# NOTE(review): this is an absolute, machine-specific path; consider making
# the data location configurable.
data = (
    pd.read_csv(r'C:\Users\arnav\OneDrive\Desktop\Projects\proj-2\AAPL.csv')
    .loc[lambda frame: frame['symbol'] == 'AAPL']
)
data.head()

Unnamed: 0,symbol,date,close,high,low,open,volume,adjClose,adjHigh,adjLow,adjOpen,adjVolume,divCash,splitFactor
0,AAPL,2015-05-27,132.045,132.26,130.05,130.34,45833246,121.682558,121.880685,119.844118,120.11136,45833246,0.0,1
1,AAPL,2015-05-28,131.78,131.95,131.1,131.86,30733309,121.438354,121.595013,120.811718,121.512076,30733309,0.0,1
2,AAPL,2015-05-29,130.28,131.45,129.9,131.23,50884452,120.056069,121.134251,119.70589,120.931516,50884452,0.0,1
3,AAPL,2015-06-01,130.535,131.39,130.05,131.2,32112797,120.291057,121.07896,119.844118,120.90387,32112797,0.0,1
4,AAPL,2015-06-02,129.96,130.655,129.32,129.86,33667627,119.761181,120.40164,119.171406,119.669029,33667627,0.0,1


In [12]:
# Candlestick chart of the open/high/low/close columns against the date column.
fig = go.Figure(
    data=[
        go.Candlestick(
            x=data['date'],
            **{field: data[field] for field in ('open', 'high', 'low', 'close')},
        )
    ]
)
fig.show()

In [13]:
# Forecast configuration, passed to prep() in the next cell.
test_size = 0.2     # forwarded to train_test_split as the held-out share
fore_out = 5        # how many rows ahead to predict (and how many rows to forecast)
fore_col = "close"  # column used as feature and as shifted target

In [14]:
# Split the prepared data, then fit an ordinary least-squares model on the
# training part. fit() returns the estimator itself, so construction and
# fitting can be chained.
X_train, X_test, Y_train, Y_test, X_latest = prep(data, fore_col, fore_out, test_size)
model = LinearRegression().fit(X_train, Y_train)
model

In [15]:
# Predict prices for the most recent rows (those without a known target),
# then evaluate the fitted model on the held-out test split.
forecast = model.predict(X_latest)
score = model.score(X_test, Y_test)

In [16]:
# Label each prediction with the business day it refers to. The dates are
# derived from the last date in the data and from fore_out instead of being
# hard-coded, so the table stays correct when the data is refreshed or the
# forecast horizon changes.
last_date = pd.to_datetime(data['date']).max()
forecast_dates = pd.bdate_range(start=last_date + pd.offsets.BDay(1), periods=fore_out)

# np.asarray(...).ravel() accepts both the raw prediction array and the
# one-column DataFrame left behind by a previous run of this cell, so the
# cell can be re-executed safely.
forecast = pd.DataFrame(
    {'Predicted prices': np.asarray(forecast).ravel()},
    index=forecast_dates,
)
print(forecast)

            Predicted prices
2020-05-11        316.535020
2020-05-12        314.706259
2020-05-13        320.825575
2020-05-14        318.434118
2020-05-15        320.483939
