In [1]:
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from keras.preprocessing.sequence import TimeseriesGenerator
import plotly.graph_objs as go
from keras.layers import Dropout

In [2]:
filename = "VGT.csv"
df = pd.read_csv(filename)
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1258 entries, 0 to 1257
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       1258 non-null   object 
 1   Open       1258 non-null   float64
 2   High       1258 non-null   float64
 3   Low        1258 non-null   float64
 4   Close      1258 non-null   float64
 5   Adj Close  1258 non-null   float64
 6   Volume     1258 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 68.9+ KB
None


In [3]:
df['Date'] = pd.to_datetime(df['Date'])
df.set_axis(df['Date'], inplace=True)
df.drop(columns=['Open', 'High', 'Low', 'Volume'], inplace=True)

In [4]:
close_data = df['Close'].values
close_data = close_data.reshape((-1,1))

split_percent = 0.80
split = int(split_percent*len(close_data))

close_train = close_data[:split]
close_test = close_data[split:]

date_train = df['Date'][:split]
date_test = df['Date'][split:]

print(len(close_train))
print(len(close_test))

1006
252


In [6]:
look_back = 10

train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=20)     
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)


In [7]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(
    LSTM(units = 50,return_sequences = True,
        activation='relu',
        input_shape=(look_back,1))
)

model.add(
    LSTM(30,
        activation='relu',
        input_shape=(look_back,1), return_sequences = True)
)

model.add(
    LSTM(10,
        activation='relu',
        input_shape=(look_back,1))
)

model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

num_epochs = 50
model.fit(train_generator, epochs=num_epochs, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x2567a3dddc0>

In [9]:
prediction = model.predict(test_generator)

close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = prediction.reshape((-1))

trace1 = go.Scatter(
    x = date_train,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_test,
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    title = "Google Stock",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()

In [10]:
close_data = close_data.reshape((-1))
look_back = 10
def predict(num_prediction, model):
    prediction_list = close_data[-look_back:]
    
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
        
    return prediction_list
    
def predict_dates(num_prediction):
    last_date = df['Date'].values[-1]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
    return prediction_dates

num_prediction = 30
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)


In [11]:
close_train = close_train.reshape((-1))
close_test = close_test.reshape((-1))
prediction = forecast.reshape((-1))

trace1 = go.Scatter(
    x = forecast_dates,
    y = close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = forecast_dates,
    y = prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = close_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    title = "Google Stock",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[trace2, trace3], layout=layout)
fig.show()

## ADJUSTED THIS IS FOR PORTFOLIO OPTIMIZATION

In [12]:
adj_close_data = df["Adj Close"].values
adj_close_data = adj_close_data.reshape((-1))
look_back = 10
def predict(num_prediction, model):
    prediction_list = adj_close_data[-look_back:]
    
    for _ in range(num_prediction):
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, 1))
        out = model.predict(x)[0][0]
        prediction_list = np.append(prediction_list, out)
    prediction_list = prediction_list[look_back-1:]
        
    return prediction_list
    
def predict_dates(num_prediction):
    last_date = df['Date'].values[-1]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1).tolist()
    return prediction_dates

num_prediction = 30
adj_forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)



#--------------------------

adj_close_train = adj_close_data[:split]
adj_close_test = adj_close_data[split:]

adj_close_train = adj_close_train.reshape((-1))
adj_close_test = adj_close_test.reshape((-1))
adj_prediction = adj_forecast.reshape((-1))

trace1 = go.Scatter(
    x = forecast_dates,
    y = adj_close_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = forecast_dates,
    y = adj_prediction,
    mode = 'lines',
    name = 'Prediction'
)
trace3 = go.Scatter(
    x = date_test,
    y = adj_close_test,
    mode='lines',
    name = 'Ground Truth'
)
layout = go.Layout(
    title = "Google Stock",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Close"}
)
fig = go.Figure(data=[ trace2, trace3], layout=layout)
fig.show()

## ADDING ALL INFO INTO A FILE

In [13]:
#get all close prices values with predicted values (for protfolio prediction)
import datetime

indexes = pd.date_range(start='31/12/2021', end='1/30/2022')

newdf = pd.DataFrame(indexes, columns = ["Date"])
newdf["Close"] = forecast
newdf["Adj Close"] = adj_forecast
dffinal = df.loc[:,["Date" ,"Close","Adj Close"]]
dffinal = dffinal.append(newdf)
dffinal['Date'] = pd.to_datetime(dffinal['Date'])
dffinal

Unnamed: 0,Date,Close,Adj Close
2017-01-03 00:00:00,2017-01-03,122.459999,115.922997
2017-01-04 00:00:00,2017-01-04,123.199997,116.623497
2017-01-05 00:00:00,2017-01-05,123.290001,116.708687
2017-01-06 00:00:00,2017-01-06,124.419998,117.778351
2017-01-09 00:00:00,2017-01-09,124.639999,117.986595
...,...,...,...
26,2022-01-26,498.862213,498.023590
27,2022-01-27,500.176117,499.335144
28,2022-01-28,501.376923,500.534119
29,2022-01-29,502.722748,501.877228


In [14]:
dffinal.to_csv("VGTpredict.csv", index = False)

## PORTFOLIO OPTIMIZATION STATS

In [49]:
dffinal = pd.read_csv("vgtpredict.csv")

In [50]:
dffinal.set_axis(dffinal['Date'], inplace=True)
dffinal.drop(columns=["Date"],inplace = True)

In [51]:
#this are the predicted valyes 

dffinal.tail(60)

Unnamed: 0_level_0,Close,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-11-18,454.820007,453.246307
2021-11-19,457.470001,455.887115
2021-11-22,450.640015,449.08075
2021-11-23,448.579987,447.027863
2021-11-24,452.309998,450.744965
2021-11-26,442.119995,440.59021
2021-11-29,452.149994,450.58551
2021-11-30,447.690002,446.140961
2021-12-01,439.519989,437.999207
2021-12-02,444.25,442.71286
