In [1]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from datetime import datetime
from datetime import timedelta
#import re
#import matplotlib.pyplot as plt
import support.ts_class as ts_class
import support.load_and_process_data as lpdata
import math

In [2]:
import pyarrow.parquet as pq

In [36]:
# --- Experiment configuration -------------------------------------------
# Values matched against the `year` column when each split is read from
# the parquet dataset.
train_years = list(range(2017, 2021))  # 2017 through 2020 inclusive
val_year = [2021]
test_year = [2022]

# Default number of training epochs used by compile_and_fit.
MAX_EPOCHS = 1
# Number of input time steps per window; also used as the Conv1D kernel size.
# NOTE(review): the saved WindowGenerator output further down lists 15 input
# indices, which does not match this value — re-run the notebook top to bottom.
WIND_SIZE = 3
# Column(s) the models are trained to predict.
LABELS_TO_PREDICT = ['MSFT_Close']
# Backward-compatible alias: the constant was originally spelled this way.
LABELS_TO_PREDITC = LABELS_TO_PREDICT

In [27]:
def _load_split(years, path='./train_data'):
    """Load one data split from the parquet dataset.

    Args:
        years: list of values matched against the `year` column via an
            `in` filter when reading.
        path: location of the parquet dataset.

    Returns:
        A pandas DataFrame with the `year` column dropped and `Datetime`
        set as the index.
    """
    table = pq.read_table(path, filters=[('year', 'in', years)])
    return table.to_pandas().drop(columns=["year"]).set_index("Datetime")


# All three splits come from the same dataset and differ only in the years kept.
train_set = _load_split(train_years)
val_set = _load_split(val_year)
test_set = _load_split(test_year)


In [28]:
# Show (rows, columns) for the train, validation and test frames, one per line.
print(train_set.shape, val_set.shape, test_set.shape, sep="\n")

(2103845, 286)
(525600, 286)
(396322, 286)


In [29]:
def compile_and_fit(model, window, max_epochs=None, patience=10):
    """Compile `model` and train it on the datasets exposed by `window`.

    The model is compiled with mean-squared-error loss, the Adam optimizer
    and mean-absolute-error as a metric, then fitted on `window.train` and
    validated on `window.val`.

    Args:
        model: object providing Keras-style `compile` and `fit` methods.
        window: object exposing `train` and `val` datasets.
        max_epochs: maximum number of epochs. When None (the default) the
            current value of the module-level MAX_EPOCHS is used.
        patience: number of epochs without `val_loss` improvement before
            early stopping halts training.

    Returns:
        Whatever `model.fit` returns (the training history for Keras models).
    """
    if max_epochs is None:
        # Look the constant up at call time: a default of `MAX_EPOCHS` in the
        # signature is frozen when this cell runs, so later edits to the
        # config cell would be silently ignored.
        max_epochs = MAX_EPOCHS

    # Early stopping can only trigger when max_epochs exceeds `patience`.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=patience,
        restore_best_weights=True,
    )

    model.compile(
        loss=tf.losses.MeanSquaredError(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.metrics.MeanAbsoluteError()],
    )

    return model.fit(
        window.train,
        epochs=max_epochs,
        validation_data=window.val,
        callbacks=[early_stopping],
    )

In [33]:
# Build the windowed train/val/test datasets. The label column(s) come from
# the config constant instead of a duplicated literal; a copy is passed so
# the constant itself is never shared with the generator.
day_window = ts_class.WindowGenerator(
    input_width=WIND_SIZE,
    label_width=1,
    shift=1,
    label_columns=list(LABELS_TO_PREDITC),
    train_df=train_set,
    val_df=val_set,
    test_df=test_set,
)
day_window

Total window size: 16
Input indices: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
Label indices: [15]
Label column name(s): ['MSFT_Close']

In [40]:
# Convolutional baseline, assembled layer by layer.
conv_model = tf.keras.Sequential()
# The kernel spans WIND_SIZE consecutive time steps.
conv_model.add(
    tf.keras.layers.Conv1D(filters=32, kernel_size=(WIND_SIZE,), activation='relu')
)
conv_model.add(tf.keras.layers.Dense(units=32, activation='relu'))
# Single output unit.
conv_model.add(tf.keras.layers.Dense(units=1))

In [30]:
# Recurrent baseline: one LSTM layer followed by a linear read-out.
lstm_model = tf.keras.models.Sequential()
# return_sequences=False keeps only the last time step's output:
# [batch, time, features] => [batch, lstm_units]
lstm_model.add(tf.keras.layers.LSTM(32, return_sequences=False))
# [batch, lstm_units] => [batch, 1]
lstm_model.add(tf.keras.layers.Dense(units=1))

In [41]:
# Train the convolutional model on the windowed data; keep the fit result.
history = compile_and_fit(model=conv_model, window=day_window)

