In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import tensorflow as tf

In [None]:
# Connection settings are read from the environment so credentials do not have
# to live in the notebook; each fallback is the local development value.
conn = psycopg2.connect(
    host=os.environ.get('PGHOST', 'localhost'),
    database=os.environ.get('PGDATABASE', 'db'),
    user=os.environ.get('PGUSER', 'postgres'),
    password=os.environ.get('PGPASSWORD', 'postgres'),
)

In [None]:
# Cursor on the open connection; the raw SQL statements below run through it.
cur = conn.cursor()

In [None]:
# Ask the server for its version string; the row is fetched in the next cell.
cur.execute("SELECT version()")

In [None]:
# Single result row of the version query.
db_version = cur.fetchone()

In [None]:
# Show the version row returned by the server.
print(db_version)

In [None]:
# Query returning every stock_code in the `stock` table, sorted by code.
all_stock_code_sql = """
    SELECT stock.stock_code
    FROM stock
    ORDER BY stock.stock_code;
"""

In [None]:
# Run the stock-code listing query on the shared cursor.
cur.execute(all_stock_code_sql)

In [None]:
# All rows of the query executed just before this cell.
res = cur.fetchall()

In [None]:
# The query selects a single column, so every fetched row is a 1-tuple;
# unpack it to obtain a flat list of stock codes.
print(res)
stock_codes = [code for (code,) in res]
print(stock_codes)

In [None]:
# Query for date/close/stock_code of every row in `history`, ordered by stock
# code and then date. It is only defined here: the execute/fetch lines below
# are commented out, so nothing runs it in this cell.
select_history_sql = """
    SELECT history.date, history.close, history.stock_code
    FROM history
    ORDER BY history.stock_code, history.date;
"""
# cur.execute(select_history_sql)
# res = cur.fetchall()
# print(res)

In [None]:
# for x in res[0]:
#     print(x, type(x))

In [None]:
# for x in res:
#     print(x)

In [None]:
def plot_series(time, series, label, start=0, end=None, format="-"):
    """Plot one labelled series on the current matplotlib axes.

    Args:
        time: Sequence of x values; sliced with ``[start:end]``.
        series: Sequence of y values; sliced with the same ``[start:end]``.
        label: Legend entry for this line.
        start: Index of the first point to draw (default 0).
        end: Index one past the last point to draw; ``None`` draws to the end.
        format: Matplotlib format string passed to ``plt.plot`` (default "-").

    The axis labels ("Time" / "Value"), the grid and the legend are (re)applied
    on every call, so several calls can be stacked on one figure.
    """
    span = slice(start, end)
    plt.plot(time[span], series[span], format, label=label)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
    plt.legend()

In [None]:
# p_sql = """
#     SELECT * FROM get_stock_history('%s');
# """ % ("0050")
# df = pd.read_sql(p_sql, con=conn)

In [None]:
# prepared_sql = """
#     PREPARE get_history AS
#     SELECT h.date, h.close, h.stock_code
#     FROM history as h
#     WHERE h.stock_code = $1;
# """
# cur.execute(prepared_sql)

In [None]:
# History rows of one stock, selected through the named parameter `stock_code`.
# The result feeds a sliding-window time-series dataset, so rows must arrive in
# chronological order; without ORDER BY the database gives no such guarantee.
para_p_sql = """
    SELECT h.date, h.close, h.high, h.low, h.open, h.capacity, h.turnover, h.transactions, h.stock_code
    FROM history as h
    WHERE h.stock_code = %(stock_code)s
    ORDER BY h.date;
"""

In [None]:
# Echo the parameterised query text for inspection.
print(para_p_sql)

In [None]:
# cur.execute(p_sql)
# res = cur.fetchall()
# print(res)

In [None]:
# Load the history of one stock into a DataFrame. The stock code is bound via
# the query's named parameter instead of being formatted into the SQL text.
# Alternative stock code tried earlier: '006208'.
df = pd.read_sql(para_p_sql, con=conn, params={'stock_code': '0050'})

In [None]:
# Display the loaded history frame as the cell output.
df

In [None]:
# Feature matrix (rows = history rows, columns = selected features) and the
# matching date axis. Only `close` is used; the commented line is the
# seven-feature variant.
# NOTE(review): "mae: 2.2646" looks like a recorded result for the multi-feature
# variant below — confirm which configuration it belongs to.
# mae: 2.2646
# matrix = df[['close', 'high', 'low', 'open', 'capacity', 'turnover', 'transactions']].values
matrix = df[['close']].values
time = df['date'].values

In [None]:
# (number of rows, number of feature columns)
print(matrix.shape)

In [None]:
# Peek at the first nine rows of the feature matrix.
matrix[:9, :]

In [None]:
# Plot column 0 of the feature matrix (the `close` column) over the date axis.
# NOTE(review): the legend label 'lr' does not describe a close-price series;
# presumably a leftover — confirm before changing it.
plot_series(time, matrix[:, 0], 'lr')

In [None]:
# Windowing / batching hyper-parameters.
window_size = 20
batch_size = 32
shuffle_buffer = 1000

# Chronological split points: rows [0, train_idx) are for training; the
# remaining rows are halved into validation [train_idx, val_idx) and
# test [val_idx, end).
ratio = 0.8
train_idx = int(ratio * matrix.shape[0])
val_idx = train_idx + (matrix.shape[0] - train_idx) // 2

# Number of feature columns per time step.
dim = matrix.shape[1]

def windowed_dataset_m(matrix, window_size=20, batch_size=32, shuffle_buffer=1000):
    """Build a shuffled, batched tf.data pipeline of (window, target) pairs.

    Args:
        matrix: 2-D array-like, one row per time step and one column per
            feature (the target lookup indexes row and column).
        window_size: Number of consecutive rows in each input window.
        batch_size: Number of (window, target) pairs per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``(inputs, targets)`` with
        inputs of shape (batch, window_size, n_features) and targets of shape
        (batch, 1). The target is column 0 of the row that immediately
        follows the window.
    """
    span = window_size + 1  # input rows plus the row that carries the target
    windows = (
        tf.data.Dataset.from_tensor_slices(matrix)
        .window(span, shift=1, drop_remainder=True)
        .flat_map(lambda w: w.batch(span))
        .shuffle(shuffle_buffer)
    )
    # Split every span into its first window_size rows and the first feature
    # of its last row.
    pairs = windows.map(lambda w: (w[:-1], [w[-1, 0]]))
    return pairs.batch(batch_size).prefetch(1)

def model_predict_m(model, matrix, window_size=20, batch_size=32):
    """Run `model` over every sliding window of `matrix`.

    Args:
        model: Object exposing ``predict(dataset)`` (a compiled Keras model here).
        matrix: Array-like with one row per time step.
        window_size: Number of consecutive rows per input window.
        batch_size: Windows per prediction batch (default 32, the value that
            used to be fixed inside the function).

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset: one
        prediction per window, i.e. ``len(matrix) - window_size + 1`` of them.
        The last window ends on the final row, so its prediction has no
        matching ground truth inside `matrix`.
    """
    ds = tf.data.Dataset.from_tensor_slices(matrix)
    # drop_remainder keeps only full-length windows.
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda window: window.batch(window_size))
    ds = ds.batch(batch_size).prefetch(1)
    return model.predict(ds)

# Prepare Dataset

In [None]:
# Chronological three-way split at train_idx and val_idx:
#   training   -> rows [0, train_idx)
#   validation -> rows [train_idx, val_idx)
#   test       -> rows [val_idx, end)
split_points = [train_idx, val_idx]
time_train, time_valid, time_test = np.split(time, split_points)
x_train, x_val, x_test = np.split(matrix, split_points)

print(matrix.shape)
print(time_train.shape, time_valid.shape, time_test.shape)
print(x_train.shape, x_val.shape, x_test.shape)

In [None]:
# Keyword arguments matter here: positionally, the third slot of
# windowed_dataset_m is batch_size, so passing shuffle_buffer there would
# silently train with batches of 1000 windows.
train_ds = windowed_dataset_m(
    x_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer,
)
valid_ds = windowed_dataset_m(
    x_val,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer,
)

In [None]:
# for window in train_ds:
# #     for val in window:
# #         print(val.numpy())
#     x, y = window
#     print(x.shape, y.shape)
#     print(x.numpy())
#     print('-')
#     print(y.numpy())

#     break

# Experiment on Learning Rate

In [None]:
def raw_model_1d(window_size, dim):
    """Build the (uncompiled) Conv1D + bidirectional-LSTM regressor.

    Args:
        window_size: Number of time steps in each input window.
        dim: Number of features per time step.

    Returns:
        A ``tf.keras`` Sequential model mapping inputs of shape
        (batch, window_size, dim) to a single value per window.
    """
    layers = tf.keras.layers

    # Causal padding keeps the sequence length and prevents a step from
    # seeing later steps inside the convolution.
    conv = layers.Conv1D(
        filters=8,
        kernel_size=5,
        strides=1,
        padding="causal",
        activation="relu",
        input_shape=[window_size, dim],
    )
    recurrent = layers.Bidirectional(layers.LSTM(16, return_sequences=True))
    # The LSTM returns the whole sequence, so flatten it before the dense head.
    head = [
        layers.Flatten(),
        layers.Dense(20, activation="relu"),
        layers.Dense(10, activation="relu"),
        layers.Dense(1),
    ]
    return tf.keras.models.Sequential([conv, recurrent, *head])

In [None]:
# def raw_model_2d(window_size, dim):
#     model = tf.keras.models.Sequential([
#         tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[window_size, dim]),
#         tf.keras.layers.Conv2D(filters=64,
#                                kernel_size=(5, dim),
#                                strides=1,
#                                activation="relu"),
# #         tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
# #         tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
# # #         tf.keras.layers.LSTM(64, return_sequences=True),
# #         tf.keras.layers.Flatten(),
# #         tf.keras.layers.Dense(30, activation="relu"),
# #         tf.keras.layers.Dense(10, activation="relu"),
# #         tf.keras.layers.Dense(1),
        
#     ])
#     return model

# model = raw_model_2d(window_size, dim)
# model.summary()

In [None]:
model = raw_model_1d(window_size, dim)

# Learning-rate range test: start at 1e-8 and multiply the rate by 10 every
# 20 epochs, so loss can later be plotted against the learning rate.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-8)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
model.summary()

In [None]:
# 200 epochs under lr_schedule sweep the learning rate from 1e-8 up to
# 1e-8 * 10**(199 / 20), i.e. roughly 89.
history = model.fit(train_ds, epochs=200, callbacks=[lr_schedule])

In [None]:
# Loss versus learning rate on a logarithmic x-axis. The axis limits clip the
# view to learning rates in [1e-8, 10] and losses in [0, 60].
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis([1e-8, 10, 0, 60])

# Train the model

In [None]:
# Fresh model for the real training run (the range-test model is discarded).
model = raw_model_1d(window_size, dim)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3, momentum=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
model.summary()

# Stop once val_mae has not improved for 50 epochs, and keep on disk only the
# weights of the epoch with the best val_mae.
checkpoint_filepath = './tmp/checkpoint'
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_mae', patience=50)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_mae',
    verbose=1,
    save_best_only=True,
)

history = model.fit(train_ds,
                    validation_data=valid_ds,
                    callbacks=[early_stop, model_checkpoint],
                    verbose=2,
                    epochs=500)

# Training ends on the last epoch's weights; reload the best checkpoint so the
# evaluation below uses the best-val_mae model.
model.load_weights(checkpoint_filepath)

# Forecast

In [None]:
# model_predict_m returns one prediction per window, including the window that
# ends on the last row. That final prediction targets a step beyond the data,
# so it has no ground truth and is dropped. Afterwards forecast[i] is the
# prediction for matrix[i + window_size].
forecast = model_predict_m(model, matrix, window_size)[:-1]
print(forecast.shape)
print(matrix.shape)

### Show test-set results

In [None]:
# forecast[i] targets matrix[i + window_size], so the predictions for the test
# rows (matrix[val_idx:]) start at index val_idx - window_size.
forecast_test = forecast[val_idx - window_size:]
# Ground truth is the first feature column when the matrix is 2-D.
x_test_close = x_test[:, 0] if matrix.ndim > 1 else x_test
print(forecast_test.shape)
print(x_test_close.shape)

In [None]:
# Mean absolute error between the true test values and their forecasts.
# NOTE(review): x_test_close is 1-D while forecast_test keeps the model's
# trailing output axis — presumably the metric reconciles the ranks; confirm.
m = tf.keras.metrics.MeanAbsoluteError()
m.update_state(x_test_close, forecast_test)
m.result().numpy()

In [None]:
# Report the test MAE and overlay the true and predicted series on one figure.
print("mean absolute error:", m.result().numpy())
plt.figure(figsize=(10, 6))
plot_series(time_test, x_test_close, 'true')
plot_series(time_test, forecast_test, 'predict')

### Show validation-set results

In [None]:
# forecast_valid = forecast[split_time - window_size:-1, -1, 0]

# forecast[i] targets matrix[i + window_size], so the predictions for the
# validation rows matrix[train_idx:val_idx] sit window_size positions earlier.
forecast_valid = forecast[train_idx - window_size:val_idx - window_size]
# Ground truth is the first feature column when the matrix is 2-D.
x_val_close = x_val[:, 0] if matrix.ndim > 1 else x_val
print(forecast_valid.shape)
print(x_val_close.shape)

In [None]:
# Spot-check the first two true values against their predictions.
print(x_val_close[:2])
print(forecast_valid[:2])

In [None]:
# Mean absolute error between the true validation values and their forecasts.
# A new metric object is created so the test-set state is not accumulated.
# Functional alternative: tf.keras.metrics.mean_absolute_error(x_val, forecast_valid)
m = tf.keras.metrics.MeanAbsoluteError()
m.update_state(x_val_close, forecast_valid)
m.result().numpy()

In [None]:
# Report the validation MAE and overlay the true and predicted series.
print("mean absolute error:", m.result().numpy())
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_val_close, 'true')
plot_series(time_valid, forecast_valid, 'predict')

### Show training-set results

In [None]:
# First train_idx predictions. Because forecast[i] targets
# matrix[i + window_size], these cover rows window_size .. train_idx +
# window_size - 1; the overshoot past the training rows is trimmed when
# plotting.
forecast_train = forecast[:train_idx]
# Ground truth is the first feature column when the matrix is 2-D.
x_train_close = x_train[:, 0] if matrix.ndim > 1 else x_train
print(time_train.shape)
print(forecast_train.shape)

In [None]:
plt.figure(figsize=(10, 6))
plot_series(time_train, x_train_close, 'true')

# Alignment of the prediction curve:
# - the first window_size training rows have no prediction (no full window
#   precedes them), hence time_train[window_size:];
# - forecast_train holds predictions for rows window_size .. train_idx +
#   window_size - 1, so its last window_size entries fall outside the training
#   rows and are stripped with [:-window_size].

plot_series(time_train[window_size:], forecast_train[:-window_size], 'predict')

# Save Model

In [None]:
# Print the model's output / input tensor names without the ":<index>" suffix.
# Splitting on ":" removes the suffix only when it is present, whereas slicing
# off the last two characters would truncate names that have no such suffix.
print("output layers", [o.name.split(":")[0] for o in model.outputs])
print("input layers", [i.name.split(":")[0] for i in model.inputs])

In [None]:
# from tensorflow.python import saved_model
# from tensorflow.python.saved_model.signature_def_utils_impl import build_signature_def, predict_signature_def

In [None]:
# curr_dir = os.getcwd()
# # print(curr_dir)
# export_dir = os.path.join(curr_dir, "stocknet")

In [None]:
# builder = saved_model.builder.SavedModelBuilder(export_dir)

In [None]:
# model_version = 1
# signature = predict_signature_def(
#     inputs={"input": model.input},
#     outputs={"output": model.output}
# )

In [None]:
# The export directory below is built relative to the current working directory.
import os
curr_dir = os.getcwd()
print(curr_dir)

In [None]:
# Export the trained model with tf.saved_model.save into
# <cwd>/stocknet/<model_version>/ — one numbered sub-directory per version.
model_version = 1
model_save_path = os.path.join(curr_dir, "stocknet/{:d}/".format(model_version))
tf.saved_model.save(model, model_save_path)