In [1]:
import tensorflow as tf
from   tensorflow import keras
from   tensorflow.keras import regularizers
from   tensorflow.keras import Sequential
from   tensorflow.keras.layers import Dropout, Dense

In [2]:
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots

In [3]:
from   IPython import display
from   matplotlib import pyplot as plt

In [4]:
import numpy as np
import os
import pandas as pd
import pathlib
import shutil
import tempfile

In [5]:
import config
import utils

In [6]:
# TensorBoard logs live in a "tensorboard_logs" folder under a freshly created
# temporary directory; any existing folder at that path is removed so each
# session starts with empty logs.
_tmp_root = pathlib.Path(tempfile.mkdtemp())
logdir = _tmp_root.joinpath("tensorboard_logs")
shutil.rmtree(logdir, ignore_errors=True)

In [7]:
# Experiment configuration.
TICKER = 'SPY'                # underlying whose saved spreads are loaded
EXPIRIES = [                  # expiry dates loaded and concatenated
    '2020-08-07',
    '2020-08-14',
    '2020-08-21',
]
MAX_MARGIN = 500              # keep only trades with open_margin <= this
MIN_PROFIT = 100              # label is True when max_profit >= this
DATA_SPLIT = 0.7              # fraction of examples used for training

In [8]:
# Load the saved spreads for every expiry and stack them into one frame
# with a fresh 0..n-1 index.
data_df_list = []
for expiry in EXPIRIES:
    print(expiry)
    spreads_df = utils.load_spreads(TICKER, expiry, verbose=True)
    data_df_list.append(spreads_df)
data_df = pd.concat(data_df_list, ignore_index=True)

2020-08-07
Attempting to load saved spreads
Loaded
2020-08-14
Attempting to load saved spreads
Loaded
2020-08-21
Attempting to load saved spreads
Loaded


In [9]:
# Whittle the data down to only what we want to stomach in terms of
# open margin
# NOTE(review): this result is never read -- `viable_trades_df` is reassigned
# from `normalized_df` two cells below before any use, so this filter on the
# raw `data_df` appears to be redundant.
viable_trades_df = data_df[data_df.open_margin <= MAX_MARGIN]

In [10]:
# Normalize all of the columns that will be used for X.
# NOTE: do this before removing examples based on open_margin.
#       We want to include all data in the statistics.
normalized_df = utils.normalize_metadata_columns(data_df)
# Alternative (disabled): normalize only the margin-filtered rows.
# normalized_df = utils.normalize_metadata_columns(viable_trades_df)

In [11]:
# Keep only the trades whose open margin fits within our risk budget.
# NOTE(review): the threshold is the raw MAX_MARGIN while the column is read
# from `normalized_df`; presumably normalize_metadata_columns leaves
# open_margin unscaled -- confirm.
within_margin = normalized_df['open_margin'] <= MAX_MARGIN
viable_trades_df = normalized_df.loc[within_margin]

# open_margin was only needed for the filter above, so drop it from the
# example frame.
examples_df = viable_trades_df.drop(columns=['open_margin'])
# Alternative (disabled): keep every row regardless of margin.
# examples_df = normalized_df.drop(['open_margin'], axis=1)

In [12]:
# Preview the first ten example rows (max_profit is still a column here).
examples_df.head(10)

Unnamed: 0,max_profit,minutes_to_expiry,leg1_type,leg1_strike,leg1_credit,leg1_volume,leg1_volatility,leg1_delta,leg1_gamma,leg1_theta,...,leg2_strike,leg2_credit,leg2_volume,leg2_volatility,leg2_delta,leg2_gamma,leg2_theta,leg2_vega,leg2_rho,leg2_openInterest
2559,-7.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.470033,-0.424913,-0.166593,-0.636639,1.171857,3.564627,-3.743056,0.609668,0.381299,-0.099722
2560,28.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.52221,-0.517242,0.257368,-0.692987,0.913137,3.808895,-4.022905,0.684278,0.32256,0.024291
2561,77.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.548298,-0.551682,0.679685,-0.720611,0.783524,3.697613,-3.914461,0.650294,0.292979,0.18038
2562,135.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.574387,-0.577329,-0.024809,-0.737192,0.659739,3.442582,-3.652533,0.57239,0.264664,-0.20001
2563,168.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.600475,-0.595648,-0.11676,-0.75333,0.545598,3.070564,-3.265441,0.458787,0.238528,0.524207
2564,164.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.626564,-0.60664,-0.150783,-0.755815,0.443952,2.61779,-2.791465,0.320512,0.215215,0.013026
2565,157.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.652652,-0.613235,-0.164949,-0.753429,0.356506,2.124139,-2.272804,0.169728,0.195124,-0.175141
2566,151.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.678741,-0.617632,-0.16381,-0.764012,0.283801,1.627252,-1.749681,0.017989,0.178411,-0.035949
2567,148.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.704829,-0.61983,-0.206308,-0.769962,0.22537,1.158774,-1.255598,-0.125101,0.164965,-0.337913
2568,147.0,-1.345874,1.0,0.509434,0.46879,-0.087808,-0.673049,1.045069,3.785857,-3.987042,...,0.730917,-0.620563,-0.17102,-0.746426,0.17996,0.740996,-0.81455,-0.252694,0.154497,0.065437


In [13]:
# Build the boolean labels: True where max_profit reaches MIN_PROFIT.
# `pop` removes max_profit from examples_df in place, leaving only feature
# columns -- which also means this cell raises KeyError if it is run twice.
labels = examples_df.pop('max_profit').ge(MIN_PROFIT)

In [14]:
# Sizes used when building the tf.data input pipelines.
BATCH_SIZE = 512     # examples per batch
BUFFER_SIZE = 100    # shuffle buffer for the per-epoch training shuffle
n_examples = examples_df.shape[0]
n_features = examples_df.shape[1]

In [15]:
# Build a (features, label) dataset and shuffle it exactly once.
# reshuffle_each_iteration=False is essential: the train/test split below is
# made with take()/skip() on this dataset, and with the default (reshuffle on
# every iteration) each pass would draw a different permutation, letting
# training examples leak into the held-out set from one epoch to the next.
dataset = tf.data.Dataset.from_tensor_slices(
    (examples_df.values, labels.values)
).shuffle(n_examples, reshuffle_each_iteration=False)

In [16]:
# Split into training and held-out sets: the first DATA_SPLIT fraction of
# the dataset is used for training, the remainder for validation.
# NOTE: take/skip only produce disjoint sets if `dataset` yields its elements
# in the same order every time it is iterated.
n_train = int(len(examples_df) * DATA_SPLIT)
train_dataset, test_dataset = dataset.take(n_train), dataset.skip(n_train)

In [17]:
# Number of full batches per training epoch. Clamped to at least 1 so that a
# training set smaller than one batch does not produce zero steps per epoch
# (or a zero `decay_steps` in the learning-rate schedule built from this).
STEPS_PER_EPOCH = max(1, n_train // BATCH_SIZE)

In [18]:
# Training pipeline: reshuffle within a small buffer every epoch, batch, and
# repeat indefinitely (the epoch length is set by steps_per_epoch in fit()).
# It is rebuilt from `dataset.take(n_train)` rather than from `train_dataset`
# itself so that re-running this cell does not stack a second
# shuffle/batch/repeat on top of an already-batched dataset.
train_dataset = (
    dataset.take(n_train)
    .shuffle(BUFFER_SIZE, reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .repeat()
)
# Validation pipeline: the held-out examples, batched, in a single pass.
validate_dataset = test_dataset.batch(BATCH_SIZE)

In [19]:
# Learning-rate schedule: start at 0.001 and decay smoothly (no staircase)
# following initial / (1 + decay_rate * step / decay_steps), i.e. the rate is
# halved after 5 epochs' worth of steps.
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=5 * STEPS_PER_EPOCH,
    decay_rate=1,
    staircase=False,
)

def get_optimizer():
    """Return a new Adam optimizer driven by the module-level `lr_schedule`."""
    adam = keras.optimizers.Adam(learning_rate=lr_schedule)
    return adam

In [20]:
# Checkpointing: write weights only, and only when the monitored loss reaches
# a new minimum.
# NOTE(review): 'loss' is the training loss; fit() is also given validation
# data, so confirm that tracking the training loss (not 'val_loss') is intended.
checkpoint_filepath = os.path.join(config.ML_MODELS_DIR, 'checkpoint')
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
)

def get_callbacks(name):
    """Return the Keras callbacks for the training run called `name`.

    The list holds the shared `model_checkpoint_callback` followed by a new
    TensorBoard callback that logs to `logdir/name`.
    """
    tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir/name)
    # Currently disabled: tfdocs.modeling.EpochDots() progress output and
    # tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10).
    return [model_checkpoint_callback, tensorboard_callback]

In [21]:
def compile_and_fit(model, name, optimizer=None, max_epochs=200):
    """Compile `model` as a binary classifier on logits and train it.

    Args:
        model: Keras model whose output is one raw logit per example (the
            loss and metrics below are configured with from_logits=True).
        name: Run name, forwarded to `get_callbacks` where it selects the
            TensorBoard log sub-directory.
        optimizer: Optimizer to compile with; defaults to `get_optimizer()`.
        max_epochs: Number of epochs passed to `model.fit`.

    Returns:
        The history object returned by `model.fit`.
    """
    if optimizer is None:
        optimizer = get_optimizer()

    def accuracy(y_true, y_pred):
        # The model outputs logits, so squash them to probabilities before
        # applying the 0.5 threshold. The plain 'accuracy' string would
        # threshold the raw logits at 0.5 (probability ~0.62) and misreport.
        # The function is named `accuracy` so the reported metric keys
        # ('accuracy' / 'val_accuracy') stay the same.
        return tf.keras.metrics.binary_accuracy(
            tf.cast(y_true, y_pred.dtype), tf.sigmoid(y_pred))

    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[
            tf.keras.losses.BinaryCrossentropy(
                from_logits=True, name='binary_crossentropy'),
            accuracy
        ]
    )

    model.summary()

    # train_dataset repeats forever, so steps_per_epoch defines the epoch.
    history = model.fit(
        train_dataset,
        steps_per_epoch = STEPS_PER_EPOCH,
        epochs=max_epochs,
        validation_data=validate_dataset,
        callbacks=get_callbacks(name),
        verbose=1)
    return history

In [22]:
# model = Sequential([
#     Dense(1024, activation='relu', input_shape=(n_features,)),
#     Dropout(0.5),
#     Dense(512, activation='relu'),
#     Dropout(0.5),
#     Dense(512, activation='relu'),
#     Dropout(0.5),
#     Dense(512, activation='relu'),
#     Dropout(0.3),
#     Dense(256, activation='relu'),
#     Dropout(0.1),
#     Dense(64, activation='relu'),
#     Dense(32, activation='relu'),
#     Dense(16, activation='relu'),
#     Dense(1)
# ])

In [23]:
# Fully connected classifier: ReLU layers tapering from 512 to 16 units,
# followed by a single linear output unit.
hidden_units = (256, 64, 32, 16)
model = Sequential(
    [Dense(512, activation='relu', input_shape=(n_features,))]
    + [Dense(units, activation='relu') for units in hidden_units]
    + [Dense(1)]
)

In [24]:
# Training histories keyed by run name; entries are added by the cells below.
size_histories = {}

In [None]:
# Train the model defined above as run 'sizes/test' and keep its history.
size_histories['test'] = compile_and_fit(model, 'sizes/test')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               12288     
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_3 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 17        
Total params: 162,689
Trainable params: 162,689
Non-trainable params: 0
__________________________________________________

Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200

In [None]:
#docs_infra: no_execute

# Load the TensorBoard notebook extension
%load_ext tensorboard

# Open an embedded TensorBoard viewer on the runs logged under logdir/sizes
# ({logdir} is expanded by IPython from the Python variable `logdir`).
%tensorboard --logdir {logdir}/sizes