In [None]:
from preprocessing import get_model_dataset, create_train_test, min_max_scale, df_to_xy, read_file, lag_features
from lstm import create_model
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mvinje[0m ([33mavogadro[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Year range that selects which processed CSV is loaded.
first_year = 2019
last_year = 2021
file = f"./data/processed_data/{first_year}-{last_year}.csv"

df_read = read_file(file)

# Show a short preview and the schema rather than printing the entire frame
# twice; the full repr of the DataFrame adds no information beyond head()/info().
print(df_read.head())
df_read.info()
print(df_read["Ttl"].max())

In [None]:
# For every threshold, print how many rows have a "Ttl" strictly above it.
tests = [10000, 1586, 1086, 365*2, 365, 180, 90, 60, 30, 15, 0]
ttl = df_read["Ttl"]
for test in tests:
    above = ttl[ttl > test]
    print('Count Ttl >', test, ':', above.count())

In [None]:
# Range of "Moneyness", then the percentage of non-null rows above each threshold.
tests = [50, 25, 10, 5, 2, 1.5, 1, 0.5, 0]
moneyness = df_read["Moneyness"]
print(moneyness.max())
print(moneyness.min())

# The denominator does not depend on the threshold, so compute it once.
non_null = moneyness.count()
for test in tests:
    share = moneyness[moneyness > test].count() / non_null * 100
    # The label previously read 'Moneyness Ttl >' (copy-paste from the Ttl cell)
    # and did not say that the printed value is a percentage.
    print('% of rows with Moneyness >', test, ':', share)

In [None]:
# Columns that get lagged, and the number of time steps per sample.
features = ["Moneyness", "Ttl", "R", "Volatility"]
seq_length = 5
num_features = len(features)

df_read_lags = lag_features(df_read, features, seq_length)

# Split at the given date string (the split rule itself lives in create_train_test).
df_train_orginal, df_test_orginal = create_train_test(df_read_lags, "2021-01-01")

train_x_org, train_y_org = df_to_xy(df_train_orginal, num_features, seq_length)
test_x_org, test_y_org = df_to_xy(df_test_orginal, num_features, seq_length)

train_x_scaled, test_x_scaled = min_max_scale(train_x_org, test_x_org)
train_y_scaled, test_y_scaled = min_max_scale(train_y_org, test_y_org)

# The arrays are intentionally NOT permuted here. Later cells feed
# train_x_scaled to the model and write the outputs back onto df_train_orginal
# row by row; a manual (and unseeded) np.random.permutation of the arrays made
# those predictions land on the wrong rows, so every per-row error that mixes
# predictions with frame columns was computed against unrelated rows.
# Training does not need a pre-shuffle: Keras `Model.fit` shuffles the samples
# before each epoch by default (shuffle=True) — assuming create_model returns a
# Keras model, which the fit/summary/EarlyStopping usage below suggests.

# Reshape the flat feature rows to (samples, seq_length, num_features).
train_x_scaled = np.reshape(train_x_scaled, (len(train_x_scaled), seq_length, num_features))
test_x_scaled = np.reshape(test_x_scaled, (len(test_x_scaled), seq_length, num_features))

# Inputs and targets must stay paired one-to-one.
assert len(train_x_scaled) == len(train_y_scaled), "train x/y length mismatch"
assert len(test_x_scaled) == len(test_y_scaled), "test x/y length mismatch"

print(f"Train_x shape: {train_x_scaled.shape}, train_y shape: {train_y_scaled.shape}")
print(f"Test_x shape: {test_x_scaled.shape}, test_y shape: {test_y_scaled.shape}")

In [None]:
# Plot the "Bid" column in ascending order (ties ordered by "Ask") against a 1-based rank.
bid_sorted = df_read.sort_values(["Bid", "Ask"], ascending=True)["Bid"]
bid_rank = range(1, len(df_read["Bid"]) + 1)
plt.plot(bid_rank, bid_sorted)

In [None]:
# Plot the first column of train_y_scaled, sorted ascending, against a 1-based rank.
first_target = train_y_scaled[:, :1]
target_rank = range(1, len(first_target) + 1)
plt.plot(target_rank, np.sort(first_target, axis=0))

In [None]:
from keras.callbacks import EarlyStopping
# Settings handed to create_model(); seq_length and num_features are taken from
# the preprocessing cell so the model input matches the reshaped arrays.
config = dict(
    units=64,
    dropout=0.1,
    recurrent_dropout=0.1,
    learning_rate=0.001,
    layers=5,
    seq_length=seq_length,
    num_features=num_features,
    bn_momentum=0.1,
)

def trainer(train_x, train_y, model, epochs=100, minibatch_size=4096):
    """Fit ``model`` on the given arrays, stopping early when the training loss stalls.

    Args:
        train_x: Training inputs, forwarded unchanged to ``model.fit``.
        train_y: Training targets, forwarded unchanged to ``model.fit``.
        model: Object exposing a Keras-style
            ``fit(x, y, batch_size=..., epochs=..., callbacks=...)`` method.
        epochs: Upper bound on the number of epochs. Defaults to 100, the
            value that used to be hard-coded.
        minibatch_size: Batch size passed to ``fit``. Defaults to 4096, the
            value that used to be hard-coded.

    Returns:
        Whatever ``model.fit`` returns (for Keras models, a ``History``
        object), so callers can inspect the loss curve. Existing callers that
        ignore the return value are unaffected.
    """
    # No validation data is passed to fit(), so the callback monitors the
    # training loss: stop after 10 epochs without an improvement of at least 1e-7.
    early_stopping = EarlyStopping(
        monitor='loss',
        mode='min',
        min_delta=1e-7,
        patience=10,
    )

    return model.fit(
        train_x,
        train_y,
        batch_size=minibatch_size,
        epochs=epochs,
        callbacks=[early_stopping],
    )

model = create_model(config)
model.summary()

# A `wandb.agent(sweep_id=sweep_id, function=model)` call used to sit here. It
# was removed because it cannot run from a fresh kernel: `wandb` is never
# imported in this notebook, `sweep_id` is never defined, and `function` must be
# a callable that performs one training run, not a model instance.
# TODO: if a sweep is wanted, import wandb, create the sweep with wandb.sweep(),
# and pass a function that builds and trains a model.

# Keep whatever trainer() returns so the training run can be inspected later.
history = trainer(train_x_scaled, train_y_scaled, model)

In [None]:
def prediction(df_test, test_x, model, train_y_org, train_y_scaled):
    """Run ``model`` on ``test_x`` and return a frame with prediction and error columns.

    Args:
        df_test: DataFrame with one row per sample in ``test_x``; must contain
            the columns "Strike", "Bid" and "Ask".
        test_x: Model inputs; ``model(test_x)`` must yield one output row per
            row of ``df_test``.
        model: Callable model; its output is converted with ``np.array``.
        train_y_org: Unscaled targets used to fit the scaler that maps the
            model outputs back to the original target scale.
        train_y_scaled: Scaled targets, row-aligned with ``test_x``; column 0
            is compared against the raw output column 0.

    Returns:
        A copy of ``df_test`` with the added columns "Raw_pred_bid",
        "Scaled_bid", "Raw_mae_bid", "Prediction_bid_strike",
        "Prediction_ask_strike", "Prediction_bid", "Prediction_ask",
        "MAE_bid" and "MAE_ask". The input frame is left untouched.
    """
    # Work on a copy: assigning columns directly onto the argument silently
    # modified the caller's frame (and can trigger SettingWithCopyWarning when
    # that frame is itself a slice of a larger one).
    df_test = df_test.copy()

    predictions = np.array(model(test_x))

    # Error in scaled space, using column 0 of both the outputs and the targets.
    df_test["Raw_pred_bid"] = predictions[:, 0]
    df_test["Scaled_bid"] = train_y_scaled[:, 0]
    df_test["Raw_mae_bid"] = (df_test["Raw_pred_bid"] - df_test["Scaled_bid"]).abs()

    # NOTE(review): this refits a default MinMaxScaler on the unscaled targets;
    # it only inverts the scaling correctly if min_max_scale() used the same
    # scaler settings fitted on the same data — confirm against preprocessing.
    scaler = MinMaxScaler().fit(train_y_org)
    predictions = scaler.inverse_transform(predictions)

    # Column 0 is treated as bid and column 1 as ask; both are multiplied by
    # "Strike" before being compared with the quoted "Bid"/"Ask".
    df_test["Prediction_bid_strike"] = predictions[:, 0]
    df_test["Prediction_ask_strike"] = predictions[:, 1]
    df_test["Prediction_bid"] = df_test["Prediction_bid_strike"] * df_test["Strike"]
    df_test["Prediction_ask"] = df_test["Prediction_ask_strike"] * df_test["Strike"]
    df_test["MAE_bid"] = (df_test["Bid"] - df_test["Prediction_bid"]).abs()
    df_test["MAE_ask"] = (df_test["Ask"] - df_test["Prediction_ask"]).abs()
    return df_test

# Evaluate on the TRAINING data: despite its name, df_test holds predictions
# for df_train_orginal.
# NOTE(review): train_x_scaled / train_y_scaled must be in the same row order as
# df_train_orginal for the per-row columns to line up — verify that nothing
# upstream reorders the arrays.
df_test = prediction(
    df_test=df_train_orginal,
    test_x=train_x_scaled,
    model=model,
    train_y_org=train_y_org,
    train_y_scaled=train_y_scaled,
)

df_test.info()
print(df_test.head())

In [None]:
# First 20 rows: quoted prices next to the predictions and their absolute errors.
report_columns = ["Bid", "Ask", "Prediction_bid", "Prediction_ask", "MAE_bid", "MAE_ask"]
preview = df_test.head(20)
print(preview[report_columns])

In [None]:
# Mean absolute bid error, then the per-row error plotted against the quoted bid.
mae_bid = df_test["MAE_bid"]
print(f"MAE: {mae_bid.mean()}")
plt.scatter(df_test["Bid"], mae_bid)
plt.show()

In [None]:
# Mean absolute error in scaled space, then the per-row error plotted against
# the first column of the scaled targets.
raw_mae_bid = df_test["Raw_mae_bid"]
scaled_target = train_y_scaled[:, :1]
print(f"MAE: {raw_mae_bid.mean()}")
plt.scatter(scaled_target, raw_mae_bid)
plt.show()

In [None]:
# Mean absolute ask error, then the per-row error plotted against the quoted ask.
mae_ask = df_test["MAE_ask"]
print(f"MAE: {mae_ask.mean()}")
plt.scatter(df_test["Ask"], mae_ask)
plt.show()

In [None]:
# Scatter of "Moneyness" against "Strike" for the training frame.
strike = df_train_orginal["Strike"]
moneyness_train = df_train_orginal["Moneyness"]
plt.scatter(strike, moneyness_train)
plt.show()