In [1]:
# from helper import TimeSeriesDataset
from model import DARNN
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import wandb
from sklearn.metrics import balanced_accuracy_score
import os
import random
import pandas as pd
import copy
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler
from helper import prepare_data_with_window

In [2]:
# # Replace YOUR_API_KEY with your actual API key
# os.environ['WANDB_API_KEY'] = 'b62c5332280f3f3dec6354dacedf36d490db3aeb'
# wandb.login()

In [3]:
seed = 1
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

dir_path = '/Users/charlesmiller/Documents/Code/DL_experiments/project_c/models/DARNN/data'

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device

device(type='mps')

In [4]:
def prepare_model(params,features_cont,features_cat,save_model = False,model_name = 'darnn_idxmag7'):
    model_params = {
        'input_features':  "num of features",
        'encoder_units':  64,
        'decoder_units': 64,
        'time_steps':  "time_steps",
        'device': device,
    }
    model = DARNN(**model_params)
    return model

In [5]:
def train_log(loss, example_ct, epoch):
    # Where the magic happens
    # wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")

In [6]:
def train_model(model, X_train, y_train, X_test, y_test, criterion, optimizer, config, save_model = True, model_name = 'best_model'):
    # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    # wandb.watch(model, criterion, log="all", log_freq=10)
    model = model.to(device)
    X_train, y_train = X_train.to(device), y_train.to(device)
    X_test, y_test = X_test.to(device), y_test.to(device)
    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    training_loss = []
    validation_loss = []
    example_ct = 0
    min_val_loss = 100000
    batch_size = 128
    epochs = config['epochs']

    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

    # train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # val_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    
    for epoch in range(epochs):
        for inputs, labels in train_loader:
            # Move tensors to the right device
            inputs, labels = inputs.to(device), labels.to(device)

            prediction = model(inputs)
            loss = criterion(prediction, labels)
            example_ct +=  len(labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            val_prediction = model(inputs)
            val_loss = criterion(val_prediction, labels)

            training_loss.append(loss.item())
            validation_loss.append(val_loss.item())

        if val_loss.item() < min_val_loss:
            best_params = copy.deepcopy(model.state_dict())
            min_val_loss = val_loss.item()

        if epoch % 10 == 0:
            print(f"Epoch {epoch}: Train loss: {loss.item()}, Validation loss: {val_loss.item()}")
            # train_log(loss, example_ct, epoch)

    if save_model:
        torch.save(best_params, f'{dir_path}/{model_name}.pth')

    return min_val_loss

def build_optimizer(model, config):
    if config['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(), lr=config['learning_rate'])
    elif config['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(), lr=config['learning_rate'])
    return optimizer

def make(config,features_cont,features_cat):
    # df = pd.read_csv(data_path)
    # alerts_df = pd.read_csv(alerts_path)
    # df = df.sort_values('t',ascending=True).reset_index(drop=True)
    # train_idx = int(len(df) * 0.8)
    # train = df.iloc[:train_idx]
    # test = df.iloc[train_idx:]
    # X_train, y_train = prepare_data(train,features_cont,features_cat,window_size=config['window_size'],alerts_df=alerts_df)
    # X_test, y_test = prepare_data(test,features_cont,features_cat,window_size=config['window_size'],alerts_df=alerts_df)
    model = prepare_model(config, features_cont, features_cat)
    criterion = nn.CrossEntropyLoss()
    optimizer = build_optimizer(model, config)
    
    return model, criterion, optimizer


In [7]:
# def test(config):
#     location = 'Sydney'
#     w = 14
#     # We pick 14 before the 2016-01-01 for the sliding window
#     from_date = '2015-12-17'
#     to_date = '2017-01-01'

#     date_fmt = '%Y-%m-%d'
#     df = get_df_complete()

#     # features
#     features_cont = ['MinTemp', 'MaxTemp', 'Rainfall', 'WindGustSpeed', 'WindSpeed9am',
#                     'WindSpeed3pm', 'Humidity9am', 'Humidity3pm', 'Pressure9am',
#                     'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am', 'Temp3pm']
#     features_cat = ['RainToday']

#     # Assuming df is your DataFrame


#     X, Y = [], []
#     df_l = df[(df['Location'] == location) & (df.index < to_date) & (df.index > from_date)]
#     D = []
#     for f in features_cont:
#         D.append(df_l[f].interpolate('linear').fillna(0).values)
#     for f in features_cat:
#         D.append(df_l[f].map({'Yes': 1, 'No': 0}).fillna(0).values)
#         # transpose to time series
#     TS = []
#     for i in range(df_l.shape[0]):
#         row = []
#         for c in D:
#             row.append(c[i])
#         TS.append(row)
#     in_seq, out_seq = sliding_window(TS, w, 1)
#     rain_seq = [r[0][-1] for r in out_seq]
#     X.extend(in_seq)
#     Y.extend(rain_seq)

#     # X[features_cont] = scaler.fit_transform(X[features_cont])
#     X_test = torch.tensor(X).float().transpose(1, 2)

#     model_name = 'best_model'
#     model = prepare_model(config, features_cont, features_cat, model_name = model_name)
#     model = model.to(device)
#     model.load_state_dict(torch.load(f'{dir_path}/{model_name}.pth'))
#     model.eval()
#     X_test = X_test.to(device)

#     with torch.no_grad():
#         predicted_prob = model(X_test)
#         model_prediction = predicted_prob.data.max(1, keepdim = True)[1].view(-1).tolist()
#         no_rain_prediction = [0] * len(Y)
#         no_sun_prediction = [1] * len(Y)
#         coin_flip_prediction = [random.randint(0, 1) for _ in range(len(Y))]
#         tomorrow_like_today_prediction = X_test[:, -1, -1].view(-1).tolist()

#         ba_sc = balanced_accuracy_score
#         model_score = round(ba_sc(Y, model_prediction), 4)
#         no_rain_score = round(ba_sc(Y, no_rain_prediction), 4)
#         no_sun_score = round(ba_sc(Y, no_sun_prediction), 4)
#         coin_flip_score = round(ba_sc(Y, coin_flip_prediction), 4)
#         tlt_score = round(ba_sc(Y, tomorrow_like_today_prediction), 4)

#         print(f'Model Prediction Score: {model_score}')
#         print(f'No Rain Prediction Score: {no_rain_score}')
#         print(f'Rain Prediction Score: {no_sun_score}')
#         print(f'Coin Flip Prediction Score: {coin_flip_score}')
#         print(f'Tomorrow like Today Prediction Score: {tlt_score}')

#     torch.onnx.export(model, X_test, "model.onnx")
#     wandb.save("model.onnx")

In [8]:
def model_pipeline(hyperparameters,features_cont,features_cat,X_train, y_train, X_test, y_test):

    # tell wandb to get started
    with wandb.init(project="darnn-idxmag7", config=hyperparameters):
      # access all HPs through wandb.config, so logging matches execution!
      config = wandb.config

    # make the model, data, and optimization problem
    model,criterion, optimizer = make(hyperparameters,features_cont,features_cat)
    print(model)

    # and use them to train the model
    train_model(model, X_train, y_train, X_test, y_test, criterion, optimizer, hyperparameters)

    # # and test its final performance
    test(config)

    return model

In [9]:
config = {
    "tcl_num":          2,
    "tcl_channel_size": 128,
    "kernel_size":      7,
    "dropout":          0.3,
    "slices":           1,
    "use_bias":         True,
    "learning_rate":    0.01,
    "epochs": 800,
    "classes":2,
    "batch_size": 128,
    "window_size": 20,
    "dataset" : "BFC3D_CLASSIFIER",
    "architecture": "TCN",
    "optimizer": "adam"
  }
features_cont = ['v', 'vw', 'o', 'c', 'h', 'l','n','sma_20','sma_5', 'bb_spread',
       'bb_trend','sma_20_trend', 'sma_5_trend', 'pct_5d_high','rsi','macd','roc',
       'pct_5d_low', 'stddev_close_diff_5d', 'stddev_close_diff_10d']
features_cat = ['bb_category']
features = features_cont + features_cat

In [10]:
windowed_data = pd.read_csv('data_window20.csv')
identifiers = windowed_data['alert_identifier'].unique()
train_identifiers = identifiers[:int(len(identifiers)*0.8)]
test_identifiers = identifiers[int(len(identifiers)*0.8):]
train = windowed_data[windowed_data['alert_identifier'].isin(train_identifiers)]
test = windowed_data[windowed_data['alert_identifier'].isin(test_identifiers)]


In [31]:
print(len(train_identifiers),len(test_identifiers))

105734 26434


In [78]:
# scaler = MinMaxScaler()
# scaler.fit(train[features_cont])
# train[features_cont] = scaler.transform(train[features_cont])
# test[features_cont] = scaler.transform(test[features_cont])

# X_test, y_test = [], []
# print('Preparing test data')
# for identifier in test_identifiers[:5000]:
#     X_test.append(test[test['alert_identifier'] == identifier][features].values)
#     y_test.append(test[test['alert_identifier'] == identifier]['one_max_vol_label'].values[-1])

# X_train, y_train = [], []
# print('Preparing train data')
# for identifier in train_identifiers[70000:]:
#     X_train.append(train[train['alert_identifier'] == identifier][features].values)
#     y_train.append(train[train['alert_identifier'] == identifier]['one_max_vol_label'].values[-1])

# X_val, y_val = [], []
# print('Preparing val data')
# for identifier in test_identifiers[5000:6000]:
#     X_val.append(train[train['alert_identifier'] == identifier][features].values)
#     y_val.append(train[train['alert_identifier'] == identifier]['one_max_vol_label'].values[-1])

X_train = torch.tensor(X_train).float().transpose(1, 2)
y_train = torch.tensor(y_train).long()
X_test = torch.tensor(X_test).float().transpose(1, 2)
y_test = torch.tensor(y_test).long()
# X_val = torch.tensor(X_val).float().transpose(1, 2)
# y_val = torch.tensor(y_val).long()

In [79]:
X_train.shape

torch.Size([35734, 21, 20])

In [80]:
model = DARNN(X_train.shape[2], 128, 128, X_train.shape[1],num_classes=2,device=device).to(device)
opt = torch.optim.Adam(model.parameters(), lr=0.01)

In [81]:
epoch_scheduler = torch.optim.lr_scheduler.StepLR(opt, 20, gamma=0.9)

In [82]:
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import balanced_accuracy_score, accuracy_score, average_precision_score
data_train_loader = DataLoader(TensorDataset(X_train, y_train), shuffle=True, batch_size=128)
data_test_loader = DataLoader(TensorDataset(X_test, y_test), shuffle=False, batch_size=128)

In [16]:
## The next step is to 

In [83]:
epochs = 550
loss = nn.CrossEntropyLoss()
patience = 15
min_val_loss = 9999
counter = 0
for i in range(epochs):
    mse_train = 0
    for batch_x, batch_y in data_train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        opt.zero_grad()
        y_pred = model(batch_x)
        y_pred = y_pred.squeeze(1)
        l = loss(y_pred, batch_y)
        l.backward()
        mse_train += l.item()*batch_x.shape[0]
        opt.step()
    epoch_scheduler.step()
    with torch.no_grad():
        mse_val = 0
        preds = []
        true = []
        for batch_x, batch_y in data_test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            output = model(batch_x)
            output = output.squeeze(1)
            preds.append(output.detach().cpu().numpy())
            true.append(batch_y.detach().cpu().numpy())
            mse_val += loss(output, batch_y).item()*batch_x.shape[0]
    preds = np.concatenate(preds)
    true = np.concatenate(true)
    
    if min_val_loss > mse_val**0.5:
        min_val_loss = mse_val**0.5
        print("Saving...")
        torch.save(model.state_dict(), "darnn_nasdaq.pt")
        counter = 0
    else: 
        counter += 1
    
    if counter == patience:
        break
    print("Iter: ", i, "train: ", (mse_train/len(X_train))**0.5, "val: ", (mse_val/len(X_test))**0.5)
    if(i % 10 == 0):
        ## need to add accuracy
        # preds = preds*(target_train_max - target_train_min) + target_train_min
        # true = true*(target_train_max - target_train_min) + target_train_min
        # mse = mean_squared_error(true, preds)
        # mae = mean_absolute_error(true, preds)
        acc = accuracy_score(true, np.argmax(preds, axis=1))
        bal_acc = balanced_accuracy_score(true, np.argmax(preds, axis=1))
        print("acc: ", acc, "bal_acc: ", bal_acc)

Saving...
Iter:  0 train:  0.8153850864846838 val:  0.803644207626218
acc:  0.6336 bal_acc:  0.5201843086632243
Iter:  1 train:  0.8056162789895439 val:  0.8071147073317201
Saving...
Iter:  2 train:  0.8030258635978527 val:  0.8003069533126455
Iter:  3 train:  0.8025087482487684 val:  0.8267017948342027
Saving...
Iter:  4 train:  0.7991951865782567 val:  0.7991580325692144
Iter:  5 train:  0.7978153617077464 val:  0.8150436815596706
Iter:  6 train:  0.7976817243107028 val:  0.8078819908490058
Iter:  7 train:  0.8053841790261644 val:  0.8057676388203242
Iter:  8 train:  0.8461429941760205 val:  0.8057677259303327
Iter:  9 train:  0.8461271825687084 val:  0.8057677259303327
Iter:  10 train:  0.8460448002080194 val:  0.8056489944140174
acc:  0.664 bal_acc:  0.5
Iter:  11 train:  0.953022545955627 val:  0.9885654394884236
Iter:  12 train:  0.954249391858971 val:  0.9885654394884236
Iter:  13 train:  0.9542493918432391 val:  0.9885654394884236
Iter:  14 train:  0.954249391848483 val:  0.988

In [68]:
true

array([0, 0, 1, ..., 0, 1, 1])

In [69]:
np.argmax(preds, axis=1)

array([1, 0, 0, ..., 0, 1, 0])

In [29]:
preds

array([[9.9999750e-01, 2.5486390e-06],
       [9.9999940e-01, 6.3530064e-07],
       [9.9999785e-01, 2.1878295e-06],
       [9.9999797e-01, 2.0059467e-06],
       [9.9999785e-01, 2.1814603e-06],
       [9.9999762e-01, 2.3466964e-06],
       [9.9999607e-01, 3.9053161e-06],
       [9.9999940e-01, 6.4674191e-07],
       [9.9999607e-01, 3.9166885e-06],
       [9.9999774e-01, 2.2521695e-06],
       [9.9999750e-01, 2.5363649e-06],
       [9.9999940e-01, 6.4300025e-07],
       [9.9999762e-01, 2.3483396e-06],
       [9.9999940e-01, 6.3192942e-07],
       [9.9999774e-01, 2.2386957e-06],
       [9.9999654e-01, 3.4245973e-06],
       [9.9999738e-01, 2.5630143e-06],
       [9.9999607e-01, 3.9102320e-06],
       [9.9999750e-01, 2.5217369e-06],
       [9.9999177e-01, 8.2108199e-06],
       [9.9999750e-01, 2.5439838e-06],
       [9.9999785e-01, 2.1786659e-06],
       [9.9999821e-01, 1.8032334e-06],
       [9.9999833e-01, 1.7210220e-06],
       [9.9999189e-01, 8.1449498e-06]], dtype=float32)