In [None]:
import torch as t
import pandas as pd
import numpy as np
from tqdm import tqdm


from src.preprocess import *
from src.models import *
from src.utils import *

from torch.utils.tensorboard import SummaryWriter

import warnings
# Install a blanket "ignore" filter: every warning category is silenced for
# the rest of the kernel session (presumably to keep cell output clean).
# NOTE(review): this also hides deprecation and numerical warnings raised by
# pandas / torch; consider narrowing the filter to a specific category.
warnings.simplefilter('ignore')


In [None]:
# Notebook configuration
# ----------------------
# Compute device, in order of preference: Apple MPS, then CUDA, then CPU.
if t.backends.mps.is_available():
    DEVICE = 'mps'
elif t.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# CSV that is parsed when no pickle cache of the dataset exists yet.
DATASET_PATH = './dataset/result.csv'

# Labelling parameters (both are handed to generate_labels).
LOOK_FWD = 700       # also sizes the rows trimmed from the labels / frame tail
THR = 1e-6           # presumably a labelling threshold; confirm in generate_labels

# Window length given to Pipe and to Att_GRU (as `window`).
W_SIZE = 400

# Split fractions: train vs. test, then train vs. validation inside train.
TRAIN_TEST_R = 0.8
TRAIN_VAL_R = 0.8

# Model / loader hyperparameters.
HIDDEN = 60          # Att_GRU `hidden`
N_LAYERS = 5         # Att_GRU `n_layers`
BATCH_SIZE = 256     # batch size shared by the train / val / test DataLoaders

In [None]:
# `os` is imported explicitly: the notebook never imported it, so the
# os.path.isfile call below only worked if a wildcard import leaked the name.
import os
import pickle

# unzip_dataset is a project helper; presumably it extracts the archive(s)
# under ./dataset/ so that DATASET_PATH exists (confirm in src).
unzip_dataset('./dataset/')

# A pickle cache next to the CSV avoids re-parsing the CSV on later runs.
pckl_name = './dataset/dataset.pckl'
if os.path.isfile(pckl_name):
    print('loading the dataset...')
    # NOTE(security): pickle.load can execute arbitrary code. Only load a
    # cache file that this notebook wrote itself.
    with open(pckl_name, 'rb') as f:
        dataset = pickle.load(f)
else:
    dataset = pd.read_csv(DATASET_PATH, index_col=0)
    print('saving the dataset...')
    # Write-only access is sufficient here (the original used 'wb+').
    with open(pckl_name, 'wb') as f:
        pickle.dump(dataset, f)

# Work on the first 500,000 rows only.
dataset = dataset[:500000]
dataset.shape

In [None]:
# generate_labels is a project helper; LOOK_FWD and THR are passed through
# unchanged (presumably look-ahead horizon and threshold; confirm in src).
labels = generate_labels(dataset, LOOK_FWD, THR)
# Discard the first LOOK_FWD entries of the result.
# NOTE(review): confirm that the leading entries (not the trailing ones) are
# the ones without a usable label.
labels = labels[LOOK_FWD:]

In [None]:
import matplotlib.pyplot as plt

# Histogram of the generated labels, drawn through the explicit Axes API.
fig, ax = plt.subplots()
ax.hist(labels)
plt.show()

In [None]:
# Display the shape of `labels` as a quick size check before the splits are built.
labels.shape

In [None]:
# ---- train / validation / test split (in row order, no shuffling) ----
# Rows held back at the end of the frame; they belong to neither split.
tail_margin = LOOK_FWD * 2
usable_rows = dataset.shape[0] - tail_margin - W_SIZE

# The first TRAIN_TEST_R share of the usable rows is the training range.
# The test range starts W_SIZE rows later, leaving a gap between the two.
train_index = int(usable_rows * TRAIN_TEST_R)
test_index_start = train_index + W_SIZE

ds_train = dataset[:train_index]
ds_test = dataset[test_index_start:-tail_margin]

# Drop the notebook-level reference to the full frame.
# NOTE(review): this makes the cell non-idempotent. Any cell that reads
# `dataset` (including this one) needs the load cell re-run first.
dataset = None

# Fit the preprocessing pipe on the training rows only, then apply the same
# fitted pipe to both splits.
pipe = Pipe(W_SIZE)
pipe.fit(ds_train)
train_t = pipe.transform(ds_train)
test_t = pipe.transform(ds_test)

# Labels are sliced positionally to the number of transformed samples.
# NOTE(review): `labels` was already trimmed by LOOK_FWD when it was built,
# while test_index_start indexes the untrimmed frame. Confirm the
# sample/label alignment against what Pipe.transform returns.
train_l_t = t.tensor(labels[:train_t.shape[0]])
test_l_t = t.tensor(
    labels[test_index_start:test_index_start + test_t.shape[0]]
)

# Position inside the training samples where the validation tail begins.
train_val_idx = int(train_l_t.shape[0] * TRAIN_VAL_R)

train_ds, val_ds, test_ds = (
    t.utils.data.TensorDataset(features, targets)
    for features, targets in (
        (train_t[:train_val_idx], train_l_t[:train_val_idx]),
        (train_t[train_val_idx:], train_l_t[train_val_idx:]),
        (test_t, test_l_t),
    )
)

# One loader per split, all with the same batch size and default ordering.
train_dl, val_dl, test_dl = (
    t.utils.data.DataLoader(split, batch_size=BATCH_SIZE)
    for split in (train_ds, val_ds, test_ds)
)

In [None]:
# Smoke test: build a small Att_GRU and push five training samples through it
# to inspect the output shape. Arguments stay positional because the
# constructor's parameter order is not visible from this notebook.
n_features = train_t.shape[2]
model = Att_GRU(n_features, 5, 20, W_SIZE, DEVICE).to(DEVICE)

sample_batch = train_t[500:505].to(DEVICE)
model(sample_batch).shape

In [None]:
%%time
# Build the network from the notebook-level hyperparameters.
model_1 = Att_GRU(test_t.shape[2],
                  n_layers=N_LAYERS,
                  hidden=HIDDEN,
                  device=DEVICE,
                  window=W_SIZE).to(device=DEVICE)

# Fix: the optimizer and the training call previously received `model`, the
# small smoke-test network from the preceding cell, so model_1 was never
# trained and HIDDEN / N_LAYERS had no effect. Both now use model_1.
# `model` is rebound to the same object so that any cell still referring to
# `model` sees the trained, configured network.
model = model_1

optimizer = t.optim.Adam(model_1.parameters())
lf = t.nn.CrossEntropyLoss()
sm = SummaryWriter('./runs')  # TensorBoard event files are written under ./runs

# The positional 10 is presumably the number of epochs; confirm against the
# signature of the project's train() helper.
train(train_dl, val_dl, model_1, optimizer, lf, 10, device=DEVICE, sm=sm)