## Import modules

In [1]:
import datetime
import sys

from scipy import stats
from torchvision import transforms
from torch.utils.data import DataLoader

# NOTE(review): machine-specific absolute path; presumably added so the
# `amphibian` imports below resolve -- consider making it configurable.
sys.path.append('d:/gsn-projekt')

import amphibian.preprocess.preprocessing as preproc
from amphibian.fetch.reader import AmphibianReader
from amphibian.preprocess.train_test_split import Train_test_split
from amphibian.architectures import RNNModel
from amphibian.train import SingleTrainer, batch_size_dist

## Parameters

In [7]:
# Length of each input window; passed as `int_len` when the
# TimeSeriesDataset is built further down.
WINDOW_WIDTH = 10

# Batch size consumed by the DataLoader cell near the end of the notebook.
# It was referenced there without ever being defined, which raises a
# NameError on a fresh kernel.
# NOTE(review): 32 is the lower bound of the batch_size range sampled below;
# confirm the value that was actually intended.
BATCH_SIZE = 32

# Sampling distributions for the RNN hyperparameters. Every value exposes
# `.rvs()`. `stats.randint(low, high)` excludes `high`, so the entries of the
# form (k, k + 1) always yield the fixed value k.
PARAM_GRID_RNN = {
    'batch_size': batch_size_dist(32, 256),
    # NOTE(review): fixed at 10, the same value as WINDOW_WIDTH -- presumably
    # the two are meant to stay in sync; confirm.
    'seq_len': stats.randint(10, 11),
    'input_size': stats.randint(50, 51),
    'hidden_size': stats.randint(5, 20),
    'n_outputs': stats.randint(3, 4),
    'num_layers': stats.randint(2, 3),
    # Uniform on [0, 1].
    'dropout': stats.uniform(0, 1)
}

In [11]:
# Sanity check: draw one sample from every distribution in the grid and
# print it next to its hyperparameter name.
for name, dist in PARAM_GRID_RNN.items():
    sample = dist.rvs(size=1)[0]
    print(name, sample)

batch_size 256
seq_len 10
input_size 50
hidden_size 13
n_outputs 3
num_layers 2
dropout 0.5436987381990299


## Read data

In [7]:
# Directory the reader loads from (absolute, machine-specific path).
DATA_PATH = r'D:\gsn-projekt\data\all_values\stocks\Auto Manufacturers - Major'

# The two datetimes handed to the reader -- presumably the start and end of
# the period to load; confirm against AmphibianReader.
date_from = datetime.datetime(2011, 1, 1)
date_to = datetime.datetime(2018, 1, 1)
ar = AmphibianReader(DATA_PATH, date_from, date_to)

# The return value is not used here; binding it to `_` keeps it out of the
# cell output.
_ = ar.create_torch()

## Train test split

In [9]:
# Build the train/test split object from the reader, with interval
# bounds 0 and 1000.
tts = Train_test_split(
    ar,
    int_start=0,
    int_end=1000,
)

In [14]:
# Preprocessing pipeline; Compose applies the transforms in list order.
sample_transform = transforms.Compose([
    preproc.Fill_NaN(),
    preproc.Normalizing(),
    preproc.Dummy_Fill_NaN(),
    preproc.Formatting(),
    preproc.Formatting_y(),
])

# Windowed dataset over the split; each window has length WINDOW_WIDTH.
tsds = preproc.TimeSeriesDataset(
    tts,
    int_len=WINDOW_WIDTH,
    transform=sample_transform,
)



In [16]:
# Inspect one sample (index 1) to check the structure the dataset returns.
tsds[1]

{'train_obs': tensor([[ 0.3922,  0.0000, -0.5291,  ..., -0.4317, -0.3979,  2.1348],
         [ 0.5746,  2.5894, -0.1829,  ..., -0.4747, -0.3981,  1.9274],
         [ 0.7851,  2.3545, -0.2361,  ..., -0.5347, -0.4721,  1.5955],
         ...,
         [ 0.8833,  1.7908,  0.0036,  ..., -0.6017, -0.5025,  1.3282],
         [ 0.7991,  1.6499, -0.2894,  ..., -0.7512, -0.6576,  1.0839],
         [ 0.7430,  1.6030, -0.2628,  ..., -0.7610, -0.6496,  0.8627]],
        device='cuda:0'), 'train_y': tensor(2.)}

In [21]:
# Wrap the dataset in a shuffling loader that yields mini-batches of
# BATCH_SIZE samples.
dl = DataLoader(
    tsds,
    shuffle=True,
    batch_size=BATCH_SIZE,
)

In [None]:
# Each dataset item is a dict (see the `tsds[1]` output above), so the
# DataLoader's default collate function yields a dict of batched tensors.
# A dict has no `.size`, and on a tensor `.size` without a call would only
# print the bound method. Print the shape of every entry instead.
for i, batch in enumerate(dl):
    print(i, {key: tuple(value.shape) for key, value in batch.items()})