In [15]:
# !pip3 install wget
import torch
import torch.nn as nn

In [77]:
!pip3 install bayesian-optimization

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [78]:
import wget, os, gzip, pickle, random, re, sys

IMDB_URL = 'http://dlvu.github.io/data/imdb.{}.pkl.gz'
IMDB_FILE = 'imdb.{}.pkl.gz'

PAD, START, END, UNK = '.pad', '.start', '.end', '.unk'

def load_imdb(final=False, val=5000, seed=0, voc=None, char=False):

    cst = 'char' if char else 'word'

    imdb_url = IMDB_URL.format(cst)
    imdb_file = IMDB_FILE.format(cst)

    if not os.path.exists(imdb_file):
        wget.download(imdb_url)

    with gzip.open(imdb_file) as file:
        sequences, labels, i2w, w2i = pickle.load(file)

    if voc is not None and voc < len(i2w):
        nw_sequences = {}

        i2w = i2w[:voc]
        w2i = {w: i for i, w in enumerate(i2w)}

        mx, unk = voc, w2i['.unk']
        for key, seqs in sequences.items():
            nw_sequences[key] = []
            for seq in seqs:
                seq = [s if s < mx else unk for s in seq]
                nw_sequences[key].append(seq)

        sequences = nw_sequences

    if final:
        return (sequences['train'], labels['train']), (sequences['test'], labels['test']), (i2w, w2i), 2

    # Make a validation split
    random.seed(seed)

    x_train, y_train = [], []
    x_val, y_val = [], []

    val_ind = set( random.sample(range(len(sequences['train'])), k=val) )
    for i, (s, l) in enumerate(zip(sequences['train'], labels['train'])):
        if i in val_ind:
            x_val.append(s)
            y_val.append(l)
        else:
            x_train.append(s)
            y_train.append(l)

    return (x_train, y_train), \
           (x_val, y_val), \
           (i2w, w2i), 2


def gen_sentence(sent, g):

    symb = '_[a-z]*'

    while True:

        match = re.search(symb, sent)
        if match is None:
            return sent

        s = match.span()
        sent = sent[:s[0]] + random.choice(g[sent[s[0]:s[1]]]) + sent[s[1]:]

def gen_dyck(p):
    open = 1
    sent = '('
    while open > 0:
        if random.random() < p:
            sent += '('
            open += 1
        else:
            sent += ')'
            open -= 1

    return sent

def gen_ndfa(p):

    word = random.choice(['abc!', 'uvw!', 'klm!'])

    s = ''
    while True:
        if random.random() < p:
            return 's' + s + 's'
        else:
            s+= word

def load_brackets(n=50_000, seed=0):
    return load_toy(n, char=True, seed=seed, name='dyck')

def load_ndfa(n=50_000, seed=0):
    return load_toy(n, char=True, seed=seed, name='ndfa')

def load_toy(n=50_000, char=True, seed=0, name='lang'):

    random.seed(0)

    if name == 'lang':
        sent = '_s'

        toy = {
            '_s': ['_s _adv', '_np _vp', '_np _vp _prep _np', '_np _vp ( _prep _np )', '_np _vp _con _s' , '_np _vp ( _con _s )'],
            '_adv': ['briefly', 'quickly', 'impatiently'],
            '_np': ['a _noun', 'the _noun', 'a _adj _noun', 'the _adj _noun'],
            '_prep': ['on', 'with', 'to'],
            '_con' : ['while', 'but'],
            '_noun': ['mouse', 'bunny', 'cat', 'dog', 'man', 'woman', 'person'],
            '_vp': ['walked', 'walks', 'ran', 'runs', 'goes', 'went'],
            '_adj': ['short', 'quick', 'busy', 'nice', 'gorgeous']
        }

        sentences = [ gen_sentence(sent, toy) for _ in range(n)]
        sentences.sort(key=lambda s : len(s))

    elif name == 'dyck':

        sentences = [gen_dyck(7./16.) for _ in range(n)]
        sentences.sort(key=lambda s: len(s))

    elif name == 'ndfa':

        sentences = [gen_ndfa(1./4.) for _ in range(n)]
        sentences.sort(key=lambda s: len(s))

    else:
        raise Exception(name)

    tokens = set()
    for s in sentences:

        if char:
            for c in s:
                tokens.add(c)
        else:
            for w in s.split():
                tokens.add(w)

    i2t = [PAD, START, END, UNK] + list(tokens)
    t2i = {t:i for i, t in enumerate(i2t)}

    sequences = []
    for s in sentences:
        if char:
            tok = list(s)
        else:
            tok = s.split()
        sequences.append([t2i[t] for t in tok])

    return sequences, (i2t, t2i)


### Part 1: Classification: data loading

In [17]:
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)

In [18]:
w2i

{'.pad': 0,
 '.start': 1,
 '.end': 2,
 '.unk': 3,
 'the': 4,
 'and': 5,
 'a': 6,
 'of': 7,
 'to': 8,
 'is': 9,
 'br': 10,
 'it': 11,
 'in': 12,
 'i': 13,
 'this': 14,
 'that': 15,
 's': 16,
 'was': 17,
 'as': 18,
 'movie': 19,
 'for': 20,
 'with': 21,
 'but': 22,
 'film': 23,
 'you': 24,
 't': 25,
 'on': 26,
 'not': 27,
 'he': 28,
 'are': 29,
 'his': 30,
 'have': 31,
 'one': 32,
 'be': 33,
 'all': 34,
 'at': 35,
 'they': 36,
 'by': 37,
 'an': 38,
 'who': 39,
 'so': 40,
 'from': 41,
 'like': 42,
 'there': 43,
 'or': 44,
 'just': 45,
 'her': 46,
 'out': 47,
 'about': 48,
 'if': 49,
 'has': 50,
 'what': 51,
 'some': 52,
 'good': 53,
 'can': 54,
 'when': 55,
 'more': 56,
 'very': 57,
 'she': 58,
 'up': 59,
 'no': 60,
 'time': 61,
 'my': 62,
 'even': 63,
 'would': 64,
 'which': 65,
 'only': 66,
 'story': 67,
 'really': 68,
 'see': 69,
 'their': 70,
 'had': 71,
 'me': 72,
 'well': 73,
 'we': 74,
 'were': 75,
 'than': 76,
 'much': 77,
 'bad': 78,
 'get': 79,
 'been': 80,
 'other': 81,
 'do': 

### Part 2: Classification, baseline model

In [19]:
import torch.nn as nn


In [20]:
len(x_train)

20000

### Batching and Padding

In [84]:
x_train[0], y_train[0]

([14, 19, 9, 379, 22, 11, 50, 52, 53, 290], 1)

In [None]:
# Batching data 
# sort data 
x_train_sorted = sorted(x_train, key = lambda s : len(s))
# get index of sorted x_train
sorted_index = [x_train.index(seq) for seq in x_train_sorted]
# sort y_train using the indexes
y_train_sorted = [y_train[i] for i in sorted_index]

# batching 
x_batches = []
y_batches = []
# cut of value for batches -> batches are created with sequences that contain a max diff of 100
batch_buffer = 100 
# key for batching -? [index, current seq length]
start = [0, len(x_train[0])] 
# batch
for i, val in enumerate(x_train):
    # if seq length is greater than batch_buffer create batch 
    if len(val) - start[1] > batch_buffer:
        # create batch
        x_batches.append(x_train[start[0] : i])
        y_batches.append(y_train[start[0] : i])
        # update index and current seq length
        start[0] = i
        start[1] = len(val)


In [None]:
# padding

# padded batches 
px_batches = []
unique = set()
# apply padding per batch
for batch in x_batches:
    p_batch = [] # current patted batch
    # get maximal seq length for current batch
    max_size = max(len(seq) for seq in batch)
    # loop over seq in batch
    for seq in batch:
        unique.update(seq)
        # apply padding to seq and appedn
        p_batch.append(seq + [0]*(max_size - len(seq)))
    # append padded batch to padded batches
    px_batches.append(p_batch)
    

66968

In [26]:
import numpy as np

In [27]:
x = np.array([[1,1], [2,2], [3,3]])
x[:,1]

array([1, 2, 3])

### Elman Layer

In [28]:
import numpy as np

In [29]:
class Elman(nn.Module):
    
    def __init__(self, insize=300, outsize=300, hsize=300):
        super().__init__()
        self.lin1 = nn.Linear(insize, hsize)
        self.lin2 = nn.Linear(hsize, outsize) 

    def forward(self, x, hidden=None):
        # batch, len of sequence, embedding 
        b, t, e = x.size()
        if hidden is None:
            hidden = torch.zeros(b, e, dtype=torch.float)
        
        prev_h = None
        outs = []
        # range over time 
        for i in range(t):
            # inp = torch.cat([x[:, i, :], hidden], dim=1)
            inp = []
            # Compute first pass 
            xi = self.lin1(x[:, i , :])
            
            # manage hidden values 
            if prev_h is not None:
                xh = xi 
            else:
                xh = xi + prev_h

            # update hidden states 
            xh = np.tanh(xh)
            prev_h = xh

            hidden = xh

            # get outputs from sequence 
            out = self.lin2(xh)
            outs.append(out[:, None, :])

        return torch.cat(outs, dim=1), hidden

In [30]:
class Elman(nn.Module):
    
    def __init__(self, insize=300, outsize=300, hsize=300):
        super().__init__()

        self.lin1 = nn.Linear(insize, hsize)
        self.lin2 = nn.Linear(hsize, outsize) 

    def forward(self, x, hidden=None):
        # batch, len of sequence, embedding 
        b, t, e = x.size()
        if hidden is None:
            hidden = torch.zeros(b, e, dtype=torch.float)
        
        outs = []
        # range over time 
        for i in range(t):
            inp = torch.cat([x[:, i, :], hidden], dim=1)

            # Compute first pass 
            xi = self.lin1(inp)

            # hidden 
            xh = np.tanh(xi)
            hidden = xh

            # get outputs from sequence 
            out = self.lin2(xh)

            outs.append(out[:, None, :])

        return torch.cat(outs, dim=1), hidden

### Elman Network Pytorch 

In [32]:
px_batches_tens = [torch.tensor(i) for i in px_batches]
y_batches_tens = [torch.tensor(i, dtype = torch.float32) for i in y_batches]

In [94]:
emb = nn.Embedding(len(i2w), embedding_dim = 150)
x_emb = emb(px_batches_tens[0])
x_emb.shape


torch.Size([2775, 110, 150])

In [95]:
px_batches_tens[0].shape

torch.Size([2775, 110])

In [None]:
#  batch , lenght , embedding 
# embedding size = number of unique tokens in a batch = input size 
#  seq size = 
# embedding
class ELMAN(nn.Module):
    def __init__(self,embedding_size, hidden_size, output_size, dropout): 
                #  input_size, hidden_size, num_classes):
        super(ELMAN, self).__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(embedding_size, embedding_dim = 150)
        self.rnn = nn.RNN(150, hidden_size, dropout = dropout, batch_first = True)
        self.lin1 = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0):
        # create emebeddings
        x_emb = self.emb(x)

        # pass through rnn
        out, _ = self.rnn(x_emb, h0)

        # predict
        out = self.lin1(out[:, -1]) 

        return out

In [None]:
# inputs for network and hyperparameters
embedding_size = len(i2w)
hidden_size = 300
output_size = 1
alpha = 0.003
epochs = 10 
batch_size = len(px_batches_tens)

def train_rnn(px_batches_tens, y_batches_tens, embedding_size, hidden_size, output_size, alpha, epochs, batch_size):
    #inti network
    rnn = ELMAN(embedding_size, hidden_size, output_size, dropout = 0)

    # optimizers 
    obj_func = nn.MSELoss()
    optimizer = torch.optim.Adam(rnn.parameters(), alpha)

    e_loss = {"loss": [], "norm_loss": []}
    for epoch in range(epochs):
        batch_loss = 0.0
        for idx, batch in enumerate(px_batches_tens):
            h0 = torch.zeros(1, batch.shape[0], hidden_size) 
            optimizer.zero_grad()

            # get network output 
            output = rnn(batch, h0)
        
            # get loss 
            loss = obj_func(output, y_batches_tens[idx])
            
            # update network
            loss.backward()
            optimizer.step()

            # update batch loss
            batch_loss += loss.item()

        print(f"Epoch {epoch}:\nBatch loss: {batch_loss}, normalized loss: {batch_loss/batch_size}")
        # store loss
        e_loss["loss"].append(batch_loss)
        e_loss["norm_loss"].append(batch_loss/batch_size)

In [None]:
train_rnn(px_batches_tens, y_batches_tens, embedding_size, hidden_size, output_size, alpha, epochs, batch_size)

### LSTM 

In [131]:
class LSTM(nn.Module):
    def __init__(self, embedding_size, hidden_size, output_size, dropout): 
                #  input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(embedding_size, embedding_dim = 150)
        self.rnn = nn.LSTM(150, hidden_size, dropout = dropout, batch_first=True)
        self.lin1 = nn.Linear(hidden_size, output_size)

    def forward(self, x, states):
        # create emebeddings
        x_emb = self.emb(x)

        # pass through rnn
        out, _ = self.rnn(x_emb, states)

        # predict
        out = self.lin1(out[:, -1]) 

        return out
    

In [None]:
# inputs for network and hyperparameters
embedding_size = len(i2w)
hidden_size = 300
output_size = 1
alpha = 0.003
epochs = 10 
num_layers = 1
batch_size = len(px_batches_tens)

#inti network
lstm = LSTM(embedding_size, hidden_size, output_size, dropout = 0)

# optimizers 
obj_func = nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), alpha)


e_loss = {"loss": [], "norm_loss": []}

for epoch in range(epochs):
    batch_loss = 0.0  
    for idx, batch in enumerate(px_batches_tens):
        # initialize hidden state and cell state 
        h0 = torch.zeros(1, batch.shape[0], hidden_size) 
        c0 = torch.zeros(1, batch.shape[0], hidden_size)
        optimizer.zero_grad()

        # get network output 
        output = lstm(batch, (h0,c0))

        # get loss 
        loss = obj_func(output, y_batches_tens[idx])
        print("Loss of batch",loss)
        # update network
        loss.backward()
        optimizer.step()

        # update batch loss
        batch_loss += loss.item()
        
    e_loss.append(batch_loss)
    print(f"Epoch {epoch}:\nBatch loss: {batch_loss}, normalized loss: {batch_loss/batch_size}")
    # store loss
    e_loss["loss"].append(batch_loss)
    e_loss["norm_loss"].append(batch_loss/batch_size)

Loss of batch tensor(0.4071, grad_fn=<MseLossBackward0>)


### Hyperparameter Tunning 

In [None]:
def train_rnn(model, px_batches_tens, y_batches_tens, alpha, epochs, opt):

    embedding_size = len(i2w)
    hidden_size = 300
    output_size = 1
    alpha = 0.003
    epochs = 10 
    num_layers = 1
    batch_size = len(px_batches_tens)

    # inti network
    rnn = model(embedding_size, hidden_size, output_size, dropout = 0)

    # set objective function 
    obj_func = nn.MSELoss()

    # set optimizer 
    if opt == 0:
        optimizer = torch.optim.Adam(rnn.parameters(), alpha)
    elif opt == 1:
        optimizer = torch.optim.Adadelta(rnn.parameters(), alpha)
    else:
        optimizer = torch.optim.SGD(rnn.parameters(), alpha)
   
    e_loss = {"loss": [], "norm_loss": []}
    for epoch in range(epochs):
        batch_loss = 0.0
        for idx, batch in enumerate(px_batches_tens):
            h0 = torch.zeros(num_layers, batch.shape[0], hidden_size) 
            optimizer.zero_grad()

            # get network output 
            output = rnn(batch, h0)
        
            # get loss 
            loss = obj_func(output, y_batches_tens[idx])
            
            # update network
            loss.backward()
            optimizer.step()

            # update batch loss
            batch_loss += loss.item()

        print(f"Epoch {epoch}:\nBatch loss: {batch_loss}, normalized loss: {batch_loss/batch_size}")
        # store loss
        e_loss["loss"].append(batch_loss)
        e_loss["norm_loss"].append(batch_loss/batch_size)
    
    return e_loss["norm_loss"][-1]

In [103]:
import bayes_opt

In [105]:
#module for optimization
from bayes_opt import BayesianOptimization, UtilityFunction
# module for logging data 
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
# module for retriving datat 
from bayes_opt.util import load_logs

##### MLP Hyperparameter tunning 

In [None]:
from Q1Q2Q3 import Seq2SeqModel
# parameter bounds
pbounds = {"alpha" : ( 0.01, 0.003), "epochs": (100, 125), "opt": (-0.5, 2.5)}

# define wrapped funciton
def train_wrapper(alpha, epochs, opt):
    opt = int(round(opt))
    return train_rnn(Seq2SeqModel, px_batches_tens, y_batches_tens, alpha, epochs, opt = opt)

# create instance of optimizer 
optimizer_bayes = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
optimizer_bayes.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
optimizer_bayes.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
optimizer_bayes.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(optimizer_bayes.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(optimizer_bayes.max)

#### ELMAN hyperparameter tunning 

In [None]:
# parameter bounds
pbounds = {"alpha" : ( 0.01, 0.003), "epochs": (100, 125), "opt": (-0.5, 2.5)}

# define wrapped funciton
def train_wrapper(alpha, epochs, opt):
    opt = int(round(opt))
    return train_rnn(ELMAN, px_batches_tens, y_batches_tens, alpha, epochs, opt = opt)

# create instance of optimizer 
optimizer_bayes = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
optimizer_bayes.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
optimizer_bayes.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
optimizer_bayes.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(optimizer_bayes.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(optimizer_bayes.max)

#### LSTM hyperparameter tunning 

In [None]:
# parameter bounds
pbounds = {"alpha" : ( 0.01, 0.003), "epochs": (100, 125), "opt": (-0.5, 2.5)}

# define wrapped funciton
def train_wrapper(alpha, epochs, opt):
    opt = int(round(opt))
    return train_rnn(LSTM, px_batches_tens, y_batches_tens, alpha, epochs, opt = opt)

# create instance of optimizer 
optimizer_bayes = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
optimizer_bayes.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
optimizer_bayes.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
optimizer_bayes.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(optimizer_bayes.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(optimizer_bayes.max)

  return F.mse_loss(input, target, reduction=self.reduction)


KeyboardInterrupt: 

In [None]:
optimizer1 = torch.optim.Adadelta(elman.parameters(), alpha)
optimizer2 = torch.optim.Adam(elman.parameters(), alpha)

pbounds = {"alpha" : (0.1, 0.01, 0.003), "epochs": (75, 100, 125)}

# define wrapped funciton
def train_wrapper(alpha, epochs):
    return train_rnn(ELMAN, optimizer1, px_batches_tens, y_batches_tens, alpha, epochs)

# create instance of optimizer 
opt_bayes = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
opt_bayes.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
opt_bayes.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
opt_bayes.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(opt_bayes.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(opt_bayes.max)

ValueError: too many values to unpack (expected 2)

In [None]:
optimizer1 = torch.optim.Adadelta(elman.parameters(), alpha)
optimizer2 = torch.optim.Adam(elman.parameters(), alpha)

# pbounds = {"alpha" : (0.1, 0.01, 0.003), "optimizer" : (optimizer1, optimizer2), "epochs": (75, 100, 125)}
parameter_tuning = [(0.001,optimizer1, 75),(0.01, optimizer2, 100),(0.001, optimizer1, 125)]
def train_wrapper(alpha, optimizer, epochs):
    return train_rnn(ELMAN, optimizer, px_batches_tens, y_batches_tens, alpha, epochs)

for alpha, optimizer, epochs in parameter_tuning:
    train_wrapper(alpha, optimizer, epochs)


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 0:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 1:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 2:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 3:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 4:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 5:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 6:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 7:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 8:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 9:
Batch loss: 5.483054695650935, normalized loss: 0.34269091847818345
Epoch 0:
Batch loss: 13.921797037124634, normalized loss: 0.8701123148202896
Epoch 1:
Batch loss: 13.921797037124634, normalized loss: 0.8701123148202896


KeyboardInterrupt: 

In [None]:
def train_rnn(model, optimizer, px_batches_tens, y_batches_tens, alpha, epochs):

    embedding_size = len(i2w)
    hidden_size = 300
    output_size = 1
    alpha = 0.003
    epochs = 10 
    num_layers = 1
    batch_size = len(px_batches_tens)

    #inti network
    rnn = model(embedding_size, hidden_size, output_size, dropout = 0)

    # optimizers 
    obj_func = nn.MSELoss()
    optimizer = optimizer

    e_loss = {"loss": [], "norm_loss": []}
    for epoch in range(epochs):
        batch_loss = 0.0
        for idx, batch in enumerate(px_batches_tens):
            h0 = torch.zeros(num_layers, batch.shape[0], hidden_size) 
            optimizer.zero_grad()

            # get network output 
            output = rnn(batch, h0)
        
            # get loss 
            loss = obj_func(output, y_batches_tens[idx])
            
            # update network
            loss.backward()
            optimizer.step()

            # update batch loss
            batch_loss += loss.item()

        print(f"Epoch {epoch}:\nBatch loss: {batch_loss}, normalized loss: {batch_loss/batch_size}")
        # store loss
        e_loss["loss"].append(batch_loss)
        e_loss["norm_loss"].append(batch_loss/batch_size)
    
    return e_loss["norm_loss"][-1]


In [None]:
#module for optimization
from bayes_opt import BayesianOptimization, UtilityFunction
# module for logging data 
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
# module for retriving datat 
from bayes_opt.util import load_logs

In [None]:
optimizer1 = torch.optim.Adadelta(lstm.parameters(), alpha)
optimizer2 = torch.optim.bayesopt(lstm.parameters(), alpha)

pbounds = {"alpha" : (0.1, 0.01, 0.003), "optimizer" : (optimizer1, optimizer2), "epochs": (75, 100, 125)}

# define wrapped funciton
def train_wrapper(alpha, optimizer, epochs):
    return train_rnn(ELMAN, optimizer, px_batches_tens, y_batches_tens, alpha, epochs)

# create instance of optimizer 
optimizer1 = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
optimizer1.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
optimizer1.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
optimizer1.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(optimizer1.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(optimizer1.max)

In [None]:
optimizer1 = torch.optim.Adadelta(lstm.parameters(), alpha)
optimizer2 = torch.optim.bayesopt(lstm.parameters(), alpha)

pbounds = {"alpha" : (0.1, 0.01, 0.003), "optimizer" : (optimizer1, optimizer2), "epochs": (75, 100, 125)}

# define wrapped funciton
def train_wrapper(alpha, optimizer, epochs):
    return train_rnn(LSTM, optimizer, px_batches_tens, y_batches_tens, alpha, epochs)

# create instance of optimizer 
optimizer1 = BayesianOptimization(
    f = train_wrapper,
    pbounds = pbounds,
    random_state = 1
)

# create UtilityFunction object for aqu. function
utility = UtilityFunction(kind = "ei", xi= 0.02)

# set gaussian process parameter
optimizer1.set_gp_params(alpha = 1e-6)

# create logger 
logger = JSONLogger(path = "./tunning1.log")
optimizer1.subscribe(Events.OPTIMIZATION_STEP, logger)

# initial search 
optimizer1.maximize(
    init_points = 5, # number of random explorations before bayes_opt
    n_iter = 15, # number of bayes_opt iterations
)

# print out the data from the initial run to check if bounds need update 
for i, param in enumerate(optimizer1.res):
    print(f"Iteration {i}: \n\t {param}")

# get best parameter
print("Best Parameters found: ")
print(optimizer1.max)