In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import torch
import torch.nn as nn
from data import gen_data
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence,\
    pad_sequence, \
    pad_packed_sequence

import torch.optim as optim

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

data = gen_data(n_examples=1024,
                start_low=0, start_high=10,
                delta_low=1, delta_high=5,
                len_low=5, len_high=30)


class CustomData(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

In [9]:
def func(seq):
    print(type(seq))
    print(seq)
    seq = [torch.from_numpy(s).float() for s in seq]
    lengths = [len(s) for s in seq]
    
    padded_seq = pad_sequence(seq, batch_first=True).unsqueeze(-1) #this changes if use embeddings i.e. no unsqueeze but map to embeddings
    packed_padded_seq = pack_padded_sequence(padded_seq,
                                             lengths=lengths,
                                             batch_first=True,
                                             enforce_sorted=False)
    return packed_padded_seq
   

In [6]:
class AE_rnn(nn.Module):
        def __init__(self,
                batch_size     = 2,
                bidir          = True,
                num_layers_enc = 3,
                hidden_dim_enc = 5,
                num_layers_dec = 7,
                hidden_dim_dec = 9,
                emb_dim        = 11,
                N_max          = 256):
                super().__init__()
                self.dir            = 2 if bidir else 1

                self.num_layers_dec = num_layers_dec
                self.batch_size     = batch_size
                self.enc = nn.LSTM(input_size=emb_dim,
                        bidirectional=True,
                        num_layers= num_layers_enc,
                        hidden_size=hidden_dim_enc,
                        batch_first=True)

                self.enc = nn.LSTM(input_size=emb_dim,
                                bidirectional=bidir,
                                num_layers= num_layers_enc,
                                hidden_size=hidden_dim_enc,
                                batch_first=True)

                self.dec = nn.LSTM(input_size=1,
                        bidirectional=bidir,
                        num_layers= num_layers_dec,
                        hidden_size=hidden_dim_dec,
                        batch_first=True)

                self.emb = nn.Embedding(num_embeddings=N_max,
                                embedding_dim=emb_dim)


                self.proj1 = nn.Linear(in_features=dir*num_layers_enc*hidden_dim_enc,#state.shape[1],
                                out_features=dir*num_layers_dec*hidden_dim_dec) 
                        
                self.proj2 = nn.Linear(in_features=dir*num_layers_enc*hidden_dim_enc,#state.shape[1],
                                out_features=dir*num_layers_dec*hidden_dim_dec)


                self.pred  = nn.Linear(in_features=dir*hidden_dim_dec,
                                        out_features=N_max)
        def forward(self, seq, lengths):
                padded_seq_emb = self.emb(seq)
                packed_padded_seq = pack_padded_sequence(padded_seq_emb,
                                                        lengths=lengths,#.tolist(),
                                                        batch_first=True,
                                                        enforce_sorted=False)

                out_enc, (hn, cn) = self.enc(packed_padded_seq)
                #print(hn.shape)
                hn = self.proj1(hn.permute(1,0,2).flatten(1)).reshape(self.batch_size,self.num_layers_dec*self.dir,-1).permute(1,0,2)
                cn = self.proj2(cn.permute(1,0,2).flatten(1)).reshape(self.batch_size,self.num_layers_dec*self.dir,-1).permute(1,0,2)
                out_dec, (hn_dec, cn_dec) = self.dec(seq.unsqueeze(2).float(), (hn, cn))
                out_pred = self.pred(out_dec)
                return out_pred
                


In [10]:
bidir          = True
dir            = 2 if bidir else 1 
batch_size     = 64
num_layers_enc = 3
hidden_dim_enc = 5
num_layers_dec = 7
hidden_dim_dec = 9
emb_dim        = 11
N_max          = 256


ds = CustomData(data) #dataset
dl = DataLoader(ds,
                batch_size=batch_size,
                collate_fn=func) 

ae = AE_rnn(batch_size     = batch_size,
            bidir          = True,
            num_layers_enc = num_layers_enc,
            hidden_dim_enc = hidden_dim_enc,
            num_layers_dec = num_layers_dec,
            hidden_dim_dec = num_layers_dec,
            emb_dim        = emb_dim,
            N_max          = N_max)

In [137]:
print(len(ds))

1000


In [21]:
emb = nn.Embedding(num_embeddings=N_max,
                    embedding_dim=emb_dim)
for idx in range(0, len(ds), batch_size):
    seq = ds[idx:idx+batch_size]
    seq = [torch.from_numpy(s) for s in seq]
    lengths = torch.tensor([len(s) for s in seq])
    padded_seq = pad_sequence(seq, batch_first=True)
    packed_padded_seq = pack_padded_sequence(padded_seq,
                                lengths=lengths,#.tolist(),
                                batch_first=True,
                                enforce_sorted=False)
    print(padded_seq)
    padded_seq_emb = emb(padded_seq)
    packed_padded_emb_seq = pack_padded_sequence(padded_seq_emb,
                                lengths=lengths,#.tolist(),
                                batch_first=True,
                                enforce_sorted=False)
    print(packed_padded_emb_seq)
    


tensor([[ 8, 12, 16,  ...,  0,  0,  0],
        [ 9, 12, 15,  ...,  0,  0,  0],
        [ 2,  3,  4,  ...,  0,  0,  0],
        ...,
        [ 0,  2,  4,  ...,  0,  0,  0],
        [ 5,  6,  7,  ..., 31, 32, 33],
        [ 1,  4,  7,  ...,  0,  0,  0]])
PackedSequence(data=tensor([[-1.5914,  0.7805,  0.5566,  ...,  0.2058,  0.6892, -0.8599],
        [-0.8311,  1.9699, -0.6179,  ...,  1.5134,  1.2100, -0.3859],
        [-0.6370,  0.0404,  0.4163,  ...,  0.0052,  0.3661,  1.2861],
        ...,
        [ 0.1332,  1.5951, -0.2018,  ...,  0.5817, -1.1779, -0.4213],
        [-0.2169, -0.4722, -0.4461,  ...,  0.3102,  0.1123,  0.0378],
        [-0.3741,  0.7105,  1.3104,  ..., -0.3167,  0.3004, -0.4434]],
       grad_fn=<PackPaddedSequenceBackward0>), batch_sizes=tensor([64, 64, 64, 64, 64, 60, 59, 59, 56, 55, 54, 51, 50, 48, 45, 39, 38, 32,
        29, 27, 25, 17, 13, 11,  8,  6,  5,  4,  3]), sorted_indices=tensor([62, 55, 35, 56, 27, 43, 17, 42, 63, 48, 40,  3, 59, 21, 20, 45,  7, 44,
    

In [194]:
if torch.cuda.is_available():
    device = torch.device("cuda")  # Or "cuda:0" for the first GPU
else:
    device = torch.device("cpu")
ae.to(device)


data_gpu = []
for idx in range(0, len(ds), batch_size):
    seq = ds[idx:idx+batch_size]
    seq = [torch.from_numpy(s).to(device) for s in seq]
    padded_seq = pad_sequence(seq, batch_first=True)
    lengths = [len(s) for s in seq]
    lengths = torch.tensor([len(s) for s in seq]).to(device)
    data_gpu.append((padded_seq, lengths))


criterion = nn.CrossEntropyLoss()
#data_emb_device = [(edge_index.to(device), [bb.to(device) for bb in node_embs ]) for (edge_index, node_embs) in data_emb] 
optimizer = optim.Adam(ae.parameters(), lr=0.01)
epoch_num = 1000
for epoch in range(epoch_num):
    total_loss = 0
    total_items = 0
    j = -1
    for (padded_seq, lengths) in data_gpu:
        out = ae(padded_seq, lengths)
        loss = criterion(out.flatten(0).reshape(-1,N_max), padded_seq.flatten())
        total_loss  += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'For {j+1} functions: Epoch {epoch+1}/{epoch_num}, Average Loss: {total_loss:.5f}', end='\r')
    

RuntimeError: 'lengths' argument should be a 1D CPU int64 tensor, but got 1D cuda:0 Long tensor

In [193]:
print(ae)

AE_rnn(
  (enc): LSTM(11, 5, num_layers=3, batch_first=True, bidirectional=True)
  (dec): LSTM(1, 7, num_layers=7, batch_first=True, bidirectional=True)
  (emb): Embedding(256, 11)
  (proj1): Linear(in_features=30, out_features=98, bias=True)
  (proj2): Linear(in_features=30, out_features=98, bias=True)
  (pred): Linear(in_features=14, out_features=256, bias=True)
)


In [None]:
epoch_num = 1000
for epoch in range(epoch_num):
    total_loss = 0
    total_items = 0
    j = -1
    for graph in data_emb_device:
        j += 1
        edge_index, node_embs = graph
        for bb in node_embs:
            #bb_device = bb.to(device)
            out, embs = ae(bb)
            #print(a.shape)
            #print(out)
            #total_loss += unsupervised_loss.item()
            #loss = criterion(out, embs)
            loss = criterion(out, bb)
            #loss = criterion(output, initial_node_embeddings[j])
            # Backward pass and optimization
            total_loss  += loss.item()
            total_items += bb.shape[0]

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Print or log the average loss for monitoring
    if epoch%10==0 or epoch==epoch_num-1:
        avg_loss = total_loss / len(data_emb)
        print(f'For {j+1} functions: Epoch {epoch+1}/{epoch_num}, Average Loss: {avg_loss:.5f}', end='\r')
    if epoch%100==0 or epoch==epoch_num-1:
        avg_loss = total_loss / len(data_emb)
        print(f'For {j+1} functions: Epoch {epoch+1}/{epoch_num}, Average Loss: {avg_loss:.5f}', end='\n')