In [1]:
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import math
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset,random_split

In [11]:
#import os
#os.chdir("./../.")
#os.getcwd()

'c:\\Users\\Okan\\Desktop\\Team Project'

In [3]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x15f0ad7a490>

### Working Transformer

In [39]:
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"
        
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_k = d_model // num_heads
        
        self.W_q = nn.Linear(d_model, d_model)
        self.W_k = nn.Linear(d_model, d_model)
        self.W_v = nn.Linear(d_model, d_model)
        self.W_o = nn.Linear(d_model, d_model)
        
    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
        batch_size = Q.size()[0]
        seq_length = Q.size()[1]
        #mask = mask.view(batch_size, 1, 1, seq_length).expand(-1, self.num_heads, -1, -1)
        mask = mask.unsqueeze(1).unsqueeze(1).expand(-1, self.num_heads, -1, -1)
        mask = mask.permute(0, 1, 3, 2)
        #print("mask shape:",mask.shape)
        #print(attn_scores.shape)
        if mask is not None:
            attn_scores = attn_scores.masked_fill(mask == True, -1e9)
        attn_probs = torch.softmax(attn_scores, dim=-1)
        output = torch.matmul(attn_probs, V)
        return output
        
    def split_heads(self, x):
        batch_size, seq_length, d_model = x.size()
        return x.view(batch_size, seq_length, self.num_heads, self.d_k).transpose(1, 2)
        
    def combine_heads(self, x):
        batch_size, _, seq_length, d_k = x.size()
        return x.transpose(1, 2).contiguous().view(batch_size, seq_length, self.d_model)
        
    def forward(self, Q, K, V, mask=None):
        #print("Q:",Q.shape)
        Q = self.split_heads(self.W_q(Q))
        K = self.split_heads(self.W_k(K))
        V = self.split_heads(self.W_v(V))
        
        attn_output = self.scaled_dot_product_attention(Q, K, V, mask)
        output = self.W_o(self.combine_heads(attn_output))
        return output

In [40]:
class PositionWiseFeedForward(nn.Module):
    def __init__(self, d_model, d_ff):

        #d_ff: dimension of hidden layer

        super(PositionWiseFeedForward, self).__init__()
        self.fc1 = nn.Linear(d_model, d_ff)
        self.fc2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

In [41]:
class PositionalEncoding2(nn.Module):
    def __init__(self, d_model, max_seq_length):
        super(PositionalEncoding2, self).__init__()
        
        pe = torch.zeros(max_seq_length, d_model)
        position = torch.arange(0, max_seq_length, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        
        self.register_buffer('pe', pe.unsqueeze(0))
        
    def forward(self, x):
        return x + self.pe[:, :x.size(1)]

In [42]:
class EncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(EncoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, mask):
        attn_output = self.self_attn(x, x, x, mask)
        x = self.norm1(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm2(x + self.dropout(ff_output))
        return x

In [43]:
class DecoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout):
        super(DecoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads)
        self.cross_attn = MultiHeadAttention(d_model, num_heads)
        self.feed_forward = PositionWiseFeedForward(d_model, d_ff)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x, enc_output, src_mask, tgt_mask):
        attn_output = self.self_attn(x, x, x, tgt_mask)
        x = self.norm1(x + self.dropout(attn_output))
        attn_output = self.cross_attn(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + self.dropout(attn_output))
        ff_output = self.feed_forward(x)
        x = self.norm3(x + self.dropout(ff_output))
        return x

In [44]:
class TransformerFromscratch(nn.Module):
    def __init__(self, tgt_vocab_size, d_model, num_heads, num_layers, d_ff, max_seq_length, dropout):
        super(TransformerFromscratch, self).__init__()
        #self.encoder_embedding = nn.Embedding(src_vocab_size, d_model)
        #self.decoder_embedding = nn.Embedding(tgt_vocab_size, d_model)
        self.positional_encoding = PositionalEncoding2(d_model, max_seq_length)

        self.encoder_layers = nn.ModuleList([EncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([DecoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)])

        
        self.fc = nn.Linear(d_model, tgt_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def generate_mask(self, src, tgt):
        src_mask = (src != 10).unsqueeze(1).unsqueeze(2)
        tgt_mask = (tgt != 10).unsqueeze(1).unsqueeze(3)
        seq_length = tgt.size(1)
        print("seq_length",seq_length)
        nopeak_mask = (1 - torch.triu(torch.ones(1, seq_length, seq_length), diagonal=1)).bool()
        print("nopeak:",nopeak_mask.shape)
        print("tgt_mask:",tgt_mask.shape)
        tgt_mask = tgt_mask & nopeak_mask
        return src_mask, tgt_mask
    
    def create_pad_mask(self, input : torch.tensor, pad_token: int) -> torch.tensor:
        # If matrix = [1,2,3,0,0,0] where pad_token=0, the result mask is
        # [False, False, False, True, True, True]

        t = []
        for seq in input:
            list =[]
            for token in seq:
                b = False
                for value in token:
                    if value == pad_token:
                        b = True
                list.append(b)
            t.append(list)
        return torch.tensor(t)

        #return (input == pad_token)
    
    def get_tgt_mask(self, size) -> torch.tensor:
        # Generates a square matrix where the each row allows one word more to be seen
        mask = torch.tril(torch.ones(size, size) == 1) # Lower triangular matrix
        mask = mask.float()
        mask = mask.masked_fill(mask == 0, float('-inf')) # Convert zeros to -inf
        mask = mask.masked_fill(mask == 1, float(0.0)) # Convert ones to 0
        
        # EX for size=5:
        # [[0., -inf, -inf, -inf, -inf],
        #  [0.,   0., -inf, -inf, -inf],
        #  [0.,   0.,   0., -inf, -inf],
        #  [0.,   0.,   0.,   0., -inf],
        #  [0.,   0.,   0.,   0.,   0.]]
        
        return mask


    def forward(self, src, tgt):
        #src_mask, tgt_mask = self.generate_mask(src, tgt)
        src_mask = self.create_pad_mask(src,pad_token=10).to(device)
        tgt_mask = self.create_pad_mask(tgt,pad_token=10).to(device)
        src_embedded = self.dropout(self.positional_encoding(src))
        tgt_embedded = self.dropout(self.positional_encoding(tgt))

        enc_output = src_embedded
        for enc_layer in self.encoder_layers:
            enc_output = enc_layer(enc_output, src_mask)

        dec_output = tgt_embedded
        for dec_layer in self.decoder_layers:
            dec_output = dec_layer(dec_output, enc_output, src_mask, tgt_mask)

        output = self.fc(dec_output)
        return output

In [45]:
model2 = TransformerFromscratch(tgt_vocab_size=268, d_model=268, num_heads=4, num_layers=8, d_ff=2048, dropout=0.1,max_seq_length=10)
model2.to(device)
opt = torch.optim.SGD(model2.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

In [46]:
def train_loop(model, opt, loss_fn, dataloader):
    """
    Method from "A detailed guide to Pytorch's nn.Transformer() module.", by
    Daniel Melchor: https://medium.com/@danielmelchor/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1
    """
    
    model.train()
    total_loss = 0
    
    
    for batch in dataloader:
        X, y = batch[0], batch[1]
        #X, y = torch.tensor(X), torch.tensor(y)

        # Now we shift the tgt by one so with the <SOS> we predict the token at pos 1
        y_input = y[:,:-1]
        y_expected = y[:,1:]
        
        # Get mask to mask out the next words
        #sequence_length = y.size(1)
        #tgt_mask = model.get_tgt_mask(sequence_length)
        
        #print(y.view(y.size(0), -1).shape)
        #pad_mask = model.create_pad_mask(y, pad_token=-100)
        #print(pad_mask)

        # Standard training except we pass in y_input and tgt_mask
        
        #print(torch.isnan(X).any())
        #print(torch.isnan(y).any())

        #pred = model(X, y, tgt_mask, pad_mask)

        pred = model2(X,y)

        #print("pred shape:",pred.shape)
        #print("y shape:",y.shape)

        y_flattened = y.contiguous().view(-1)

        #print(y_flattened)

        # Permute pred to have batch size first again
        #pred = pred.permute(1, 2, 0)      
        loss = loss_fn(pred, y)

        #print("prediction:",pred)

        opt.zero_grad()
        loss.backward()

        # tryign to print the gradient
        #for p in model.parameters():
        #    print(p.grad.norm())

        #for name, param in model2.named_parameters():
        #    if 'weight' in name:
         #       print(name)
         #       print(param.data.cpu().numpy().shape)
         #       print('gradient is \t', param.grad, '\trequires grad: ', param.requires_grad)

        # gradient clipping to avoid the exploding gradient problem
        torch.nn.utils.clip_grad_value_(model2.parameters(), 10.)

        opt.step()
    
        total_loss += loss.detach().item()
        
    return total_loss / len(dataloader)

In [47]:
from torch.utils.data import TensorDataset, DataLoader
train = TensorDataset(inpTensor2.float(), outTensor2.float())
batch_size = 200 # Set your desired batch size
train_dataloader = DataLoader(train, batch_size=batch_size, shuffle=True)  # For input data

In [48]:
def fit(model, opt, loss_fn, train_dataloader, epochs):
    """
    Method from "A detailed guide to Pytorch's nn.Transformer() module.", by
    Daniel Melchor: https://medium.com/@danielmelchor/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1
    """
    
    # Used for plotting later on
    train_loss_list, validation_loss_list = [], []
    
    print("Training and validating model")
    for epoch in range(epochs):
        print("-"*25, f"Epoch {epoch + 1}","-"*25)
        
        train_loss = train_loop(model2, opt, loss_fn, train_dataloader)
        train_loss_list += [train_loss]
        
        #validation_loss = validation_loop(model, loss_fn, val_dataloader)
        #validation_loss_list += [validation_loss]
        
        print(f"Training loss: {train_loss:.4f}")
        #print(f"Validation loss: {validation_loss:.4f}")
        print()
        
    return train_loss_list, validation_loss_list
    
train_loss_list, validation_loss_list = fit(model2, opt, loss_fn, train_dataloader, 30)

Training and validating model
------------------------- Epoch 1 -------------------------
Training loss: 620.0747

------------------------- Epoch 2 -------------------------
Training loss: 586.9604

------------------------- Epoch 3 -------------------------
Training loss: 560.4023

------------------------- Epoch 4 -------------------------
Training loss: 535.7889

------------------------- Epoch 5 -------------------------
Training loss: 507.6911

------------------------- Epoch 6 -------------------------
Training loss: 480.7698

------------------------- Epoch 7 -------------------------
Training loss: 454.3157

------------------------- Epoch 8 -------------------------
Training loss: 426.2244

------------------------- Epoch 9 -------------------------
Training loss: 397.7719

------------------------- Epoch 10 -------------------------
Training loss: 370.1154



### Filtering the output 2

In [36]:
torch.__version__
torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(device)

cpu


In [37]:
with open("data/surrogate_model/animation_label.pkl", "rb") as f:
    surrogate2 = pickle.load(f)

#Filter with only good or very good ratings
#filtered_output = surrogate[surrogate)][['label'].isin(['Good','Very Good']["file","animation_id","model_output","label"]]
filtered_output2 = surrogate2[["file","animation_id","model_output","label"]]

# dictionary for mapping

mapping_dict = {"Very Good": 6, "Good": 5, "Bad": 4,"Okay":3, "Very Bad": 2, "no_rating": 1}

# Create another column changing the label into ints

filtered_output2['label2'] = filtered_output2['label'].replace(mapping_dict)


# get the names of unique logos by splitting with animation number
logos = filtered_output2["file"].str.split("_animation").str[0].unique()

#print(logos)

# create a data frame for the collected best animations
bestoutput2 = pd.DataFrame()

# go through each logo to find the best animation
for logo in logos:

    # make a data frame that contains all the animations of one logo
    temp = filtered_output2[filtered_output2["file"].str.contains(logo)]

    #display(temp)

    # create a sum 
    mean_by_label = temp.groupby('file')['label2'].mean().reset_index()

    #print(mean_by_label)

    bestlogo = mean_by_label.loc[mean_by_label['label2'].idxmax()]

    #print(bestlogo)

    # get all the animated paths with the best animation of the logo
    best_animations2 = temp[temp["file"]==bestlogo["file"]]

    # add to the file
    bestoutput2 = pd.concat([bestoutput2,best_animations2],axis=0, ignore_index=True)
bestoutput2 = bestoutput2.sort_values(by=['file','animation_id'])
display(bestoutput2)

filenames = bestoutput2["file"].unique()
list = []
for name in filenames:
    seq = bestoutput2[bestoutput2["file"]==name]
    seq = seq["model_output"]
    seq = pd.DataFrame(bestoutput2["model_output"].to_list(), columns=["a1","a2","a3","a4","a5","a6","a7","a8","a9","a10","a11","a12"])
    
    seq = pd.concat([pd.DataFrame(10, index=seq.index, columns=range(0, 256)), seq], axis=1, ignore_index=True)
    
    if len(seq) > 4:
        seq = seq[:4]

    sos = pd.DataFrame([[30]*268])
    
    eos = pd.DataFrame([[50]*268])

    seq = pd.concat([sos, seq, eos], ignore_index=True)
    
    while len(seq) < 6:
           seq = pd.concat([seq, pd.DataFrame([[-100]*268])], ignore_index=True)
           
    #seq = seq.apply(lambda x: np.array(x).astype(np.float32))
    #tokens = []
    #for l in seq:
    #    tokens.append(torch.tensor(l))

    list.append(torch.tensor(seq.values))
outTensor2 = torch.stack(list)

outTensor2 = outTensor2.to(device)
print(outTensor2.max())
print(outTensor2.shape)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_output2['label2'] = filtered_output2['label'].replace(mapping_dict)


Unnamed: 0,file,animation_id,model_output,label,label2
436,logo_0_animation_0,0,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Good,5.0
439,logo_0_animation_0,1,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0
438,logo_0_animation_0,3,"[0, 0, 0, 0, 0, 1, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0
437,logo_0_animation_0,4,"[0, 0, 1, 0, 0, 0, -1.0, -1.0, -1.0, 0.4205715...",Good,5.0
435,logo_0_animation_0,5,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0
...,...,...,...,...,...
892,logo_99_animation_0,21,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.4...",Very Good,6.0
811,logo_99_animation_0,22,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Very Good,6.0
891,logo_99_animation_0,22,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Very Good,6.0
810,logo_99_animation_0,23,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Very Good,6.0


tensor(50., dtype=torch.float64)
torch.Size([359, 6, 268])


### Creating the input tensor with the diltered output

In [38]:
with open("data\embeddings\path_embedding.pkl", "rb") as f:
    inp = pickle.load(f)

bestoutput2["filename"] = bestoutput2["file"].str.split("_animation").str[0]

display(bestoutput2)

inp['animation_id'] = inp['animation_id'].astype(int)

#names = bestoutput2["file"].str.replace("_animation_0", "")

#input = inp[(inp["filename"].isin(bestoutput2["filename"])) & (inp["animation_id"].isin(bestoutput2["animation_id"]))]
input = pd.merge(bestoutput2, inp, on=['filename', 'animation_id'],how='inner')
input = input.drop(['model_output', 'label','label2','file'], axis=1)

input = input.sort_values(by=['filename','animation_id'])
display(input)
filenames = input["filename"].unique()
#print(filenames)
list = []
for name in filenames:
    #print(name)
    seq = input[input["filename"]==name].loc[:, ~inp.columns.isin(["filename","animation_id"])][:4]
    #print(seq)
    seq = pd.concat([seq, pd.DataFrame(10, index=seq.index, columns=range(256, 268))], axis=1, ignore_index=True)

    while len(seq) < 4:
        seq = pd.concat([seq, pd.DataFrame([[10]*268])], ignore_index=True)

    sos = pd.DataFrame([[30]*268])

    
    eos = pd.DataFrame([[50]*268])

    seq = pd.concat([sos, seq, eos], ignore_index=True)

    list.append(torch.tensor(seq.values))
    #print(list)
inpTensor2 = torch.stack(list)
inpTensor2 = inpTensor2.to(device)
print(inpTensor2.shape)

Unnamed: 0,file,animation_id,model_output,label,label2,filename
436,logo_0_animation_0,0,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Good,5.0,logo_0
439,logo_0_animation_0,1,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0,logo_0
438,logo_0_animation_0,3,"[0, 0, 0, 0, 0, 1, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0,logo_0
437,logo_0_animation_0,4,"[0, 0, 1, 0, 0, 0, -1.0, -1.0, -1.0, 0.4205715...",Good,5.0,logo_0
435,logo_0_animation_0,5,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Good,5.0,logo_0
...,...,...,...,...,...,...
892,logo_99_animation_0,21,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.4...",Very Good,6.0,logo_99
811,logo_99_animation_0,22,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Very Good,6.0,logo_99
891,logo_99_animation_0,22,"[0, 0, 0, 1, 0, 0, -1.0, -1.0, -1.0, -1.0, 0.8...",Very Good,6.0,logo_99
810,logo_99_animation_0,23,"[0, 0, 0, 0, 1, 0, -1.0, -1.0, -1.0, -1.0, -1....",Very Good,6.0,logo_99


Unnamed: 0,animation_id,filename,0,1,2,3,4,5,6,7,...,246,247,248,249,250,251,252,253,254,255
0,0,logo_0,0.763518,-0.982797,-0.446681,1.089468,-0.070563,0.710206,-0.491675,-1.631172,...,0.339061,0.022934,0.195161,-0.046488,-0.492103,-0.605836,-1.282879,0.613195,0.297194,-0.172312
1,1,logo_0,0.851117,-1.775123,0.649689,-0.688600,0.216071,0.135211,-1.748761,-1.347670,...,-1.616431,-1.572003,0.242460,0.430259,0.079752,-1.039526,-0.696104,0.090277,-0.228757,0.144372
2,3,logo_0,0.291136,-0.928242,0.265542,-0.261439,-0.386160,1.256256,-0.414706,-1.206105,...,-0.446020,-1.369758,0.356421,1.456656,0.468766,-1.077724,-0.548627,-0.300660,0.632805,-0.136473
3,4,logo_0,0.504446,-0.543099,0.915062,1.293575,-0.849605,1.120387,-0.637641,-1.337280,...,0.162753,0.206993,-0.201259,-1.087391,-0.597388,-0.992079,-0.851486,-0.225463,-0.549269,0.088637
4,5,logo_0,-0.641569,-0.657125,-0.105109,-0.031630,-0.572032,0.912017,-0.569627,-1.573482,...,-1.079276,0.285482,1.532865,-0.375210,-0.249130,-0.551393,-1.024246,0.623726,-1.073305,0.166613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902,21,logo_99,0.729620,-1.035586,0.390709,1.910684,-0.489203,2.197111,0.424709,-0.389426,...,1.130544,-1.892641,-0.690288,1.371062,0.718980,-0.827454,-0.557172,0.235162,0.523978,-0.592003
903,22,logo_99,1.147938,-0.799806,0.664411,1.610482,-0.612273,1.194499,0.076532,-0.662109,...,0.415811,-1.569571,-0.424470,0.518902,0.160598,-1.283986,-0.463894,0.319661,0.785325,-0.294870
904,22,logo_99,1.147938,-0.799806,0.664411,1.610482,-0.612273,1.194499,0.076532,-0.662109,...,0.415811,-1.569571,-0.424470,0.518902,0.160598,-1.283986,-0.463894,0.319661,0.785325,-0.294870
905,23,logo_99,1.645128,-1.911424,0.806118,0.850210,0.554424,1.578238,-0.722733,-0.704658,...,0.883759,-2.000007,-0.517095,2.213172,0.733567,-0.577074,-0.710312,-0.591997,0.836300,-0.720247


torch.Size([359, 6, 268])


In [80]:
test = inpTensor2[:1,:,:]
test = test.to(torch.float32)
test = test.to
print(test.shape)

torch.Size([1, 6, 268])


In [151]:
def predict(model, input_sequence, max_length=6, SOS_token=[30 * 268], EOS_token=50):
    """
    Method from "A detailed guide to Pytorch's nn.Transformer() module.", by
    Daniel Melchor: https://medium.com/@danielmelchor/a-detailed-guide-to-pytorchs-nn-transformer-module-c80afbc9ffb1
    """
    model.eval()
    
    y_input = torch.tensor([[[SOS_token]*268]], dtype=torch.float32)

    num_tokens = len(input_sequence[0])

    for _ in range(max_length):
        # Get source mask
        #tgt_mask = model.get_tgt_mask(y_input.size(1))
        
        print(input_sequence.shape, y_input.shape)
        pred = model(input_sequence, y_input)
        print(pred.shape)
        next_item = pred.topk(1)[1].view(-1)[-1].item() # num with highest probability
        next_item = torch.tensor([[next_item]])
        
        next_item = pred[:,:1,:]
        print(next_item.shape)

        #print(y_input, next_item)
        # Concatenate previous input with predicted best word
        #print(y_input.shape, next_item.shape)
        y_input = torch.cat((y_input, next_item), dim=1)

        # Stop if model predicts end of sentence
        print(next_item.view(-1).shape)
        if next_item.view(-1).item() == [EOS_token*268]:
            break

    return y_input.view(-1).tolist()

svg_animations = predict(model2, test)

torch.Size([1, 6, 268]) torch.Size([1, 1, 268])
torch.Size([1, 6, 268])
torch.Size([1, 1, 268])
torch.Size([268])


RuntimeError: a Tensor with 268 elements cannot be converted to Scalar

In [11]:
import sys
sys.path.append('AnimateSVG/src')
from AnimateSVG.src.pipeline import *

for i, row in svg_animations.iterrows():
            try:
                self._insert_animation(row['animation_id'], row['animation_vector'], filename_suffix=row['model'])
            except FileNotFoundError:
                print(f"File not found: {row['filename']}")
                pass

Unnamed: 0,animation_id,filename,0,1,2,3,4,5,6,7,...,246,247,248,249,250,251,252,253,254,255
0,0,logo_0,0.763518,-0.982797,-0.446681,1.089468,-0.070563,0.710206,-0.491675,-1.631172,...,0.339061,0.022934,0.195161,-0.046488,-0.492103,-0.605836,-1.282879,0.613195,0.297194,-0.172312
1,1,logo_0,0.851117,-1.775123,0.649689,-0.688600,0.216071,0.135211,-1.748761,-1.347670,...,-1.616431,-1.572003,0.242460,0.430259,0.079752,-1.039526,-0.696104,0.090277,-0.228757,0.144372
2,3,logo_0,0.291136,-0.928242,0.265542,-0.261439,-0.386160,1.256256,-0.414706,-1.206105,...,-0.446020,-1.369758,0.356421,1.456656,0.468766,-1.077724,-0.548627,-0.300660,0.632805,-0.136473
3,4,logo_0,0.504446,-0.543099,0.915062,1.293575,-0.849605,1.120387,-0.637641,-1.337280,...,0.162753,0.206993,-0.201259,-1.087391,-0.597388,-0.992079,-0.851486,-0.225463,-0.549269,0.088637
4,5,logo_0,-0.641569,-0.657125,-0.105109,-0.031630,-0.572032,0.912017,-0.569627,-1.573482,...,-1.079276,0.285482,1.532865,-0.375210,-0.249130,-0.551393,-1.024246,0.623726,-1.073305,0.166613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902,21,logo_99,0.729620,-1.035586,0.390709,1.910684,-0.489203,2.197111,0.424709,-0.389426,...,1.130544,-1.892641,-0.690288,1.371062,0.718980,-0.827454,-0.557172,0.235162,0.523978,-0.592003
903,22,logo_99,1.147938,-0.799806,0.664411,1.610482,-0.612273,1.194499,0.076532,-0.662109,...,0.415811,-1.569571,-0.424470,0.518902,0.160598,-1.283986,-0.463894,0.319661,0.785325,-0.294870
904,22,logo_99,1.147938,-0.799806,0.664411,1.610482,-0.612273,1.194499,0.076532,-0.662109,...,0.415811,-1.569571,-0.424470,0.518902,0.160598,-1.283986,-0.463894,0.319661,0.785325,-0.294870
905,23,logo_99,1.645128,-1.911424,0.806118,0.850210,0.554424,1.578238,-0.722733,-0.704658,...,0.883759,-2.000007,-0.517095,2.213172,0.733567,-0.577074,-0.710312,-0.591997,0.836300,-0.720247
