In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,  DataLoader
import torch
from pytorch_transformers import *
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from pytorch_pretrained_bert.optimization import BertAdam, WarmupLinearSchedule
print(device)
import math
from scipy import spatial
from torch.autograd import Variable



cuda


In [2]:
import csv
SemEval_prefix = "SemEval_"
import pandas as pd
import numpy as np

class Parse_data:

    def  __init__ (self, file_path, testing_emotions_path):
        self.file_path = file_path
        self.testing_emotions_path = testing_emotions_path
        self.sem_eval2007_id_to_emotions = self.get_sem_eval2007_id_to_emotions()
        self.train_data()
        self.normalize_data()

    def train_data(self):
        data_from_file = pd.read_csv(self.file_path)
        boolean_indexes = data_from_file["id"].str.startswith(SemEval_prefix)
        self.train_instances = data_from_file[~boolean_indexes]
        self.test_instances = data_from_file[boolean_indexes]

        extra_column = [np.nan] * len(self.test_instances)
        self.test_instances.insert(5, "Categorial", extra_column, True)

        for index in self.sem_eval2007_id_to_emotions:
            value = self.sem_eval2007_id_to_emotions[index]
            self.test_instances.loc[self.test_instances.id == SemEval_prefix + str(index), "Categorial"] = [value]

        self.test_instances = self.test_instances.dropna(axis=0, how='any')

        train_instances = list(range(len(self.train_instances)))
        self.train_instances.reindex(train_instances)
        test_instances = list(range(len(self.test_instances)))
        self.test_instances.reindex(test_instances)


    def normalize_data(self):

        #Train
        #mean = self.train_instances.mean()
        #std = self.train_instances.std()
        maxi = self.train_instances.max()
        mini = self.train_instances.min()

        current_v = self.train_instances["V"]
        normalized_v = 2*(current_v - mini["V"]) / (maxi["V"]-mini["V"]) - 1
        self.train_instances["V"] = normalized_v

        current_a = self.train_instances["A"]
        normalized_a =2* (current_a - mini["A"]) / (maxi["A"]-mini["A"]) -1 
        self.train_instances["A"] = normalized_a
        
        current_d = self.train_instances["D"]
        normalized_d = 2*(current_d - mini["D"]) / (maxi["D"]-mini["D"]) -1
        self.train_instances["D"] = normalized_d
        
        #Test
        #mean = self.test_instances.mean()
        #std = self.test_instances.std()
        maxi = self.test_instances.max()
        mini = self.test_instances.min()


        current_v = self.test_instances["V"]
        normalized_v = 2*(current_v - mini["V"]) / (maxi["V"]-mini["V"]) - 1
        self.test_instances["V"] = normalized_v

        current_a = self.test_instances["A"]
        normalized_a = 2* (current_a - mini["A"]) / (maxi["A"]-mini["A"]) -1 
        self.test_instances["A"] = normalized_a

        current_d = self.test_instances["D"]
        normalized_d = 2*(current_d - mini["D"]) / (maxi["D"]-mini["D"]) -1
        self.test_instances["D"] = normalized_d


    def get_train_data(self):

        return self.train_instances


    def get_id_to_sentence_dict(self):
        sentences = {}
        with open(self.file_path, encoding='utf8') as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            line_count = 0

            for row in csv_reader:
                if line_count == 0:
                    line_count += 1
                else:
                    if line_count == 393:
                        line_count += 1
                    sentences.update({row[0]: row[1]})
                    line_count += 1
        return sentences

    def get_sem_eval2007_id_to_emotions(self):
        sentences = {}
        with open(self.testing_emotions_path) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=' ')

            # row is in the form: 0: id  1: anger 2: disgust 3: fear 4: joy 5 :sadness 6: surprise
            for row in csv_reader:
                instance = {
                    "id": row[0],
                    "anger": row[1],
                    "disgust": row[2],
                    "fear": row[3],
                    "joy": row[4],
                    "sadness": row[5],
                    "surprise": row[6]
                }
                sentences.update({row[0]: instance})

        return sentences


    def get_test_data(self):
        return self.test_instances

# reader_path = "./corpus/reader.csv"
# writer_path = "./corpus/writer.csv"
# raw_path = "./corpus/raw.csv"
# sem2007_paths = "./affectivetext_test.emotions.gold"
#
# moss = parse_data(reader_path, writer_path, raw_path, sem2007_paths)
#
# a = moss.get_train_data()
# b = moss.get_test_data()
# a = 3

In [3]:
class EmoBankDatabase(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, file_path, emotions_path, is_testing = False):
        parse_data_instance = Parse_data(file_path, emotions_path)
        if is_testing:
                self.par_data = parse_data_instance.get_test_data()
        else:
            self.par_data = parse_data_instance.get_train_data()

    def __len__(self):
        return len(self.par_data)

    def __getitem__(self, idx):
        item = self.par_data.iloc[idx]
        return item.text, torch.tensor([item.V, item.A, item.D])

In [4]:
class EmobankFineTuningModule(torch.nn.Module):
    def __init__(self, model_class):
        """
        In the constructor we instantiate two nn.Linear modules and assign them as
        member variables.
        """
        super(EmobankFineTuningModule, self).__init__()

        self.bert = model_class
        self.linear2 = torch.nn.Linear(768, 3)

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        x = [get_all_hidden_states(instance, self.bert) for instance in x]
        x = [self.linear2(instance.view(-1, 768)) for instance in x]
        x = [torch.mean(instance, dim=0) for instance in x]
        x = torch.stack(x, dim=0, out=None)
        return x

In [6]:
def get_all_hidden_states(x, model):
    all_hidden_states, all_attentions = model(x)[-2:]

    return all_hidden_states[0]


def get_all_hidden_states(x, model):
    all_hidden_states, all_attentions = model(x)[-2:]

    return all_hidden_states[0]



def Train_Net(my_net, trainloader, criterion, tokenizer):
    #optimizer = torch.optim.SGD(my_net.parameters(), lr=0.01, momentum=0.9)
    optimizer = BertAdam(my_net.parameters(), lr=0.001, schedule='warmup_linear', warmup=0.1, t_total=23000000)

    # training loop
    for epoch in range(1000):
        if epoch > 0:
            path = "/home/dor/modelCosinev7" + str(epoch+83)
            path2 = "/home/dor/tokinezerv" + str(epoch+83)
            torch.save(my_net, path) 
            torch.save(tokenizer, path2) 
            print(epoch_loss/len(trainloader))
        epoch_loss = 0.0
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            
            inputs, labels = data
                                    
            labels = labels.to(device)
            embedded_input = [torch.tensor([tokenizer.encode(text)]).to(device) for text in inputs]
                
            #flags = Variable(torch.ones(len(embedded_input))).to(device)

            

            # forward + backward + optimize
            outputs = my_net(embedded_input)
            #loss = torch.mean(torch.sqrt(torch.sum((outputs - labels) ** 2,  dim=1))).to(device) 
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            
            # print statistics
            running_loss += loss.item()
            epoch_loss += loss.item()
            if i % 200 == 0 and i != 0:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
    return my_net



In [7]:



##########################################################################



########################################################################## Set  basic parameters

file_path = "/home/dor/emobank.csv"
emotions_path = "/home/dor/affectivetext_test.emotions.csv"



MODELS = [(BertModel,       BertTokenizer,      'bert-base-uncased'),
          (OpenAIGPTModel,  OpenAIGPTTokenizer, 'openai-gpt'),
          (GPT2Model,       GPT2Tokenizer,      'gpt2'),
          (TransfoXLModel,  TransfoXLTokenizer, 'transfo-xl-wt103'),
          (XLNetModel,      XLNetTokenizer,     'xlnet-base-cased'),
          (XLMModel,        XLMTokenizer,        'xlm-mlm-enfr-1024')]

model_class, tokenizer_class, pretrained_weights = MODELS[0]

In [8]:
def main():
    ## Dataset
    emobank_dataset_training = EmoBankDatabase(file_path , emotions_path)
    emobank_dataset_training
    

    dataloader_training = DataLoader(emobank_dataset_training, batch_size=4,
                            shuffle=True, num_workers=12)

    ## Model
    bert_model = model_class.from_pretrained(pretrained_weights,
                                        output_hidden_states=True,
                                        output_attentions=True)
    bert_model.to(device)
    criterion = nn.MSELoss()
    model = EmobankFineTuningModule(bert_model)
    model.to(device)
    tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    model = torch.load("/home/dor/modelCosinev783").to(device)
    tokenizer = torch.load("/home/dor/tokinezerv83")
    model = Train_Net(model, dataloader_training, criterion, tokenizer)
    #model = torch.load("/home/dor/model3")
    
#     parse_data_instance = Parse_data(file_path, emotions_path)
#     test = parse_data_instance.get_test_data()
#     dataloader_testing = DataLoader(test, batch_size=4,
#                             shuffle=True, num_workers=4)
    
#     for i, data in enumerate(dataloader_testing, 0):
#         inputs, labels = data
#         labels = labels.to(device)
#         embedded_input = [torch.tensor([tokenizer.encode(text)]).to(device) for text in inputs]
#         outputs = model(embedded_input)
#         print(outputs)
#         print(labels)
#         loss = criterion(outputs, labels)
#         print(loss)
    
    
    
    
 



if __name__ == '__main__':
    main()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


[1,   201] loss: 0.006
[1,   401] loss: 0.006
[1,   601] loss: 0.007
[1,   801] loss: 0.006
[1,  1001] loss: 0.005
[1,  1201] loss: 0.006
[1,  1401] loss: 0.006
[1,  1601] loss: 0.006
[1,  1801] loss: 0.005
[1,  2001] loss: 0.006
[1,  2201] loss: 0.007


  "type " + obj.__name__ + ". It won't be checked "


0.005957897496572423
[2,   201] loss: 0.006
[2,   401] loss: 0.006
[2,   601] loss: 0.006
[2,   801] loss: 0.006
[2,  1001] loss: 0.005
[2,  1201] loss: 0.006
[2,  1401] loss: 0.006
[2,  1601] loss: 0.005
[2,  1801] loss: 0.006
[2,  2001] loss: 0.006
[2,  2201] loss: 0.006
0.005833949378989843
[3,   201] loss: 0.006
[3,   401] loss: 0.006
[3,   601] loss: 0.006
[3,   801] loss: 0.005
[3,  1001] loss: 0.006
[3,  1201] loss: 0.006
[3,  1401] loss: 0.006
[3,  1601] loss: 0.005
[3,  1801] loss: 0.006
[3,  2001] loss: 0.006
[3,  2201] loss: 0.006
0.005758706089281995
[4,   201] loss: 0.006
[4,   401] loss: 0.006
[4,   601] loss: 0.006
[4,   801] loss: 0.005
[4,  1001] loss: 0.006
[4,  1201] loss: 0.005
[4,  1401] loss: 0.006
[4,  1601] loss: 0.005
[4,  1801] loss: 0.006
[4,  2001] loss: 0.005
[4,  2201] loss: 0.006
0.00568214510836349
[5,   201] loss: 0.006
[5,   401] loss: 0.006
[5,   601] loss: 0.005
[5,   801] loss: 0.006
[5,  1001] loss: 0.006
[5,  1201] loss: 0.006
[5,  1401] loss: 0.0

KeyboardInterrupt: 

In [9]:
model = torch.load("/home/dor/modelCosinev794")
tokenizer = torch.load("/home/dor/tokinezerv94")

In [10]:


emobank_dataset_testing = EmoBankDatabase(file_path , emotions_path, True)
dataloader_testing = DataLoader(emobank_dataset_testing, batch_size=1,
                        shuffle=True, num_workers=4)

number_of_instances = len(emobank_dataset_testing)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [11]:
criterion = nn.MSELoss()

In [12]:
sum_loss = 0
for i, data in enumerate(dataloader_testing, 0):
        # get the inputs
        inputs, labels = data
        
        labels = labels.to(device)
        embedded_input = [torch.tensor([tokenizer.encode(text)]).to(device) for text in inputs]
        model.eval()
        outputs = model(embedded_input)
        #flags = Variable(torch.ones(len(embedded_input))).to(device)
        loss = criterion(outputs, labels)
        print("outpus" + str(outputs))
        print("labels" + str(labels))
        print(torch.sqrt(torch.sum((outputs - labels) ** 2,  dim=1)))
        print(torch.mean(torch.sqrt(torch.sum((outputs - labels) ** 2,  dim=1))))
        print("loss" + str(loss))
        sum_loss = sum_loss + loss.item()
print("finished")


outpustensor([[-0.3349, -0.1034,  0.0812]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.5263,  0.0466, -0.1083]], device='cuda:0')
tensor([0.3083], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3083, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0317, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.1666, 0.1396, 0.1136]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.1654, 0.3161, 0.5669]], device='cuda:0')
tensor([0.4864], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4864, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0789, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.2572, 0.2233, 0.3407]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.2481, -0.0363,  0.1465]], device='cuda:0')
tensor([0.3243], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3243, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0351, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1793, -0.8314, -

labelstensor([[0.2256, 0.3575, 0.5287]], device='cuda:0')
tensor([1.2584], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.2584, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.5279, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-1.1803,  0.4880,  0.1497]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.4737, -0.2642, -0.0828]], device='cuda:0')
tensor([1.0580], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.0580, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3731, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.7967, -0.5390, -0.2391]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.5263, -0.1606, -0.3631]], device='cuda:0')
tensor([1.3816], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.3816, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.6363, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.6042,  0.2153, -0.4106]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3008, -0.1

tensor(0.8887, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2633, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2747, -0.2144,  0.3932]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1353, -0.3575,  0.2611]], device='cuda:0')
tensor([0.2394], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2394, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0191, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1031, -0.0212, -0.4222]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.1504, 0.0466, 0.5287]], device='cuda:0')
tensor([0.9864], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.9864, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3244, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1721, -0.3202,  0.1732]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.6767, -0.1606, -0.4904]], device='cuda:0')
tensor([0.8488], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8488, device='cuda:

outpustensor([[-0.0945, -0.0781, -0.1195]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.1606, -0.2357]], device='cuda:0')
tensor([0.1710], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.1710, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0097, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1470, -0.2482, -0.0608]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.1606,  0.2739]], device='cuda:0')
tensor([0.4562], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4562, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0694, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0053, -0.2792,  0.2317]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642,  0.1465]], device='cuda:0')
tensor([0.0867], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.0867, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0025, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1680, -

outpustensor([[-0.3172, -0.1323, -0.0365]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3759, -0.2642,  0.0191]], device='cuda:0')
tensor([0.1548], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.1548, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0080, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2374, -0.7801, -0.0470]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642,  0.3631]], device='cuda:0')
tensor([0.7004], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7004, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1635, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1075, -0.1846,  0.2382]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.3308, -0.1503,  0.5669]], device='cuda:0')
tensor([0.3988], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3988, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0530, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3911,  

losstensor(0.1678, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.4982, -0.9307,  0.4025]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.4715,  0.1465]], device='cuda:0')
tensor([0.7243], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7243, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1749, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0956, -0.4014,  0.1980]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1880, -0.9067, -0.0064]], device='cuda:0')
tensor([0.5528], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5528, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1019, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3946,  0.0881, -0.1672]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.5865,  0.3161,  0.0064]], device='cuda:0')
tensor([0.3448], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3448, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0396, device

outpustensor([[-0.0084, -0.0950,  0.1064]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0902, -0.1399,  0.1465]], device='cuda:0')
tensor([0.1156], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.1156, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0045, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3534, -0.4721, -0.2220]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.2256, -0.0570,  0.2739]], device='cuda:0')
tensor([0.8679], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8679, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2511, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3767, -0.4009, -0.0293]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.6767,  0.3575, -0.6178]], device='cuda:0')
tensor([1.0058], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.0058, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3372, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1100, -

labelstensor([[ 0.0827, -0.3782,  0.2866]], device='cuda:0')
tensor([0.3184], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3184, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0338, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2593, -0.6881,  0.0673]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.5648,  0.1465]], device='cuda:0')
tensor([0.2979], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2979, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0296, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3085,  0.0307, -0.1039]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1504, -0.2642,  0.2739]], device='cuda:0')
tensor([0.6636], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6636, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1468, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0780,  0.0608,  0.0460]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -

outpustensor([[-0.0722, -0.3501, -0.0687]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1053, -0.4093, -0.2229]], device='cuda:0')
tensor([0.1685], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.1685, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0095, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1100, -0.3049, -0.0449]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3782,  0.1465]], device='cuda:0')
tensor([0.2326], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2326, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0180, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.9102, 0.2952, 0.6297]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.3759, -0.0570,  0.4013]], device='cuda:0')
tensor([0.6794], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6794, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1539, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.4150, -0.6

outpustensor([[ 0.0726,  0.1236, -0.1700]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.9023,  0.2539, -0.6178]], device='cuda:0')
tensor([1.0806], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.0806, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3893, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.6243,  0.1585,  0.1742]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1654, -0.0363,  0.1465]], device='cuda:0')
tensor([0.4993], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4993, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0831, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1587, -0.2449,  0.1691]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0902, -0.5233,  0.2994]], device='cuda:0')
tensor([0.3955], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3955, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0521, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2118, -

labelstensor([[ 0.0000, -0.3679,  0.2739]], device='cuda:0')
tensor([0.7579], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7579, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1915, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.1725, 0.2142, 0.2171]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3679,  0.0191]], device='cuda:0')
tensor([0.6386], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6386, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1359, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1211, -0.0479,  0.3610]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.3008, 0.0466, 0.5287]], device='cuda:0')
tensor([0.4636], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4636, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0717, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.5320,  0.5599,  0.6392]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3008, -0.1606

outpustensor([[ 0.2172, -0.3869, -0.0841]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.0902, -0.1295, -0.1720]], device='cuda:0')
tensor([0.4105], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4105, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0562, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.1845, 0.0847, 0.0402]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.1504, 0.1503, 0.1465]], device='cuda:0')
tensor([0.1295], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.1295, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0056, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1121, -0.2198, -0.0899]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.6015, -0.4715, -0.1083]], device='cuda:0')
tensor([0.5506], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5506, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1011, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2516, -0.1746

losstensor(0.0115, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.3451, -0.0867,  0.5056]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0902, -0.3886,  0.6306]], device='cuda:0')
tensor([0.4144], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4144, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0573, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.9586, 0.5109, 0.6009]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.1606,  0.0191]], device='cuda:0')
tensor([1.4210], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.4210, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.6731, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3134,  0.6274,  0.3433]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3759, -0.2642, -0.0064]], device='cuda:0')
tensor([0.9598], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.9598, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3071, device='c

outpustensor([[-0.1092, -0.0211,  0.0813]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.4286, -0.2642, -0.0318]], device='cuda:0')
tensor([0.4170], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4170, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0580, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0540, -0.2915,  0.0624]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.7098,  0.3248]], device='cuda:0')
tensor([0.4968], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4968, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0823, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.1884, 0.2739, 0.0930]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0752, -0.1606,  0.2739]], device='cuda:0')
tensor([0.4841], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4841, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0781, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.3069, 0.052

tensor(0.1789, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0107, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0336,  0.1221,  0.2600]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642,  0.4650]], device='cuda:0')
tensor([0.4387], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4387, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0641, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0868, -0.0703, -0.1033]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.3759, 0.2539, 0.6178]], device='cuda:0')
tensor([0.8419], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8419, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2363, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.3289, 0.6694, 0.5381]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1504, -0.0570,  0.4013]], device='cuda:0')
tensor([0.7604], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7604, device='cuda:0',

outpustensor([[-0.2844, -0.3836,  0.4045]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.4715,  0.1465]], device='cuda:0')
tensor([0.3939], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3939, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0517, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1378,  0.2227,  0.0840]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3679,  0.1465]], device='cuda:0')
tensor([0.6096], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6096, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1239, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1458, -0.3225, -0.2275]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.4286, -0.2642,  0.1465]], device='cuda:0')
tensor([0.4725], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4725, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0744, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3477, -

tensor([0.2628], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2628, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0230, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0513, -0.0789, -0.0404]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.2256, -0.0570,  0.4013]], device='cuda:0')
tensor([0.4753], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4753, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0753, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3997, -0.3525, -0.3131]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0752, -0.4715,  0.2739]], device='cuda:0')
tensor([0.7644], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7644, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1948, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0051,  0.0441, -0.2832]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.0902, -0.5233, -0.0064]], device='cuda:0')
tensor([0.6370], device='

tensor([0.5793], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5793, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1119, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2116, -0.2095, -0.1011]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.1606, -0.1083]], device='cuda:0')
tensor([0.0786], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.0786, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0021, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3278,  0.1913, -0.1262]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3308, -0.2642,  0.4268]], device='cuda:0')
tensor([0.7164], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7164, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1711, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2635, -0.2758,  0.3494]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.3008, -0.1606,  0.5287]], device='cuda:0')
tensor([0.2163], device='

outpustensor([[-0.1177, -0.5597,  0.3165]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.8271, -0.2642, -0.4904]], device='cuda:0')
tensor([1.1143], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.1143, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.4139, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1876,  0.3600, -0.3291]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.2256,  0.2539,  0.0191]], device='cuda:0')
tensor([0.3660], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3660, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0447, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.4547, -0.3074, -0.0976]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0752, -0.2642,  0.2739]], device='cuda:0')
tensor([0.6486], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6486, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1402, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2518, -

tensor([0.4233], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4233, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0597, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.4017, -0.0709,  0.0064]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.0752,  0.1503,  0.4013]], device='cuda:0')
tensor([0.5581], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5581, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1038, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.3279,  0.3050,  0.3310]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.2256, -0.0570,  0.0191]], device='cuda:0')
tensor([0.4887], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4887, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0796, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1503, -0.0942,  0.0080]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504,  0.4611,  0.2739]], device='cuda:0')
tensor([0.6157], device='

outpustensor([[-0.2925,  0.4271, -0.0149]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3008, -0.1606,  0.1465]], device='cuda:0')
tensor([0.6095], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6095, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1238, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.0943, 0.7870, 0.7198]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3679,  0.2739]], device='cuda:0')
tensor([1.2416], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.2416, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.5138, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.4982, -0.3943,  0.0270]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.6767, -0.0570, -1.0000]], device='cuda:0')
tensor([1.5965], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.5965, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.8496, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0425, -0.4

outpustensor([[ 0.2031, -0.1902, -0.1543]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.0902, -0.3886,  0.1465]], device='cuda:0')
tensor([0.4646], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4646, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0720, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1167, -0.1464, -0.0163]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.1192,  0.1465]], device='cuda:0')
tensor([0.2021], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2021, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0136, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0146, -0.6312,  0.1103]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.4662, -0.1399, -0.0064]], device='cuda:0')
tensor([0.6973], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6973, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1621, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.8283, -

losstensor(0.0132, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1029, -0.9067,  0.1733]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0677, -0.1710,  0.4904]], device='cuda:0')
tensor([0.8191], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8191, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2237, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0555, -0.0845,  0.2402]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0902, -0.3886, -0.0064]], device='cuda:0')
tensor([0.3930], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3930, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0515, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.3775, -0.1690, -0.1215]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0752, -0.2642, -0.1083]], device='cuda:0')
tensor([0.3173], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3173, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0336, device

tensor(1.2953, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.5592, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1747, -0.2505,  0.1173]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.4404, -0.0701]], device='cuda:0')
tensor([0.3189], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3189, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0339, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.2814, 0.4099, 0.0996]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3679,  0.1465]], device='cuda:0')
tensor([0.8284], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8284, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2288, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0379, -0.5356, -0.4837]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.3679,  0.0191]], device='cuda:0')
tensor([0.5624], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5624, device='cuda:

outpustensor([[0.1554, 0.0457, 0.5187]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1654, -0.3782,  0.4268]], device='cuda:0')
tensor([0.4339], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4339, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0628, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.5661, -0.0516,  0.1915]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.3008, 0.0466, 0.5287]], device='cuda:0')
tensor([0.4401], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4401, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0646, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.5997, 0.0237, 0.3239]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.2256, -0.4715,  0.4013]], device='cuda:0')
tensor([0.6254], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6254, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1304, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1562, -0.1542, -

losstensor(0.3058, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0684, -0.2939, -0.0123]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1504, -0.0570,  0.2739]], device='cuda:0')
tensor([0.4311], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4311, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0620, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2455,  0.3480,  0.0783]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.2642,  0.1465]], device='cuda:0')
tensor([0.6234], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6234, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1295, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2780,  0.3859,  0.1003]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.5639, -0.0052,  0.6178]], device='cuda:0')
tensor([0.7089], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7089, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1675, device

tensor(0.7670, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1961, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0773,  0.0403, -0.0604]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.3233, -0.1192,  0.1465]], device='cuda:0')
tensor([0.3588], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3588, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0429, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1099, -0.1270,  0.0432]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[0.0000, 0.0363, 0.5159]], device='cuda:0')
tensor([0.5121], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5121, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0874, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1340,  0.0307, -0.0193]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.2642,  0.1465]], device='cuda:0')
tensor([0.3388], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3388, device='cuda:

losstensor(0.0474, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1987, -0.3206, -0.2939]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642,  0.5287]], device='cuda:0')
tensor([0.8481], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8481, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2398, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2283, -0.7837, -0.4384]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1504, -0.1606,  0.2739]], device='cuda:0')
tensor([0.9495], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.9495, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.3005, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2090, -0.0363, -0.1751]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.1504, -0.4715, -0.1083]], device='cuda:0')
tensor([0.4442], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4442, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0658, device

losstensor(0.0790, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1503, -0.2166,  0.0463]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.4286,  0.1813, -0.0318]], device='cuda:0')
tensor([0.4918], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4918, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0806, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1143, -0.3794, -0.2217]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1880, -0.2642,  0.3121]], device='cuda:0')
tensor([0.5510], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5510, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1012, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.2417, -0.5302, -0.0635]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3008, -0.2642,  0.4013]], device='cuda:0')
tensor([0.7622], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.7622, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1937, device

losstensor(0.1177, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.3743, 0.2868, 0.0078]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642, -0.1083]], device='cuda:0')
tensor([0.6761], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6761, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1524, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0772, -0.0260, -0.1818]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.2642, -0.0828]], device='cuda:0')
tensor([0.2693], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2693, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0242, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.1122, -0.1084,  0.1964]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.3759,  0.0466, -0.2357]], device='cuda:0')
tensor([0.5293], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.5293, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0934, device='c

losstensor(0.3374, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.2649,  0.1274,  0.0229]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.5263, -0.1606, -0.1083]], device='cuda:0')
tensor([0.4105], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.4105, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0562, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[0.4002, 0.0659, 0.0655]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.3782,  0.2866]], device='cuda:0')
tensor([0.6374], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6374, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1354, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1891, -0.2468,  0.2261]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.1192,  0.1465]], device='cuda:0')
tensor([0.2417], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.2417, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0195, device='c

outpustensor([[ 0.2115,  0.5984, -0.0683]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.3008, -0.1606,  0.4013]], device='cuda:0')
tensor([0.8970], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8970, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2682, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.0838,  0.5366,  0.5327]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.0902, -0.3886, -0.4904]], device='cuda:0')
tensor([1.3794], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.3794, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.6343, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1530, -0.7039, -0.1341]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.1504, -0.1606,  0.2739]], device='cuda:0')
tensor([0.6794], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.6794, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.1539, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.1038, -

tensor([0.8840], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.8840, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.2605, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-0.7440, -1.1739, -1.1062]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0000, -0.7824, -0.0191]], device='cuda:0')
tensor([1.3743], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(1.3743, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.6295, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[ 0.0480, -0.2047,  0.0571]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[ 0.0752, -0.4715,  0.2739]], device='cuda:0')
tensor([0.3449], device='cuda:0', grad_fn=<SqrtBackward>)
tensor(0.3449, device='cuda:0', grad_fn=<MeanBackward1>)
losstensor(0.0396, device='cuda:0', grad_fn=<MseLossBackward>)
outpustensor([[-1.2198, -0.3735, -0.8452]], device='cuda:0', grad_fn=<StackBackward>)
labelstensor([[-0.4135, -0.5440,  0.0318]], device='cuda:0')
tensor([1.2034], device='

In [13]:
print(i)
print ("loss = " + str (sum_loss / i))

949
loss = 0.15924595648640458


In [14]:
categories_vad = {
    "joy":{
        "v":0.76,
        "a":0.48,
        "d":0.35
    },
    "anger":{
        "v":-0.51,
        "a":0.59,
        "d":0.25
    },
      "sadness":{
        "v":-0.63,
        "a":-0.27,
        "d":-0.33
    },
      "fear":{
        "v":-0.64,
        "a":0.6,
        "d":-0.43
    },
      "disgust":{
        "v":-0.6,
        "a":0.35,
        "d":0.11
    },
    "surprise":{
        "v":0.4,
        "a":0.67,
        "d":-0.13
    }
}


index_to_catogory = {
    0 : "joy",
    1 : "anger",
    2 : "sadness",
    3 : "fear",
    4 : "disgust",
    5 : "surprise",
}

category_to_index = ["joy", "anger", "sadness", "fear", "disgust", "surprise"]


In [15]:
def get_category(model_output):
    model_output = model_output[0]
    categories_distance = []
    for i, category in enumerate(category_to_index):
        # index: 0:V, 1:A, 2:D
        current_category_vad = categories_vad[category]
        a = np.empty(3)
        a[0] = current_category_vad["v"]
        a[1] = current_category_vad["a"]
        a[2] = current_category_vad["d"]
        distance_from_category = get_distance(a,model_output.to("cpu").detach().numpy())
        categories_distance.append(distance_from_category)
#     print(category_to_index)
#     print(categories_distance)
    return index_to_catogory[categories_distance.index(min(categories_distance))]
        

In [16]:
def get_distance(a, b):
    return (np.square(a - b)).mean(axis=None)
#math.sqrt((current_category_vad["v"] - model_output[0])**2 + (current_category_vad["a"] - model_output[1])**2 + (current_category_vad["d"] - model_output[2])**2)

In [17]:
def get_instance_category(test_instance):
    max_value_first = -1
    max_value_second = -1
    max_category_first = "moss"
    max_category_second = "moss"
    instance_categories = test_instance["Categorial"]
    for category in instance_categories:
        if float(instance_categories[category]) > max_value_second and category != "id":
            max_value_first = max_value_second
            max_value_second = float(instance_categories[category])
            max_category_first = max_category_second
            max_category_second = category
    return [max_category_first]
    

In [18]:
parse_data_instance = Parse_data(file_path, emotions_path)
test = parse_data_instance.get_test_data()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [19]:

total_count = 0
correct_count = 0
    
for i, test_insance in test.iterrows():
    text = test_insance["text"]
    embeded_text = torch.tensor([tokenizer.encode(text)]).to(device)
    outputs = model([embeded_text])

#     print(test_insance)
#     print("item" + str(test_insance))
#    outputs = [torch.tensor([test_insance.V, test_insance.A, test_insance.D])]

#     print("outputs: " + str(outputs))

    category = get_category(outputs)
    instance_actual_category = get_instance_category(test_insance)

#     print("actual: " + instance_actual_category + ", category:" + str(category))
#     if category == instance_actual_category:
    if category in instance_actual_category:
        correct_count += 1 
    total_count += 1


In [20]:
print("correctness: " + str(correct_count / total_count))

correctness: 0.14105263157894737
