## Imports

In [1]:
import random
import os
from os.path import join

import torch
import torch.nn as nn

import numpy as np

## Models

In [2]:

class MovieToLatent(nn.Module):
    
    def __init__(self, init_size: int, k: int):
        super(MovieToLatent, self).__init__()
        
        self.k = k
        
        self.dense1 = nn.Linear(init_size, 2)
        self.dense2 = nn.Linear(2, 3)
        self.dense3 = nn.Linear(3, k)
        
    
    def forward(self, x):
        
        x = torch.tanh(self.dense1(x))
        x = self.dense2(x)
        x = self.dense3(x)
        return x


## count sketch

In [3]:

class VaultBasedOnCountSketch:

    def __init__(self, w: int, d: int):

        data = [[0 for _ in range(w)] for _ in range(d)]

        self.data = data
        self.w = w
        self.d = d

        self.index_str_to_hashs = {}
        self.index_str_to_positive_negative = {}

    def store_number(self, tensor_id: int, i_index: int, j_index: int, number_to_store: float, fixed_size: int = 8):

        index_str = str(tensor_id) + "_" + str(i_index) + "_" + str(j_index)

        if index_str not in self.index_str_to_hashs:
            self.index_str_to_hashs[index_str] = [hash(index_str * (i + 1)) for i in range(self.d)]
            self.index_str_to_positive_negative[index_str] = [1 if hash("min_" + (index_str * (i + 1))) % 2 else -1 for
                                                              i in range(self.d)]

        hashes = self.index_str_to_hashs[index_str]
        positivity = self.index_str_to_positive_negative[index_str]

        number_to_store = int(number_to_store * (10 ** fixed_size))

        for i in range(self.d):
            _hash = hashes[i]
            _positive = positivity[i]

            index = _hash % self.w

            self.data[i][index] += number_to_store * _positive

    def get_count_of_line(self, tensor_id: int, i_index: int, j_index: int, fixed_size: int = 8):

        arr = []
        for row in range(self.d):

            index_str = str(tensor_id) + "_" + str(i_index) + "_" + str(j_index)
            hashes = self.index_str_to_hashs[index_str][row]
            positivity = self.index_str_to_positive_negative[index_str][row]

            col = hashes % self.w
            arr.append((float(self.data[row][col] * positivity) / (10 ** fixed_size)))

        arr.sort()

        return arr[len(arr) // 2]


In [4]:

def get_vault(gradient, w, d):
    
    vault = VaultBasedOnCountSketch(w, d)

    for index, tensor_params in enumerate(gradient):

        params = tensor_params.tolist()
        size = len(tensor_params.shape)

        if size == 2:
            for i in range(tensor_params.shape[0]):

                for j in range(tensor_params.shape[1]):
                    vault.store_number(index, i, j, params[i][j])
        else:
            for i in range(tensor_params.shape[0]):
                vault.store_number(index, i, 0, params[i])


    return vault


In [5]:
def set_gradient(vault, gradient):
    
    for index, tensor_params in enumerate(gradient):

        params = tensor_params.tolist()
        size = len(tensor_params.shape)

        if size == 2:
            for i in range(tensor_params.shape[0]):

                for j in range(tensor_params.shape[1]):
                    tensor_params[i][j] = vault.get_count_of_line(index, i, j)
        else:
            for i in range(tensor_params.shape[0]):
                tensor_params[i] = vault.get_count_of_line(index, i, 0, )
    

## Data Creation

In [6]:
data_length = 100
init_size = 10
out_put_length = 5
X = torch.tensor([[random.random() for i in range(init_size)] for j in range(data_length)])
Y = torch.tensor([[(3 * X[i][0]) + (0.5 * X[i][1]), 
                   (11 * X[i][3]) + (3.5 * X[i][9]),
                   (1.5 * X[i][2]) + (3 * X[i][4]) + (6 * X[i][5]),
                   (2.5 * X[i][6]) + (3.5 * X[i][7]),
                   (15 * X[i][8]) + (7.3 * X[i][3])] for i in range(data_length)])
Y = torch.tensor([[1.0, 
                   2.0,
                   3.0,
                   4.0,
                   5.0] for i in range(data_length)])

## Models Object

In [7]:

def length_of_weight(weights):

    to_return = 1
    for s in weights.shape:
        to_return *= s
    return to_return


def set_item(weights_list, index_of_weight, index_in_weight_list, weight, index_in_weight):

    shape_size = len(weight.shape)

    if shape_size == 4:

        index_0 = index_in_weight % weight.shape[0]
        index_in_weight //= weight.shape[0]

        index_1 = index_in_weight % weight.shape[1]
        index_in_weight //= weight.shape[1]

        index_2 = index_in_weight % weight.shape[2]
        index_in_weight //= weight.shape[2]

        index_3 = index_in_weight % weight.shape[3]
        index_in_weight //= weight.shape[3]

        weight[index_0][index_1][index_2][index_3] = weights_list[index_0][index_1][index_2][index_3]

    elif shape_size == 2:

        index_0 = index_in_weight % weight.shape[0]
        index_in_weight //= weight.shape[0]

        index_1 = index_in_weight % weight.shape[1]
        index_in_weight //= weight.shape[1]

        weight[index_0][index_1] = weights_list[index_0][index_1]

    elif shape_size == 1:

        weight[index_in_weight] = weights_list[index_in_weight]

    else:
        raise Exception("not supported !!!")

    return 0 


In [8]:
def insert_weights(weights_list: list, model):

    weight_list_index = 0
    index_of_weight = 0

    model_data = model.state_dict()

    _index = 0
    for key, weight in model_data.items():

        weight = np.zeros(weight.shape)
        weight_index = 0
        length = length_of_weight(weight)

        for index in range(length):

           next_weight_needed = set_item(weights_list[_index],index_of_weight, weight_list_index,weight, weight_index)
           weight_index += 1
           weight_list_index += 1
           if next_weight_needed:
                index_of_weight += 1
                weight_list_index = 0

        model_data[key].cpu().detach()
        model_data[key] = torch.tensor(weight).cuda()

        _index += 1

    model.load_state_dict(model_data)

In [9]:
def get_model_weight_data(model):
    
    to_return = []
    
    for item in model.parameters():
        to_return.append(np.copy(item.detach().numpy()))


    return to_return

In [10]:
def get_gradient(steped_model, real_model):
    
    to_return = []
    
    for i in range(len(real_model)):
        
        to_return.append(real_model[i] - steped_model[i])
        
    return to_return

In [11]:
def create_new_weight_based_on_gradient(model, gradient):
    
    to_return = []
    
    for i in range(len(model)):
        
        to_return.append(model[i] + gradient[i])
        
    return to_return
    

In [12]:
movie_model = MovieToLatent(init_size, out_put_length)

## Check learning process 

In [13]:
optim_base = torch.optim.Adam(movie_model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)
mse = nn.MSELoss()
    
current_model_data = get_model_weight_data(movie_model)
    
_Y = movie_model(X)
loss = mse(_Y, Y)


optim_base.zero_grad()
loss.backward()
    

optim_base.step()

steped_model_data = get_model_weight_data(movie_model)

In [14]:
gradient = get_gradient(steped_model_data, current_model_data)

In [15]:
gradient

[array([[0.001     , 0.00099999, 0.001     , 0.001     , 0.001     ,
         0.001     , 0.00099999, 0.001     , 0.001     , 0.001     ],
        [0.001     , 0.001     , 0.00099999, 0.001     , 0.001     ,
         0.001     , 0.001     , 0.00099999, 0.001     , 0.001     ]],
       dtype=float32),
 array([0.001, 0.001], dtype=float32),
 array([[-0.00099999, -0.00099999],
        [ 0.00099999,  0.00099999],
        [ 0.00099999,  0.00099999]], dtype=float32),
 array([ 0.00099999, -0.001     , -0.00099999], dtype=float32),
 array([[ 0.00099999, -0.00099999, -0.00099999],
        [ 0.001     , -0.001     , -0.00099999],
        [ 0.001     , -0.001     , -0.00099999],
        [ 0.001     , -0.00099999, -0.00099999],
        [ 0.00099999, -0.00099999, -0.001     ]], dtype=float32),
 array([-0.00099999, -0.00099999, -0.001     , -0.00099999, -0.001     ],
       dtype=float32)]

In [16]:
vault = get_vault(gradient, 25, 100)

In [17]:
set_gradient(vault, gradient)

In [18]:
gradient

[array([[0.001     , 0.00099998, 0.001     , 0.001     , 0.00099999,
         0.001     , 0.00099998, 0.001     , 0.001     , 0.001     ],
        [0.001     , 0.001     , 0.00099998, 0.001     , 0.001     ,
         0.00100002, 0.001     , 0.00099998, 0.001     , 0.001     ]],
       dtype=float32),
 array([0.001, 0.001], dtype=float32),
 array([[-0.00099998, -0.00099998],
        [ 0.00099998,  0.00099998],
        [ 0.00099998,  0.00099998]], dtype=float32),
 array([ 0.00099998, -0.001     , -0.00099998], dtype=float32),
 array([[ 0.00099996, -0.00099998, -0.00099998],
        [ 0.001     , -0.001     , -0.00099998],
        [ 0.001     , -0.001     , -0.00099998],
        [ 0.001     , -0.00099998, -0.00099998],
        [ 0.00099998, -0.00099998, -0.001     ]], dtype=float32),
 array([-0.00099998, -0.00099998, -0.00099999, -0.00099998, -0.001     ],
       dtype=float32)]

## Learning on one computer

In [19]:

w = 1000
d = 20

iterations = 1000

for iteration in range(iterations):
    
    optim_base = torch.optim.Adam(movie_model.parameters(), lr=0.01, betas=(0.9, 0.98), eps=1e-9)
    mse = nn.MSELoss()
    
    # get current model poarams state
    current_model_data = get_model_weight_data(movie_model)
    
    # Learning step process
    _Y = movie_model(X)
    loss = mse(_Y, Y)
    optim_base.zero_grad()
    loss.backward()
    optim_base.step()
    
    # get model state after one step
    steped_model_data = get_model_weight_data(movie_model)
    
    # get gradient 
    gradient = get_gradient(current_model_data, steped_model_data)
    
    # create a vault
    vault = get_vault(gradient, w, d)
    
    # generate new gradient from the vault
    set_gradient(vault, gradient)
    
    # create new weight
    new_weight = create_new_weight_based_on_gradient(gradient, current_model_data)
    
    # insert new weight to model
    
    insert_weights(new_weight, movie_model)
    
    print(loss)
    
    


tensor(10.8189, grad_fn=<MseLossBackward>)
tensor(10.5292, grad_fn=<MseLossBackward>)
tensor(10.2234, grad_fn=<MseLossBackward>)
tensor(9.9027, grad_fn=<MseLossBackward>)
tensor(9.5687, grad_fn=<MseLossBackward>)
tensor(9.2230, grad_fn=<MseLossBackward>)
tensor(8.8673, grad_fn=<MseLossBackward>)
tensor(8.5035, grad_fn=<MseLossBackward>)
tensor(8.1333, grad_fn=<MseLossBackward>)
tensor(7.7585, grad_fn=<MseLossBackward>)
tensor(7.3810, grad_fn=<MseLossBackward>)
tensor(7.0023, grad_fn=<MseLossBackward>)
tensor(6.6241, grad_fn=<MseLossBackward>)
tensor(6.2480, grad_fn=<MseLossBackward>)
tensor(5.8753, grad_fn=<MseLossBackward>)
tensor(5.5075, grad_fn=<MseLossBackward>)
tensor(5.1457, grad_fn=<MseLossBackward>)
tensor(4.7913, grad_fn=<MseLossBackward>)
tensor(4.4453, grad_fn=<MseLossBackward>)
tensor(4.1088, grad_fn=<MseLossBackward>)
tensor(3.7828, grad_fn=<MseLossBackward>)
tensor(3.4684, grad_fn=<MseLossBackward>)
tensor(3.1664, grad_fn=<MseLossBackward>)
tensor(2.8778, grad_fn=<MseLoss

tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBac

tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBac

tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBac

tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBackward>)
tensor(0.0050, grad_fn=<MseLossBackward>)
tensor(0.0045, grad_fn=<MseLossBac

In [20]:

movie_model(X)


tensor([[1.0211, 2.0025, 3.0314, 4.0895, 5.1072],
        [1.0216, 2.0027, 3.0322, 4.0911, 5.1087],
        [1.0206, 2.0040, 3.0331, 4.0920, 5.1105],
        [1.0218, 2.0071, 3.0388, 4.1024, 5.1218],
        [1.0213, 1.9989, 3.0264, 4.0812, 5.0974],
        [1.0210, 2.0003, 3.0280, 4.0837, 5.1006],
        [1.0221, 2.0088, 3.0416, 4.1073, 5.1272],
        [1.0216, 2.0066, 3.0379, 4.1007, 5.1199],
        [1.0220, 2.0068, 3.0386, 4.1022, 5.1214],
        [1.0231, 1.9981, 3.0268, 4.0831, 5.0983],
        [1.0209, 2.0034, 3.0326, 4.0913, 5.1095],
        [1.0219, 2.0077, 3.0398, 4.1042, 5.1237],
        [1.0221, 2.0082, 3.0406, 4.1057, 5.1253],
        [1.0146, 1.9651, 2.9704, 3.9823, 4.9873],
        [1.0217, 2.0052, 3.0359, 4.0975, 5.1161],
        [1.0212, 2.0040, 3.0336, 4.0932, 5.1116],
        [1.0221, 2.0087, 3.0415, 4.1071, 5.1270],
        [1.0213, 2.0065, 3.0375, 4.0998, 5.1191],
        [1.0219, 2.0055, 3.0366, 4.0987, 5.1174],
        [1.0221, 2.0063, 3.0380, 4.1013, 5.1202],


In [21]:
gradient

[array([[-0.00999999, -0.00999997, -0.00999999, -0.00999999, -0.00999999,
         -0.00999999, -0.00999999, -0.00999999, -0.00999999, -0.00999999],
        [-0.00999999, -0.00999999, -0.00999999, -0.00999999, -0.00999999,
         -0.00999999, -0.00999999, -0.00999999, -0.00999999, -0.00999999]],
       dtype=float32),
 array([-0.00999999, -0.00999999], dtype=float32),
 array([[ 0.00999999,  0.00999999],
        [-0.00999999, -0.00999999],
        [-0.00999999, -0.00999999]], dtype=float32),
 array([-0.00999999,  0.00999999,  0.00999999], dtype=float32),
 array([[-0.00999999,  0.01      ,  0.00999999],
        [-0.00999999,  0.00999999,  0.00999999],
        [-0.00999999,  0.00999999,  0.01      ],
        [-0.01      ,  0.00999999,  0.00999999],
        [-0.00999999,  0.00999999,  0.00999999]], dtype=float32),
 array([0.00999999, 0.00999999, 0.00999999, 0.01      , 0.01      ],
       dtype=float32)]

## Multiple computers - toy example

In [22]:
number_of_computers = 10
data_length_per_computer = 100
init_size = 10
out_put_length = 5

X = [torch.tensor([[random.random() for i in range(init_size)] for i in range(data_length_per_computer) ]) \
                                                                                for j in range(number_of_computers)]

Y = [torch.tensor([[(3 * X[j][i][0]) + (0.5 * X[j][i][1]), 
                   (11 * X[j][i][3]) + (3.5 * X[j][i][9]),
                   (1.5 * X[j][i][2]) + (3 * X[j][i][4]) + (6 * X[j][i][5]),
                   (2.5 * X[j][i][6]) + (3.5 * X[j][i][7]),
                   (15 * X[j][i][8]) + (7.3 * X[j][i][3])] for i in range(data_length_per_computer)]) \
                                                                      for j in range(number_of_computers)]

models = [MovieToLatent(init_size, out_put_length) for i in range(number_of_computers)]


model_structure = models[0].state_dict()

for i in range(1, number_of_computers):
    models[i].load_state_dict(model_structure)
    


In [23]:
def multi_party_training(models, X, Y, number_of_computers):
    
    iterations = 1000
    for iteration in range(iterations):
        
        vaults = []
        gradients = []
        
        # get current model poarams state
        # in every starting iteration all models should have the same weights params
        current_model_data = get_model_weight_data(models[0])
        
        # all different servers learning on thier own on thier private data and creating a vault
        for computer_index in range(number_of_computers):
            
            x = X[computer_index]
            y = Y[computer_index]
            model = models[computer_index]
            
            optim_base = torch.optim.Adam(model.parameters(), lr=0.01, betas=(0.9, 0.98), eps=1e-9)
            mse = nn.MSELoss()

            # Learning step process
            _y = model(x)
            loss = mse(_y, y)
            optim_base.zero_grad()
            loss.backward()
            optim_base.step()

            # get model state after one step
            steped_model_data = get_model_weight_data(model)

            # get gradient 
            gradient = get_gradient(current_model_data, steped_model_data)
            
            gradients.append(gradient)
            
            # create a vault
            vault = get_vault(gradient, w, d)
            
            # send my vault to the master server
            vaults.append(vault)
            
        # create new gradient based on all inputs
        
        for computer_index in range(number_of_computers):
            set_gradient(vaults[computer_index], gradients[computer_index])
        
        new_weight = create_new_weight_based_on_gradient(gradients[0], gradients[1])
        for computer_index in range(2, number_of_computers):
            new_weight = create_new_weight_based_on_gradient(new_weight, gradients[computer_index])
            
        for index in range(len(new_weight)):
            new_weight[index] /= number_of_computers
        
        # create new weight
        new_weight = create_new_weight_based_on_gradient(current_model_data, new_weight)
        
        # insert new weight to model
        for computer_index in range(number_of_computers):
            insert_weights(new_weight, models[computer_index])

        print(loss)
  

In [24]:
multi_party_training(models, X, Y, number_of_computers)

tensor(59.7649, grad_fn=<MseLossBackward>)
tensor(59.5087, grad_fn=<MseLossBackward>)
tensor(59.2375, grad_fn=<MseLossBackward>)
tensor(58.9506, grad_fn=<MseLossBackward>)
tensor(58.6449, grad_fn=<MseLossBackward>)
tensor(58.3163, grad_fn=<MseLossBackward>)
tensor(57.9643, grad_fn=<MseLossBackward>)
tensor(57.5895, grad_fn=<MseLossBackward>)
tensor(57.1934, grad_fn=<MseLossBackward>)
tensor(56.7770, grad_fn=<MseLossBackward>)
tensor(56.3382, grad_fn=<MseLossBackward>)
tensor(55.8775, grad_fn=<MseLossBackward>)
tensor(55.3969, grad_fn=<MseLossBackward>)
tensor(54.8989, grad_fn=<MseLossBackward>)
tensor(54.3849, grad_fn=<MseLossBackward>)
tensor(53.8559, grad_fn=<MseLossBackward>)
tensor(53.3113, grad_fn=<MseLossBackward>)
tensor(52.7478, grad_fn=<MseLossBackward>)
tensor(52.1619, grad_fn=<MseLossBackward>)
tensor(51.5547, grad_fn=<MseLossBackward>)
tensor(50.9271, grad_fn=<MseLossBackward>)
tensor(50.2800, grad_fn=<MseLossBackward>)
tensor(49.6146, grad_fn=<MseLossBackward>)
tensor(48.9

tensor(4.0827, grad_fn=<MseLossBackward>)
tensor(4.0613, grad_fn=<MseLossBackward>)
tensor(4.0764, grad_fn=<MseLossBackward>)
tensor(4.0543, grad_fn=<MseLossBackward>)
tensor(4.0703, grad_fn=<MseLossBackward>)
tensor(4.0474, grad_fn=<MseLossBackward>)
tensor(4.0635, grad_fn=<MseLossBackward>)
tensor(4.0395, grad_fn=<MseLossBackward>)
tensor(4.0577, grad_fn=<MseLossBackward>)
tensor(4.0356, grad_fn=<MseLossBackward>)
tensor(4.0517, grad_fn=<MseLossBackward>)
tensor(4.0311, grad_fn=<MseLossBackward>)
tensor(4.0470, grad_fn=<MseLossBackward>)
tensor(4.0230, grad_fn=<MseLossBackward>)
tensor(4.0409, grad_fn=<MseLossBackward>)
tensor(4.0196, grad_fn=<MseLossBackward>)
tensor(4.0354, grad_fn=<MseLossBackward>)
tensor(4.0149, grad_fn=<MseLossBackward>)
tensor(4.0298, grad_fn=<MseLossBackward>)
tensor(4.0072, grad_fn=<MseLossBackward>)
tensor(4.0244, grad_fn=<MseLossBackward>)
tensor(4.0040, grad_fn=<MseLossBackward>)
tensor(4.0194, grad_fn=<MseLossBackward>)
tensor(3.9996, grad_fn=<MseLossBac

tensor(3.6888, grad_fn=<MseLossBackward>)
tensor(3.6809, grad_fn=<MseLossBackward>)
tensor(3.6858, grad_fn=<MseLossBackward>)
tensor(3.6790, grad_fn=<MseLossBackward>)
tensor(3.6835, grad_fn=<MseLossBackward>)
tensor(3.6732, grad_fn=<MseLossBackward>)
tensor(3.6826, grad_fn=<MseLossBackward>)
tensor(3.6715, grad_fn=<MseLossBackward>)
tensor(3.6796, grad_fn=<MseLossBackward>)
tensor(3.6695, grad_fn=<MseLossBackward>)
tensor(3.6765, grad_fn=<MseLossBackward>)
tensor(3.6678, grad_fn=<MseLossBackward>)
tensor(3.6743, grad_fn=<MseLossBackward>)
tensor(3.6650, grad_fn=<MseLossBackward>)
tensor(3.6724, grad_fn=<MseLossBackward>)
tensor(3.6631, grad_fn=<MseLossBackward>)
tensor(3.6694, grad_fn=<MseLossBackward>)
tensor(3.6614, grad_fn=<MseLossBackward>)
tensor(3.6665, grad_fn=<MseLossBackward>)
tensor(3.6579, grad_fn=<MseLossBackward>)
tensor(3.6651, grad_fn=<MseLossBackward>)
tensor(3.6551, grad_fn=<MseLossBackward>)
tensor(3.6629, grad_fn=<MseLossBackward>)
tensor(3.6532, grad_fn=<MseLossBac

tensor(3.5640, grad_fn=<MseLossBackward>)
tensor(3.5499, grad_fn=<MseLossBackward>)
tensor(3.5631, grad_fn=<MseLossBackward>)
tensor(3.5484, grad_fn=<MseLossBackward>)
tensor(3.5638, grad_fn=<MseLossBackward>)
tensor(3.5486, grad_fn=<MseLossBackward>)
tensor(3.5632, grad_fn=<MseLossBackward>)
tensor(3.5486, grad_fn=<MseLossBackward>)
tensor(3.5626, grad_fn=<MseLossBackward>)
tensor(3.5490, grad_fn=<MseLossBackward>)
tensor(3.5621, grad_fn=<MseLossBackward>)
tensor(3.5461, grad_fn=<MseLossBackward>)
tensor(3.5621, grad_fn=<MseLossBackward>)
tensor(3.5475, grad_fn=<MseLossBackward>)
tensor(3.5620, grad_fn=<MseLossBackward>)
tensor(3.5462, grad_fn=<MseLossBackward>)
tensor(3.5613, grad_fn=<MseLossBackward>)
tensor(3.5469, grad_fn=<MseLossBackward>)
tensor(3.5608, grad_fn=<MseLossBackward>)
tensor(3.5473, grad_fn=<MseLossBackward>)
tensor(3.5601, grad_fn=<MseLossBackward>)
tensor(3.5446, grad_fn=<MseLossBackward>)
tensor(3.5603, grad_fn=<MseLossBackward>)
tensor(3.5456, grad_fn=<MseLossBac

tensor(3.5388, grad_fn=<MseLossBackward>)
tensor(3.5250, grad_fn=<MseLossBackward>)
tensor(3.5386, grad_fn=<MseLossBackward>)
tensor(3.5254, grad_fn=<MseLossBackward>)
tensor(3.5386, grad_fn=<MseLossBackward>)
tensor(3.5253, grad_fn=<MseLossBackward>)
tensor(3.5385, grad_fn=<MseLossBackward>)
tensor(3.5253, grad_fn=<MseLossBackward>)
tensor(3.5384, grad_fn=<MseLossBackward>)
tensor(3.5252, grad_fn=<MseLossBackward>)
tensor(3.5386, grad_fn=<MseLossBackward>)
tensor(3.5249, grad_fn=<MseLossBackward>)
tensor(3.5385, grad_fn=<MseLossBackward>)
tensor(3.5249, grad_fn=<MseLossBackward>)
tensor(3.5382, grad_fn=<MseLossBackward>)
tensor(3.5255, grad_fn=<MseLossBackward>)
tensor(3.5384, grad_fn=<MseLossBackward>)
tensor(3.5232, grad_fn=<MseLossBackward>)
tensor(3.5388, grad_fn=<MseLossBackward>)
tensor(3.5224, grad_fn=<MseLossBackward>)
tensor(3.5384, grad_fn=<MseLossBackward>)
tensor(3.5232, grad_fn=<MseLossBackward>)
tensor(3.5381, grad_fn=<MseLossBackward>)
tensor(3.5226, grad_fn=<MseLossBac

tensor(3.5271, grad_fn=<MseLossBackward>)
tensor(3.5083, grad_fn=<MseLossBackward>)
tensor(3.5267, grad_fn=<MseLossBackward>)
tensor(3.5080, grad_fn=<MseLossBackward>)
tensor(3.5266, grad_fn=<MseLossBackward>)
tensor(3.5080, grad_fn=<MseLossBackward>)
tensor(3.5266, grad_fn=<MseLossBackward>)
tensor(3.5080, grad_fn=<MseLossBackward>)
tensor(3.5258, grad_fn=<MseLossBackward>)
tensor(3.5074, grad_fn=<MseLossBackward>)
tensor(3.5263, grad_fn=<MseLossBackward>)
tensor(3.5078, grad_fn=<MseLossBackward>)
tensor(3.5255, grad_fn=<MseLossBackward>)
tensor(3.5075, grad_fn=<MseLossBackward>)
