<a href="https://colab.research.google.com/github/adeotti/sudoku-env/blob/main/M9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# python version 3.9.11 
%pip install numpy==1.25.2 tensorboard==2.17.0 torchrl==0.4.0 gymnasium==0.29.1 tensordict==0.4.0

from IPython.display import clear_output
def clear():
  clear_output(wait=False)

import math,sys 
import torch

clear()

In [36]:
easyBoard = torch.tensor([
    [0, 0, 0, 5, 3, 1, 0, 0, 0],
    [0, 0, 0, 0, 4, 0, 3, 0, 1],
    [1, 0, 0, 8, 0, 0, 0, 0, 0],
    [0, 0, 4, 0, 0, 5, 6, 0, 0],
    [0, 0, 3, 9, 0, 2, 1, 4, 0],
    [6, 1, 5, 0, 7, 0, 0, 9, 8],
    [0, 2, 0, 0, 9, 6, 0, 1, 0],
    [0, 5, 7, 2, 0, 8, 0, 0, 6],
    [0, 6, 1, 7, 5, 3, 0, 2, 4]])

solution = torch.tensor([
    [8, 4, 9, 5, 3, 1, 7, 6, 2],
    [5, 7, 2, 6, 4, 9, 3, 8, 1],
    [1, 3, 6, 8, 2, 7, 4, 5, 9],
    [2, 9, 4, 1, 8, 5, 6, 7, 3],
    [7, 8, 3, 9, 6, 2, 1, 4, 5],
    [6, 1, 5, 3, 7, 4, 2, 9, 8],
    [3, 2, 8, 7, 9, 5, 1, 6, 7],
    [4, 5, 7, 2, 1, 8, 9, 3, 6],
    [9, 6, 1, 7, 5, 3, 8, 2, 4]])

Game and Utility class 

In [None]:
from dataclasses import dataclass

@dataclass(frozen=True)
class Board_specs:
  size: tuple = (9,9)
  low: int = 1
  high: int = 9

class Game:
    def __init__(self,action = None):
        self.board = easyBoard.clone()
        self.action = action
        self.reward = 0
        self.done = torch.equal(solution.clone(),self.board)

        self.modifiableCells = []
        
        for i,x in enumerate(self.board):
            for y in range(Board_specs.high): 
                if x[y] == 0: 
                    self.modifiableCells.append((i,y))    

    def Updated_board(self):
        if self.action is not None:
            row,column,value = self.action
            if (row,column) in self.modifiableCells:

                x = self.board[row].tolist()
                x.pop(column)
            
                y = [element[column].item() for element in self.board]
                y.pop(row)
                    
                n = int(math.sqrt(Board_specs.high))
                ix,iy = (self.action[0]//n)* n , (self.action[1]//n)* n
                region = torch.flatten(self.board[ix:ix+n , iy:iy+n]).tolist()

                local_row = row - ix
                local_col = column - iy
                action_index = local_row * n + local_col
                region_ = [num for idx, num in enumerate(region) if idx != action_index]

                sector = [x,y,region_]
                sector = [item for sublist in sector for item in sublist]
                sector_ = [element for element in sector if element !=0] # filtered the zeros

                if not value in sector_:
                    self.board[row][column] = value
                    self.reward +=10

                    if self.done :
                        self.reward+= 100
                    return self.board,self.reward,self.done
                
                else :
                    self.reward -= 10
                return self.board,self.reward,self.done

            else:
                self.reward -=10
        return self.board,self.reward,self.done
    
    def reset(self):
        self.board = easyBoard 
        return easyBoard

Environment

In [82]:
from torchrl.envs import EnvBase
from torchrl.data import BoundedTensorSpec,CompositeSpec
from tensordict import TensorDictBase,TensorDict

class environment(EnvBase):
    def __init__(self):
        super().__init__()

        self.action = None
        self.game = Game(self.action)
        self.updatedBoard,self.reward,self.done = self.game.Updated_board()

        self.action_spec = BoundedTensorSpec(
            low=[[0,0,1]],
            high=[[9,9,9]],
            shape=torch.Size([1,3]),  
            dtype=torch.int
        )

        self.observation_format = BoundedTensorSpec(
            low=1.0,
            high=9.0,
            shape=(easyBoard).unsqueeze(0).shape,
            dtype=torch.float32
        )
        self.observation_spec = CompositeSpec(observation = self.observation_format)

    def _step(self,tensordict) -> TensorDictBase :
        self.action = tensordict["action"][0] # original shape -> tensor([[x, y, value]])
        self.game = Game(self.action)
        self.updated,self.reward,self.done = Game(self.action).Updated_board()
         

        output = TensorDict(
            {
                "observation" : self.updatedBoard.clone().detach().unsqueeze(0).float(),
                "reward" : self.reward,
                "done" : self.done
            }
        )
        return output

    def _reset(self,tensordict,**kwargs) -> TensorDictBase :  
        output = TensorDict(
            {
                "observation" :  self.updatedBoard.clone().detach().unsqueeze(0).float()
                }
        )
        return output

    def _set_seed(self):
        pass

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

from tensordict.nn import TensorDictModule

from torchrl.modules import ValueOperator,ProbabilisticActor
from torchrl.objectives.value import GAE
from torchrl.collectors import SyncDataCollector

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hypers
l_rate = 0.01
sdg_momentum = 0.9

frames =  1000            # number of steps
sub_frame = 50              # for the most inner loop of the training step
total_frames = 100_000     # maximum steps

gamma = 0.80
lmbda = 0.99

env = environment()
dummy_observation = env._reset(None)["observation"] 

@torch.no_grad()
def weights_init(w):
  if isinstance(w,(nn.Conv2d,nn.LazyConv2d,nn.LazyLinear)):
    nn.init.kaiming_uniform(w.weight,mode="fan_in",nonlinearity="relu")
    if w.bias is not None : nn.init.zeros_(w.bias)

def networkInit(network : nn.Module):
  network.to(device)
  network.forward(dummy_observation)
  network.apply(weights_init)
  return network

In [7]:
import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)

class Mask: 
  # This will alter the softmax distribution so value in [x,y,value] != 0 
  def __init__(self):
    self.newValue = -float("inf")

  def apply(self,tensor : torch.FloatTensor):
    self.mask = torch.zeros_like(tensor,dtype=torch.bool)
    self.mask[-1,-1,0] = True
    tensor = tensor.masked_fill(mask=self.mask,value=self.newValue)
    return tensor


class ActorNetwork(nn.Module):
  def __init__(self):
    super().__init__()
    self.size = 81
    self.outputShape = 27 # 3*9 = 27 haha
    self.outputReshaped = (3,9)
    self.mask = Mask()

    self.input_layer = nn.LazyLinear(81)
    self.flat = nn.Flatten()
    self.dense_one = nn.LazyLinear(self.size)
    self.dense_two = nn.LazyLinear(self.size)
    self.output = nn.LazyLinear(self.outputShape)

  def forward(self,x):
    x = self.flat(x)
    x = F.relu(self.input_layer(x))
    x = F.relu(self.dense_one(x))
    x = F.relu(self.dense_two(x))
    x = F.relu(self.output(x))
    x = torch.unflatten(x,-1,(self.outputReshaped))
    x = self.mask.apply(x)
    return F.softmax(x,-1)

Actor = networkInit(network=ActorNetwork()) 

Policy = TensorDictModule(
  module=Actor, 
  in_keys=["observation"],
  out_keys=["probs"]
)

PolicyModule = ProbabilisticActor(
  module=Policy ,
  spec=env.action_spec,in_keys=["probs"],
  distribution_class = Categorical,
  return_log_prob = True
)

In [None]:
Collector = SyncDataCollector(
    create_env_fn=env,
    policy=PolicyModule,
    frames_per_batch=frames,
    total_frames=total_frames,
)
x = Collector.rollout()["next","reward"] 
print(x.max())
sys.exit()
#clear()

In [None]:
class ValueNetwork(nn.Module):
  def __init__(self):
    super().__init__()
    self.size = 81
    self.input_layer = nn.LazyLinear(self.size)
    self.flat = nn.Flatten()
    self.dense_one = nn.LazyLinear(self.size)
    self.dense_two = nn.LazyLinear(self.size)
    self.output = nn.LazyLinear(1)

  def forward(self,x):
    x = self.flat(x)
    x = F.relu(self.input_layer(x))
    x = F.relu(self.dense_one(x))
    x = F.relu(self.dense_two(x))
    return self.output(x)

Critic = networkInit(network=ValueNetwork())

ValueModule = ValueOperator(
  module= Critic,
  in_keys=["observation"]
)

Advantage = GAE(
  gamma=gamma,
  lmbda=lmbda,
  value_network=ValueModule,
  average_gae=True,
  device=device
)

Training loop

In [None]:
from torchrl.data import ReplayBuffer,SamplerWithoutReplacement,LazyTensorStorage
from torchrl.objectives import ClipPPOLoss
from tqdm import tqdm
from collections import deque
from torch.utils.tensorboard import SummaryWriter

class Training:
    def __init__(self):
        self.collector = Collector
        self.epochs = 10
        self.valuemodule = ValueModule
        self.advantage = Advantage

        self.policy = PolicyModule

        self.lossfunction = ClipPPOLoss(
            actor_network=PolicyModule,
            critic_network=ValueModule,
            entropy_coef=0.01
        )
        self.optimizer = torch.optim.Adam(
            params=self.lossfunction.parameters(),
            lr=l_rate
        )
        self.memory = ReplayBuffer(
            storage=LazyTensorStorage(max_size=frames),
            sampler=SamplerWithoutReplacement()
        )

    def save_logs(self):
            log_dir = "trainingData/new/"  
            self.writer = SummaryWriter(log_dir)

    def save_weight(self):
        path = "trainingData/new/actor_100k.pth"  
        torch.save(self.policy.state_dict(),path)
    
    def fullyTrainedmodel(self):
        path = "trainingData/new/fullyTrainedmodel.pth"
        torch.save(self.policy.state_dict(),path)

    def train(self,start : bool = None):
        if start:
            bestReward = -20
            rewardHistory = deque(maxlen = 10)
            self.save_logs()

            for i,data_tensordict in tqdm(enumerate(self.collector),total = total_frames/frames):
                
                for _ in range(self.epochs):
                    self.advantage(data_tensordict) # this apply the advantage compute and modify data_tensordict
                    data_tensordict["advantage"] = data_tensordict["advantage"].unsqueeze(-1)
                    self.memory.extend(data_tensordict)
              
                    for _ in  range(frames//sub_frame):
                        subdata = self.memory.sample(sub_frame)

                        loss_val = self.lossfunction(subdata)
                        loss_value = (
                            loss_val["loss_objective"] + 
                            loss_val["loss_critic"] + 
                            loss_val["loss_entropy"]
                        )
                        print(loss_value)
                        self.optimizer.zero_grad()
                        loss_value.backward()
                        self.optimizer.step()

                self.writer.add_scalar("main/batch_number",i)
                self.writer.add_scalar("main/Advantage",data_tensordict["advantage"][0].item())
                self.writer.add_scalar("main/reward",data_tensordict["next"]["reward"][0].mean().item())
               
                self.writer.add_scalar("loss/Entropy",loss_val["loss_entropy"].item())
                self.writer.add_scalar("loss/Loss_sum",loss_value.item())
                self.writer.add_scalar("loss/Loss_entropy",(loss_val["loss_entropy"].item()))
                self.writer.add_scalar("loss/Loss_critic",loss_val["loss_critic"].item())
                self.writer.add_scalar("loss/Loss_objective",loss_val["loss_objective"].item())

                currentReward = data_tensordict["next"]["reward"][0].mean()
                rewardHistory.append(currentReward)
                averageReward = sum(rewardHistory)/len(rewardHistory)
                self.writer.add_scalar("main/Average reward",averageReward)

                if averageReward > bestReward:
                    self.save_weight()
                    bestReward = averageReward

            self.fullyTrainedmodel() 

Training().train(start=True)

  0%|          | 0/100.0 [00:00<?, ?it/s]

tensor(2.5323, grad_fn=<AddBackward0>)
tensor(8.5019, grad_fn=<AddBackward0>)
tensor(2.8243, grad_fn=<AddBackward0>)
tensor(3.4332, grad_fn=<AddBackward0>)
tensor(4.2531, grad_fn=<AddBackward0>)
tensor(4.6681, grad_fn=<AddBackward0>)
tensor(3.5717, grad_fn=<AddBackward0>)
tensor(2.5140, grad_fn=<AddBackward0>)
tensor(3.7791, grad_fn=<AddBackward0>)
tensor(3.7225, grad_fn=<AddBackward0>)
tensor(3.9238, grad_fn=<AddBackward0>)
tensor(2.5626, grad_fn=<AddBackward0>)
tensor(2.4578, grad_fn=<AddBackward0>)
tensor(2.9876, grad_fn=<AddBackward0>)
tensor(3.0970, grad_fn=<AddBackward0>)
tensor(3.2333, grad_fn=<AddBackward0>)
tensor(2.7403, grad_fn=<AddBackward0>)
tensor(2.8980, grad_fn=<AddBackward0>)
tensor(2.7342, grad_fn=<AddBackward0>)
tensor(4.2444, grad_fn=<AddBackward0>)
tensor(3.1839, grad_fn=<AddBackward0>)
tensor(3.3180, grad_fn=<AddBackward0>)
tensor(2.8533, grad_fn=<AddBackward0>)
tensor(3.1324, grad_fn=<AddBackward0>)
tensor(2.1903, grad_fn=<AddBackward0>)
tensor(2.6580, grad_fn=<A

  1%|          | 1/100.0 [00:05<08:49,  5.35s/it]

tensor(3.7869, grad_fn=<AddBackward0>)
tensor(2.2491, grad_fn=<AddBackward0>)
tensor(2.8140, grad_fn=<AddBackward0>)
tensor(2.6402, grad_fn=<AddBackward0>)
tensor(2.9678, grad_fn=<AddBackward0>)
tensor(2.9583, grad_fn=<AddBackward0>)
tensor(2.0975, grad_fn=<AddBackward0>)
tensor(2.2369, grad_fn=<AddBackward0>)
tensor(2.5846, grad_fn=<AddBackward0>)
tensor(2.8823, grad_fn=<AddBackward0>)
tensor(3.0546, grad_fn=<AddBackward0>)
tensor(2.6596, grad_fn=<AddBackward0>)
tensor(2.9999, grad_fn=<AddBackward0>)
tensor(2.7229, grad_fn=<AddBackward0>)
tensor(2.4940, grad_fn=<AddBackward0>)
tensor(2.8262, grad_fn=<AddBackward0>)
tensor(2.4150, grad_fn=<AddBackward0>)
tensor(2.7791, grad_fn=<AddBackward0>)
tensor(2.8155, grad_fn=<AddBackward0>)
tensor(2.6475, grad_fn=<AddBackward0>)
tensor(2.6336, grad_fn=<AddBackward0>)
tensor(2.9920, grad_fn=<AddBackward0>)
tensor(3.4382, grad_fn=<AddBackward0>)
tensor(2.4289, grad_fn=<AddBackward0>)
tensor(2.8069, grad_fn=<AddBackward0>)
tensor(2.5163, grad_fn=<A

  2%|▏         | 2/100.0 [00:10<08:37,  5.28s/it]

tensor(2.5804, grad_fn=<AddBackward0>)
tensor(2.5411, grad_fn=<AddBackward0>)
tensor(2.5730, grad_fn=<AddBackward0>)
tensor(2.6116, grad_fn=<AddBackward0>)
tensor(2.9647, grad_fn=<AddBackward0>)
tensor(2.7874, grad_fn=<AddBackward0>)
tensor(2.0525, grad_fn=<AddBackward0>)
tensor(2.5697, grad_fn=<AddBackward0>)
tensor(2.7131, grad_fn=<AddBackward0>)
tensor(2.2620, grad_fn=<AddBackward0>)
tensor(2.2363, grad_fn=<AddBackward0>)
tensor(2.2109, grad_fn=<AddBackward0>)
tensor(2.9222, grad_fn=<AddBackward0>)
tensor(2.9116, grad_fn=<AddBackward0>)
tensor(2.7920, grad_fn=<AddBackward0>)
tensor(2.5098, grad_fn=<AddBackward0>)
tensor(2.2377, grad_fn=<AddBackward0>)
tensor(2.4609, grad_fn=<AddBackward0>)
tensor(2.1257, grad_fn=<AddBackward0>)
tensor(2.3598, grad_fn=<AddBackward0>)
tensor(2.4270, grad_fn=<AddBackward0>)
tensor(2.2755, grad_fn=<AddBackward0>)
tensor(2.4045, grad_fn=<AddBackward0>)
tensor(2.4754, grad_fn=<AddBackward0>)
tensor(2.3217, grad_fn=<AddBackward0>)
tensor(2.2374, grad_fn=<A

  3%|▎         | 3/100.0 [00:15<08:31,  5.28s/it]

tensor(2.3120, grad_fn=<AddBackward0>)
tensor(2.7621, grad_fn=<AddBackward0>)
tensor(2.7251, grad_fn=<AddBackward0>)
tensor(2.7410, grad_fn=<AddBackward0>)
tensor(2.6011, grad_fn=<AddBackward0>)
tensor(2.8377, grad_fn=<AddBackward0>)
tensor(2.3288, grad_fn=<AddBackward0>)
tensor(2.0857, grad_fn=<AddBackward0>)
tensor(2.2081, grad_fn=<AddBackward0>)
tensor(2.8262, grad_fn=<AddBackward0>)
tensor(2.8453, grad_fn=<AddBackward0>)
tensor(3.3175, grad_fn=<AddBackward0>)
tensor(2.7973, grad_fn=<AddBackward0>)
tensor(2.5622, grad_fn=<AddBackward0>)
tensor(3.4719, grad_fn=<AddBackward0>)
tensor(3.3192, grad_fn=<AddBackward0>)
tensor(2.5210, grad_fn=<AddBackward0>)
tensor(3.0592, grad_fn=<AddBackward0>)
tensor(2.8159, grad_fn=<AddBackward0>)
tensor(3.0601, grad_fn=<AddBackward0>)
tensor(2.7102, grad_fn=<AddBackward0>)
tensor(2.7103, grad_fn=<AddBackward0>)
tensor(3.3757, grad_fn=<AddBackward0>)
tensor(2.7582, grad_fn=<AddBackward0>)
tensor(2.7472, grad_fn=<AddBackward0>)
tensor(2.4401, grad_fn=<A

  4%|▍         | 4/100.0 [00:21<08:22,  5.23s/it]

tensor(3.2571, grad_fn=<AddBackward0>)
tensor(2.2967, grad_fn=<AddBackward0>)
tensor(2.5478, grad_fn=<AddBackward0>)
tensor(3.1249, grad_fn=<AddBackward0>)
tensor(3.1425, grad_fn=<AddBackward0>)
tensor(2.9209, grad_fn=<AddBackward0>)
tensor(3.1151, grad_fn=<AddBackward0>)
tensor(2.7674, grad_fn=<AddBackward0>)
tensor(3.1681, grad_fn=<AddBackward0>)
tensor(3.4724, grad_fn=<AddBackward0>)
tensor(3.0047, grad_fn=<AddBackward0>)
tensor(2.6648, grad_fn=<AddBackward0>)
tensor(2.4119, grad_fn=<AddBackward0>)
tensor(2.6167, grad_fn=<AddBackward0>)
tensor(2.0934, grad_fn=<AddBackward0>)
tensor(2.8732, grad_fn=<AddBackward0>)
tensor(3.1666, grad_fn=<AddBackward0>)
tensor(2.8911, grad_fn=<AddBackward0>)
tensor(2.3935, grad_fn=<AddBackward0>)
tensor(2.9016, grad_fn=<AddBackward0>)
tensor(3.0614, grad_fn=<AddBackward0>)
tensor(3.0102, grad_fn=<AddBackward0>)
tensor(2.6924, grad_fn=<AddBackward0>)
tensor(2.8423, grad_fn=<AddBackward0>)
tensor(2.8839, grad_fn=<AddBackward0>)
tensor(2.0852, grad_fn=<A

  5%|▌         | 5/100.0 [00:25<08:07,  5.13s/it]

tensor(3.0234, grad_fn=<AddBackward0>)
tensor(2.7298, grad_fn=<AddBackward0>)
tensor(2.6550, grad_fn=<AddBackward0>)
tensor(2.4663, grad_fn=<AddBackward0>)
tensor(2.5933, grad_fn=<AddBackward0>)
tensor(2.6108, grad_fn=<AddBackward0>)
tensor(2.5596, grad_fn=<AddBackward0>)
tensor(2.8661, grad_fn=<AddBackward0>)
tensor(2.5667, grad_fn=<AddBackward0>)
tensor(2.7904, grad_fn=<AddBackward0>)
tensor(2.7717, grad_fn=<AddBackward0>)
tensor(2.8750, grad_fn=<AddBackward0>)
tensor(2.7531, grad_fn=<AddBackward0>)
tensor(2.5561, grad_fn=<AddBackward0>)
tensor(2.4649, grad_fn=<AddBackward0>)
tensor(2.4339, grad_fn=<AddBackward0>)
tensor(2.6001, grad_fn=<AddBackward0>)
tensor(2.5487, grad_fn=<AddBackward0>)
tensor(2.8459, grad_fn=<AddBackward0>)
tensor(2.6684, grad_fn=<AddBackward0>)
tensor(2.2276, grad_fn=<AddBackward0>)
tensor(2.6873, grad_fn=<AddBackward0>)
tensor(2.4044, grad_fn=<AddBackward0>)
tensor(2.6872, grad_fn=<AddBackward0>)
tensor(2.5134, grad_fn=<AddBackward0>)
tensor(2.5379, grad_fn=<A

  6%|▌         | 6/100.0 [00:31<07:59,  5.10s/it]

tensor(2.5037, grad_fn=<AddBackward0>)
tensor(2.5107, grad_fn=<AddBackward0>)
tensor(2.8927, grad_fn=<AddBackward0>)
tensor(3.4939, grad_fn=<AddBackward0>)
tensor(3.7910, grad_fn=<AddBackward0>)
tensor(2.8914, grad_fn=<AddBackward0>)
tensor(2.8734, grad_fn=<AddBackward0>)
tensor(3.2387, grad_fn=<AddBackward0>)
tensor(2.8247, grad_fn=<AddBackward0>)
tensor(3.0728, grad_fn=<AddBackward0>)
tensor(2.4824, grad_fn=<AddBackward0>)
tensor(2.8152, grad_fn=<AddBackward0>)
tensor(2.9186, grad_fn=<AddBackward0>)
tensor(2.8801, grad_fn=<AddBackward0>)
tensor(2.8584, grad_fn=<AddBackward0>)
tensor(2.4003, grad_fn=<AddBackward0>)
tensor(3.2913, grad_fn=<AddBackward0>)
tensor(3.5795, grad_fn=<AddBackward0>)
tensor(3.7961, grad_fn=<AddBackward0>)
tensor(3.2976, grad_fn=<AddBackward0>)
tensor(3.3984, grad_fn=<AddBackward0>)
tensor(3.8039, grad_fn=<AddBackward0>)
tensor(3.3875, grad_fn=<AddBackward0>)
tensor(3.0777, grad_fn=<AddBackward0>)
tensor(2.6008, grad_fn=<AddBackward0>)
tensor(2.7383, grad_fn=<A

  7%|▋         | 7/100.0 [00:36<07:56,  5.13s/it]

tensor(2.8390, grad_fn=<AddBackward0>)
tensor(2.6078, grad_fn=<AddBackward0>)
tensor(2.6493, grad_fn=<AddBackward0>)
tensor(3.5792, grad_fn=<AddBackward0>)
tensor(2.9513, grad_fn=<AddBackward0>)
tensor(2.7570, grad_fn=<AddBackward0>)
tensor(2.7854, grad_fn=<AddBackward0>)
tensor(3.1099, grad_fn=<AddBackward0>)
tensor(3.5857, grad_fn=<AddBackward0>)
tensor(2.9314, grad_fn=<AddBackward0>)
tensor(2.9744, grad_fn=<AddBackward0>)
tensor(2.0190, grad_fn=<AddBackward0>)
tensor(2.8783, grad_fn=<AddBackward0>)
tensor(3.4427, grad_fn=<AddBackward0>)
tensor(3.5141, grad_fn=<AddBackward0>)
tensor(2.6320, grad_fn=<AddBackward0>)
tensor(2.6499, grad_fn=<AddBackward0>)
tensor(2.7353, grad_fn=<AddBackward0>)
tensor(3.4369, grad_fn=<AddBackward0>)
tensor(2.9308, grad_fn=<AddBackward0>)
tensor(3.1409, grad_fn=<AddBackward0>)
tensor(2.6989, grad_fn=<AddBackward0>)
tensor(3.1045, grad_fn=<AddBackward0>)
tensor(3.5318, grad_fn=<AddBackward0>)
tensor(3.1579, grad_fn=<AddBackward0>)
tensor(3.2769, grad_fn=<A

  8%|▊         | 8/100.0 [00:41<07:59,  5.21s/it]

tensor(3.1365, grad_fn=<AddBackward0>)
tensor(2.9629, grad_fn=<AddBackward0>)
tensor(2.9599, grad_fn=<AddBackward0>)
tensor(3.3237, grad_fn=<AddBackward0>)
tensor(2.9663, grad_fn=<AddBackward0>)
tensor(3.2604, grad_fn=<AddBackward0>)
tensor(3.5877, grad_fn=<AddBackward0>)
tensor(2.9966, grad_fn=<AddBackward0>)
tensor(3.2458, grad_fn=<AddBackward0>)
tensor(3.0115, grad_fn=<AddBackward0>)
tensor(2.9216, grad_fn=<AddBackward0>)
tensor(2.7418, grad_fn=<AddBackward0>)
tensor(3.1030, grad_fn=<AddBackward0>)
tensor(2.5844, grad_fn=<AddBackward0>)
tensor(2.2294, grad_fn=<AddBackward0>)
tensor(2.5637, grad_fn=<AddBackward0>)
tensor(2.9129, grad_fn=<AddBackward0>)
tensor(2.3926, grad_fn=<AddBackward0>)
tensor(2.0853, grad_fn=<AddBackward0>)
tensor(2.4495, grad_fn=<AddBackward0>)
tensor(2.9990, grad_fn=<AddBackward0>)
tensor(2.7636, grad_fn=<AddBackward0>)
tensor(2.3709, grad_fn=<AddBackward0>)
tensor(2.3560, grad_fn=<AddBackward0>)
tensor(3.2028, grad_fn=<AddBackward0>)
tensor(2.5301, grad_fn=<A

  9%|▉         | 9/100.0 [00:47<08:09,  5.38s/it]

tensor(2.4668, grad_fn=<AddBackward0>)
tensor(2.6128, grad_fn=<AddBackward0>)
tensor(2.5490, grad_fn=<AddBackward0>)
tensor(2.6315, grad_fn=<AddBackward0>)
tensor(2.3160, grad_fn=<AddBackward0>)
tensor(2.3699, grad_fn=<AddBackward0>)
tensor(2.7011, grad_fn=<AddBackward0>)
tensor(2.6805, grad_fn=<AddBackward0>)
tensor(2.3503, grad_fn=<AddBackward0>)
tensor(2.0573, grad_fn=<AddBackward0>)
tensor(2.6065, grad_fn=<AddBackward0>)
tensor(2.8790, grad_fn=<AddBackward0>)
tensor(2.3760, grad_fn=<AddBackward0>)
tensor(2.9881, grad_fn=<AddBackward0>)
tensor(2.3357, grad_fn=<AddBackward0>)
tensor(2.9543, grad_fn=<AddBackward0>)
tensor(3.3531, grad_fn=<AddBackward0>)
tensor(2.8749, grad_fn=<AddBackward0>)
tensor(3.0191, grad_fn=<AddBackward0>)
tensor(3.1376, grad_fn=<AddBackward0>)
tensor(3.3418, grad_fn=<AddBackward0>)
tensor(2.8784, grad_fn=<AddBackward0>)
tensor(3.3247, grad_fn=<AddBackward0>)
tensor(3.0237, grad_fn=<AddBackward0>)
tensor(2.7841, grad_fn=<AddBackward0>)
tensor(2.3937, grad_fn=<A

 10%|█         | 10/100.0 [00:52<08:04,  5.39s/it]

tensor(2.9987, grad_fn=<AddBackward0>)
tensor(2.9408, grad_fn=<AddBackward0>)
tensor(2.4300, grad_fn=<AddBackward0>)
tensor(3.4432, grad_fn=<AddBackward0>)
tensor(3.2287, grad_fn=<AddBackward0>)
tensor(2.3704, grad_fn=<AddBackward0>)
tensor(3.3302, grad_fn=<AddBackward0>)
tensor(2.8993, grad_fn=<AddBackward0>)
tensor(2.3985, grad_fn=<AddBackward0>)
tensor(2.9653, grad_fn=<AddBackward0>)
tensor(3.1379, grad_fn=<AddBackward0>)
tensor(2.3406, grad_fn=<AddBackward0>)
tensor(2.6066, grad_fn=<AddBackward0>)
tensor(3.0862, grad_fn=<AddBackward0>)
tensor(3.4508, grad_fn=<AddBackward0>)
tensor(2.6384, grad_fn=<AddBackward0>)
tensor(2.9537, grad_fn=<AddBackward0>)
tensor(3.0272, grad_fn=<AddBackward0>)
tensor(2.8632, grad_fn=<AddBackward0>)
tensor(2.5221, grad_fn=<AddBackward0>)
tensor(2.9607, grad_fn=<AddBackward0>)
tensor(2.8241, grad_fn=<AddBackward0>)
tensor(2.9267, grad_fn=<AddBackward0>)
tensor(3.1852, grad_fn=<AddBackward0>)
tensor(2.9646, grad_fn=<AddBackward0>)
tensor(2.7272, grad_fn=<A

 11%|█         | 11/100.0 [00:58<08:12,  5.54s/it]

tensor(2.7553, grad_fn=<AddBackward0>)
tensor(3.3193, grad_fn=<AddBackward0>)
tensor(3.0739, grad_fn=<AddBackward0>)
tensor(2.7351, grad_fn=<AddBackward0>)
tensor(2.8813, grad_fn=<AddBackward0>)
tensor(2.5138, grad_fn=<AddBackward0>)
tensor(3.0738, grad_fn=<AddBackward0>)
tensor(2.5837, grad_fn=<AddBackward0>)
tensor(2.5880, grad_fn=<AddBackward0>)
tensor(2.6550, grad_fn=<AddBackward0>)
tensor(3.0501, grad_fn=<AddBackward0>)
tensor(3.4112, grad_fn=<AddBackward0>)
tensor(2.9479, grad_fn=<AddBackward0>)
tensor(3.6233, grad_fn=<AddBackward0>)
tensor(3.4014, grad_fn=<AddBackward0>)
tensor(3.6803, grad_fn=<AddBackward0>)
tensor(3.4025, grad_fn=<AddBackward0>)
tensor(3.2576, grad_fn=<AddBackward0>)
tensor(3.2046, grad_fn=<AddBackward0>)
tensor(3.1107, grad_fn=<AddBackward0>)
tensor(2.8642, grad_fn=<AddBackward0>)
tensor(3.4985, grad_fn=<AddBackward0>)
tensor(3.0044, grad_fn=<AddBackward0>)
tensor(3.1031, grad_fn=<AddBackward0>)
tensor(2.7588, grad_fn=<AddBackward0>)
tensor(3.5484, grad_fn=<A

 12%|█▏        | 12/100.0 [01:03<08:01,  5.47s/it]

tensor(2.6082, grad_fn=<AddBackward0>)
tensor(3.3115, grad_fn=<AddBackward0>)
tensor(3.1708, grad_fn=<AddBackward0>)
tensor(3.1928, grad_fn=<AddBackward0>)
tensor(2.8779, grad_fn=<AddBackward0>)
tensor(3.0270, grad_fn=<AddBackward0>)
tensor(2.8672, grad_fn=<AddBackward0>)
tensor(3.4752, grad_fn=<AddBackward0>)
tensor(3.1972, grad_fn=<AddBackward0>)
tensor(2.9157, grad_fn=<AddBackward0>)
tensor(2.7534, grad_fn=<AddBackward0>)
tensor(3.3973, grad_fn=<AddBackward0>)
tensor(4.0552, grad_fn=<AddBackward0>)
tensor(3.0884, grad_fn=<AddBackward0>)
tensor(3.3379, grad_fn=<AddBackward0>)
tensor(3.0043, grad_fn=<AddBackward0>)
tensor(3.1609, grad_fn=<AddBackward0>)
tensor(3.5923, grad_fn=<AddBackward0>)
tensor(2.4188, grad_fn=<AddBackward0>)
tensor(3.0938, grad_fn=<AddBackward0>)
tensor(3.0125, grad_fn=<AddBackward0>)
tensor(2.3768, grad_fn=<AddBackward0>)
tensor(2.8885, grad_fn=<AddBackward0>)
tensor(2.9543, grad_fn=<AddBackward0>)
tensor(2.7014, grad_fn=<AddBackward0>)
tensor(2.9732, grad_fn=<A

 13%|█▎        | 13/100.0 [01:09<08:01,  5.53s/it]

tensor(2.8527, grad_fn=<AddBackward0>)
tensor(2.4945, grad_fn=<AddBackward0>)
tensor(3.0686, grad_fn=<AddBackward0>)
tensor(4.1006, grad_fn=<AddBackward0>)
tensor(2.8300, grad_fn=<AddBackward0>)
tensor(2.3960, grad_fn=<AddBackward0>)
tensor(4.4742, grad_fn=<AddBackward0>)
tensor(5.1510, grad_fn=<AddBackward0>)
tensor(3.5330, grad_fn=<AddBackward0>)
tensor(2.9149, grad_fn=<AddBackward0>)
tensor(3.0066, grad_fn=<AddBackward0>)
tensor(3.1103, grad_fn=<AddBackward0>)
tensor(3.0388, grad_fn=<AddBackward0>)
tensor(3.5269, grad_fn=<AddBackward0>)
tensor(3.4867, grad_fn=<AddBackward0>)
tensor(3.4738, grad_fn=<AddBackward0>)
tensor(2.8077, grad_fn=<AddBackward0>)
tensor(2.4177, grad_fn=<AddBackward0>)
tensor(3.4078, grad_fn=<AddBackward0>)
tensor(3.5681, grad_fn=<AddBackward0>)
tensor(2.8328, grad_fn=<AddBackward0>)
tensor(2.6237, grad_fn=<AddBackward0>)
tensor(3.0341, grad_fn=<AddBackward0>)
tensor(3.0532, grad_fn=<AddBackward0>)
tensor(3.4401, grad_fn=<AddBackward0>)
tensor(2.8646, grad_fn=<A

 14%|█▍        | 14/100.0 [01:14<07:47,  5.44s/it]

tensor(3.5405, grad_fn=<AddBackward0>)
tensor(2.6876, grad_fn=<AddBackward0>)
tensor(2.7003, grad_fn=<AddBackward0>)
tensor(2.9471, grad_fn=<AddBackward0>)
tensor(2.6658, grad_fn=<AddBackward0>)
tensor(3.4869, grad_fn=<AddBackward0>)
tensor(2.2580, grad_fn=<AddBackward0>)
tensor(2.3767, grad_fn=<AddBackward0>)
tensor(2.5259, grad_fn=<AddBackward0>)
tensor(2.7391, grad_fn=<AddBackward0>)
tensor(2.9428, grad_fn=<AddBackward0>)
tensor(2.6794, grad_fn=<AddBackward0>)
tensor(2.3477, grad_fn=<AddBackward0>)
tensor(2.4064, grad_fn=<AddBackward0>)
tensor(2.6970, grad_fn=<AddBackward0>)
tensor(2.5731, grad_fn=<AddBackward0>)
tensor(2.5711, grad_fn=<AddBackward0>)
tensor(2.7162, grad_fn=<AddBackward0>)
tensor(2.3935, grad_fn=<AddBackward0>)
tensor(3.2858, grad_fn=<AddBackward0>)
tensor(2.1015, grad_fn=<AddBackward0>)
tensor(3.3549, grad_fn=<AddBackward0>)
tensor(2.8320, grad_fn=<AddBackward0>)
tensor(2.7755, grad_fn=<AddBackward0>)
tensor(2.3740, grad_fn=<AddBackward0>)
tensor(2.0981, grad_fn=<A

 15%|█▌        | 15/100.0 [01:20<07:36,  5.37s/it]

tensor(3.5946, grad_fn=<AddBackward0>)
tensor(2.7765, grad_fn=<AddBackward0>)
tensor(2.5692, grad_fn=<AddBackward0>)
tensor(2.4131, grad_fn=<AddBackward0>)


 15%|█▌        | 15/100.0 [01:22<07:45,  5.48s/it]


KeyboardInterrupt: 