# SAC

In [158]:
import math
import random
import sys

import gym
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
from tensorboardX import SummaryWriter

from IPython.display import clear_output
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import display

%matplotlib inline

use_cuda = torch.cuda.is_available()
device   = torch.device("cuda" if use_cuda else "cpu")

# Networks

In [159]:
def gaussian_likelihood(x, mu, log_std, eps=1e-8):
    #-0.5 * (((x-mu)/(tf.exp(log_std)+EPS))**2 + 2*log_std + np.log(2*np.pi))
#     return Normal(mu, std).log_prob(mean+ std*z.to(device)) - torch.log(1 - action.pow(2) + epsilon)
    return torch.sum(-0.5  * ((x-mu) / log_std.exp() + eps)**2 + 2 * log_std + np.log(2*np.pi))

In [160]:
class ValueNetwork(nn.Module):
    def __init__(self, state_dim, hidden_dim=[400,300], init_w=3e-3):
        super(ValueNetwork, self).__init__()
        
        self.linear1 = nn.Linear(state_dim, hidden_dim[0])
        self.linear2 = nn.Linear(hidden_dim[0], hidden_dim[1])
        self.linear3 = nn.Linear(hidden_dim[1], 1)
        
        self.linear3.weight.data.uniform_(-init_w, init_w)
        self.linear3.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

In [161]:
class SoftQNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions, hidden_size=[400,300], init_w=3e-3):
        super(SoftQNetwork, self).__init__()
        
        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_size[0])
        self.linear2 = nn.Linear(hidden_size[0], hidden_size[1])
        self.linear3 = nn.Linear(hidden_size[1], 1)
        
        self.linear3.weight.data.uniform_(-init_w, init_w)
        self.linear3.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state, action):
        x = torch.cat([state, action], 1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

In [162]:
class PolicyNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions, hidden_size=[400,300], init_w=3e-3, log_std_min=-20, log_std_max=2, epsilon=1e-6):
        super(PolicyNetwork, self).__init__()
        
        self.epsilon = epsilon
        
        self.log_std_min = log_std_min
        self.log_std_max = log_std_max
        
        self.linear1 = nn.Linear(num_inputs, hidden_size[0])
        self.linear2 = nn.Linear(hidden_size[0], hidden_size[1])
        
        self.mean_linear = nn.Linear(hidden_size[1], num_actions)
        self.mean_linear.weight.data.uniform_(-init_w, init_w)
        self.mean_linear.bias.data.uniform_(-init_w, init_w)
        
        self.log_std_linear = nn.Linear(hidden_size[1], num_actions)
        self.log_std_linear.weight.data.uniform_(-init_w, init_w)
        self.log_std_linear.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        
        mean    = self.mean_linear(x)
        log_std = self.log_std_linear(x)
        log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)

        return mean, log_std
        
    def evaluate(self, state, epsilon=1e-6):
        mean, log_std = self.forward(state)
        std = log_std.exp()
        
        normal = Normal(0, 1)
        z = mean + std * normal.sample().requires_grad_()
        action = torch.tanh(z)
        logp_pi = Normal(mean, std).log_prob(z) - torch.log(1 - action * action + self.epsilon)
        logp_pi = logp_pi.sum(dim=1, keepdim=True)
        
        return action, logp_pi, mean, log_std
        
    
    def get_action(self, state, deterministic=False):
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        mean, log_std = self.forward(state)
        std = log_std.exp()

        normal = Normal(0, 1)
        z = mean + std * normal.sample().requires_grad_()
        action = torch.tanh(mean) if deterministic else torch.tanh(z)
        
        action  = action.cpu() #.detach().cpu().numpy()
        return action[0]

# Memory

In [163]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0
    
    def push(self, state, action, reward, next_state, done):
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity
    
    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = map(np.stack, zip(*batch))
        return state, action, reward, next_state, done
    
    def __len__(self):
        return len(self.buffer)

In [175]:
class NormalizedActions(gym.ActionWrapper):
    def _action(self, action):
        low  = self.action_space.low
        high = self.action_space.high
        print(":NA",action)
        action = low + (action + 1.0) * 0.5 * (high - low)
        action = np.clip(action, low, high)
        
        return action

    def _reverse_action(self, action):
        low  = self.action_space.low
        high = self.action_space.high
        
        action = 2 * (action - low) / (high - low) - 1
        action = np.clip(action, low, high)
        
        return actions

In [176]:
def plot(frame_idx, rewards):
    clear_output(True)
    plt.figure(figsize=(20,5))
    plt.subplot(131)
    plt.title('frame %s. reward: %s' % (frame_idx, rewards[-1]))
    plt.plot(rewards)
    plt.show()

In [177]:
def loss_function(value, target):
    """ Carries out a variation of mean squared error

        Args:
            value: current prediciton value
            target: target value

        Returns:
            MSE of the predicted value
    """
    return 0.5 * torch.mean((target.detach() - value)**2)

# Agent

In [178]:
class SAC(object):
    
    def __init__(self, state_dim, action_dim,env, auto_alpha_tuning=False):
        
        self.action_dim = action_dim
        self.state_dim  = state_dim
        self.hidden_dim = [400,300]
        
        self.auto_alpha_tuning = auto_alpha_tuning
        
        # Value init
        self.value_net        = ValueNetwork(self.state_dim, self.hidden_dim).to(device)
        self.target_value_net = ValueNetwork(self.state_dim, self.hidden_dim).to(device)
        
        for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()):
            target_param.data.copy_(param.data)
            
        
        # Soft Q
        self.soft_q_net1 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        self.soft_q_net2 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        self.target_soft_q_net1 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        self.target_soft_q_net2 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        
        for target_param, param in zip(self.target_soft_q_net1.parameters(), self.soft_q_net1.parameters()):
            target_param.data.copy_(param.data)
        
        for target_param, param in zip(self.target_soft_q_net2.parameters(), self.soft_q_net2.parameters()):
            target_param.data.copy_(param.data)
        
        # Policy
        self.policy_net = PolicyNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)

        # Optimizers/Loss
        self.value_criterion  = nn.MSELoss()
        self.soft_q_criterion = nn.MSELoss()
        

        self.value_lr  = 3e-4
        self.soft_q_lr = 3e-4
        self.policy_lr = 3e-4

        self.value_optimizer  = optim.Adam(self.value_net.parameters(), lr=self.value_lr)
        self.soft_q_optimizer1 = optim.Adam(self.soft_q_net1.parameters(), lr=self.soft_q_lr)
        self.soft_q_optimizer2 = optim.Adam(self.soft_q_net2.parameters(), lr=self.soft_q_lr)
        self.policy_optimizer = optim.Adam(self.policy_net.parameters(), lr=self.policy_lr)
        
        if auto_alpha_tuning:
            self.target_entropy = -np.prod(env.action_space.shape).item()
            self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
            self.alpha_optimizer = optim.Adam(self.log_alpha, lr=self.policy_lr)


        replay_buffer_size = 1000000
        self.replay_buffer = ReplayBuffer(replay_buffer_size)
    
    def select_action(self,state):
        act = self.policy_net.get_action(state)
        print(act.detach())
        return act.detach()
        
        
    def update(self, batch_size,gamma=0.99,soft_tau=1e-2):
        
        state, action, reward, next_state, done = self.replay_buffer.sample(batch_size)

        state      = torch.FloatTensor(state).to(device)
        next_state = torch.FloatTensor(next_state).to(device)
        action     = torch.FloatTensor(action).to(device)
        reward     = torch.FloatTensor(reward).unsqueeze(1).to(device)
        done       = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(device)

        predicted_q_value1 = self.soft_q_net1(state, action)
        predicted_q_value2 = self.soft_q_net2(state, action)
        predicted_value    = self.value_net(state)
        new_action, log_prob, mean, log_std = self.policy_net.evaluate(state)
        
    
        # Auto alpha tuning
        if self.auto_alpha_tuning:
            alpha_loss = -(self.log_alpha * (log_prob + self.target_entropy).detach()).mean()
            self.alpha_optimizer.zero_grad()
            alpha_loss.backward()
            self.alpha_optimizer.step()
            alpha = self.log_alpha.exp()
        else:
            alpha_loss = 0
            alpha = 0.2 # constant used by OpenAI



    # Training Q Function
        target_value = self.target_value_net(next_state) 
        target_q_value = reward + (1 - done) * gamma * target_value
#         q_value_loss1 = self.soft_q_criterion1(predicted_q_value1, target_q_value.detach())
#         q_value_loss2 = self.soft_q_criterion2(predicted_q_value2, target_q_value.detach())
        q_value_loss1 = 0.5 * torch.mean((target_q_value.detach() - predicted_q_value1)**2)
        q_value_loss2 = 0.5 * torch.mean((target_q_value.detach() - predicted_q_value2)**2)


        self.soft_q_optimizer1.zero_grad()
        q_value_loss1.backward()
        self.soft_q_optimizer1.step()
        self.soft_q_optimizer2.zero_grad()
        q_value_loss2.backward()
        self.soft_q_optimizer2.step()  
        
    # Training Value Function
        predicted_new_q_value = torch.min(self.soft_q_net1(state, new_action),self.soft_q_net2(state, new_action))
        target_value_func = predicted_new_q_value - log_prob * 0.2 #alpha
#         value_loss = self.value_criterion(predicted_value, target_value_func.detach())
        value_loss = 0.5 * torch.mean((target_value_func.detach() - predicted_value)**2)


        self.value_optimizer.zero_grad()
        value_loss.backward()
        self.value_optimizer.step()
    # Training Policy Function
#         policy_loss = (log_prob - predicted_new_q_value).mean()
        policy_loss = (alpha * log_prob - predicted_new_q_value).mean()

        self.policy_optimizer.zero_grad()
        policy_loss.backward()
        self.policy_optimizer.step()


        for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()):
            target_param.data.copy_(
                target_param.data * (1.0 - soft_tau) + param.data * soft_tau
            )
        
    

# Runner

In [179]:
class Runner():
    """Carries out the environment steps and adds experiences to memory"""
    
    def __init__(self, env, agent):
        
        self.env = env
        self.agent = agent
#         self.replay_buffer = replay_buffer
        self.obs = env.reset()
        self.done = False
        
    def next_step(self, episode_timesteps, noise=0.1):
        
        action = self.agent.select_action(self.obs)
        

        # Perform action
        # TODO: Clean up get action 
        new_obs, reward, done, _ = self.env.step(action.numpy()) 
        done_bool = 0 if episode_timesteps + 1 == 200 else float(done)
    
        # Store data in replay buffer
#         replay_buffer.add((self.obs, new_obs, action, reward, done_bool))
        self.agent.replay_buffer.push(self.obs, action, reward, new_obs, done_bool)
        
        self.obs = new_obs
        
        if done:
            self.obs = self.env.reset()
            done = False
            
            return reward, True
        
        return reward, done

# Observe

In [180]:
def observe(env, agent, observation_steps):
    """run episodes while taking random actions and filling replay_buffer
    
        Args:
            env (env): gym environment
            replay_buffer(ReplayBuffer): buffer to store experience replay
            observation_steps (int): how many steps to observe for
    
    """
    
    time_steps = 0
    obs = env.reset()
    done = False

    while time_steps < observation_steps:
        action = env.action_space.sample()
        new_obs, reward, done, _ = env.step(action)

        agent.replay_buffer.push(obs, action, reward, new_obs, done)

        obs = new_obs
        time_steps += 1

        if done:
            obs = env.reset()
            done = False

        print("\rPopulating Buffer {}/{}.".format(time_steps, observation_steps), end="")
        sys.stdout.flush()

# Train

In [181]:
def train(agent, test_env):
    """Train the agent for exploration steps
    
        Args:
            agent (Agent): agent to use
            env (environment): gym environment
            writer (SummaryWriter): tensorboard writer
            exploration (int): how many training steps to run
    
    """

    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    episode_reward = 0
    episode_timesteps = 0
    done = False 
    obs = env.reset()
    evaluations = []
    rewards = []
    best_avg = -2000
    
    writer = SummaryWriter(comment="-TD3_Baseline_HalfCheetah")
    
    while total_timesteps < EXPLORATION:
    
        if done: 

            if total_timesteps != 0: 
                rewards.append(episode_reward)
                
                if total_timesteps % 1000 == 0:
                    plot(len(rewards), rewards)
                
                avg_reward = np.mean(rewards[-100:])
                
                writer.add_scalar("avg_reward", avg_reward, total_timesteps)
                writer.add_scalar("reward_step", reward, total_timesteps)
                writer.add_scalar("episode_reward", episode_reward, total_timesteps)
                
                print("\rTotal T: {:d} Episode Num: {:d} Reward: {:f} Avg Reward: {:f}".format(
                    total_timesteps, episode_num, episode_reward, avg_reward), end="")
                sys.stdout.flush()


                if avg_reward >= REWARD_THRESH:
                    break

#                 agent.update(replay_buffer, episode_timesteps, BATCH_SIZE, GAMMA, TAU, NOISE, NOISE_CLIP, POLICY_FREQUENCY)
#                 agent.update(128)

                episode_reward = 0
                episode_timesteps = 0
                episode_num += 1 

        reward, done = runner.next_step(episode_timesteps)
        episode_reward += reward

        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1
        
        agent.update(100)
        
        

# Config

In [182]:
ENV = "Pendulum-v0"
SEED = 0
OBSERVATION = 1000
EXPLORATION = 40000
BATCH_SIZE = 128
GAMMA = 0.99
TAU = 0.005
NOISE = 0.2
NOISE_CLIP = 0.5
EXPLORE_NOISE = 0.1
POLICY_FREQUENCY = 2
EVAL_FREQUENCY = 5000
REWARD_THRESH = -100

# Main

In [183]:
env = NormalizedActions(gym.make(ENV))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds
env.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0] 
max_action = float(env.action_space.high[0])

agent = SAC(state_dim, action_dim, env)

# replay_buffer = ReplayBuffer()

runner = Runner(env, agent)

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True

In [184]:
# Populate replay buffer
observe(env, agent, OBSERVATION)

:NA [-1.4115582]
Populating Buffer 1/1000.:NA [-1.1422379]
Populating Buffer 2/1000.:NA [1.709045]
Populating Buffer 3/1000.:NA [-1.2351859]
Populating Buffer 4/1000.:NA [-1.7089324]
Populating Buffer 5/1000.:NA [-1.4696925]
Populating Buffer 6/1000.:NA [1.0783552]
Populating Buffer 7/1000.:NA [0.24832644]
Populating Buffer 8/1000.:NA [-1.4571455]
Populating Buffer 9/1000.:NA [-0.31708896]
Populating Buffer 10/1000.:NA [-1.0485669]
Populating Buffer 11/1000.:NA [1.6939832]
Populating Buffer 12/1000.:NA [-0.32162488]
Populating Buffer 13/1000.:NA [1.3121978]
Populating Buffer 14/1000.:NA [-0.38343146]
Populating Buffer 15/1000.:NA [-1.6480492]
Populating Buffer 16/1000.:NA [1.1483951]
Populating Buffer 17/1000.:NA [1.3547984]
Populating Buffer 18/1000.:NA [-0.28886175]
Populating Buffer 19/1000.:NA [0.7663776]
Populating Buffer 20/1000.:NA [-1.6621842]
Populating Buffer 21/1000.:NA [-1.510634]
Populating Buffer 22/1000.:NA [0.3582203]
Populating Buffer 23/1000.:NA [-1.9299922]
Populatin

Populating Buffer 190/1000.:NA [-1.0001824]
Populating Buffer 191/1000.:NA [-1.7064621]
Populating Buffer 192/1000.:NA [-1.6483148]
Populating Buffer 193/1000.:NA [1.5993254]
Populating Buffer 194/1000.:NA [-0.2039566]
Populating Buffer 195/1000.:NA [1.4986091]
Populating Buffer 196/1000.:NA [-0.1356702]
Populating Buffer 197/1000.:NA [-1.573917]
Populating Buffer 198/1000.:NA [-0.25622237]
Populating Buffer 199/1000.:NA [-0.9418304]
Populating Buffer 200/1000.:NA [-0.34033558]
Populating Buffer 201/1000.:NA [0.61698186]
Populating Buffer 202/1000.:NA [-0.07707247]
Populating Buffer 203/1000.:NA [0.36000416]
Populating Buffer 204/1000.:NA [1.415485]
Populating Buffer 205/1000.:NA [-0.20732573]
Populating Buffer 206/1000.:NA [1.2920921]
Populating Buffer 207/1000.:NA [-0.1818087]
Populating Buffer 208/1000.:NA [1.9503522]
Populating Buffer 209/1000.:NA [1.4635676]
Populating Buffer 210/1000.:NA [0.90666616]
Populating Buffer 211/1000.:NA [-1.5853293]
Populating Buffer 212/1000.:NA [-0.9

Populating Buffer 377/1000.:NA [-1.7268633]
Populating Buffer 378/1000.:NA [-0.9849204]
Populating Buffer 379/1000.:NA [1.3713031]
Populating Buffer 380/1000.:NA [1.2835835]
Populating Buffer 381/1000.:NA [0.4724397]
Populating Buffer 382/1000.:NA [-1.119977]
Populating Buffer 383/1000.:NA [1.1072699]
Populating Buffer 384/1000.:NA [-1.4812236]
Populating Buffer 385/1000.:NA [-1.4399416]
Populating Buffer 386/1000.:NA [-0.1016591]
Populating Buffer 387/1000.:NA [-0.29025626]
Populating Buffer 388/1000.:NA [0.82201487]
Populating Buffer 389/1000.:NA [0.8635012]
Populating Buffer 390/1000.:NA [0.6413377]
Populating Buffer 391/1000.:NA [1.5919101]
Populating Buffer 392/1000.:NA [-1.208416]
Populating Buffer 393/1000.:NA [-1.6085541]
Populating Buffer 394/1000.:NA [-1.7840214]
Populating Buffer 395/1000.:NA [-1.0143069]
Populating Buffer 396/1000.:NA [1.5970881]
Populating Buffer 397/1000.:NA [0.7419263]
Populating Buffer 398/1000.:NA [-0.62239534]
Populating Buffer 399/1000.:NA [0.8789592

Populating Buffer 564/1000.:NA [-1.3096842]
Populating Buffer 565/1000.:NA [-1.409599]
Populating Buffer 566/1000.:NA [0.42161688]
Populating Buffer 567/1000.:NA [1.8879614]
Populating Buffer 568/1000.:NA [0.46566242]
Populating Buffer 569/1000.:NA [0.43725482]
Populating Buffer 570/1000.:NA [-1.2124008]
Populating Buffer 571/1000.:NA [-1.2622926]
Populating Buffer 572/1000.:NA [0.3339367]
Populating Buffer 573/1000.:NA [0.43575352]
Populating Buffer 574/1000.:NA [-1.5818095]
Populating Buffer 575/1000.:NA [-1.4391224]
Populating Buffer 576/1000.:NA [1.529283]
Populating Buffer 577/1000.:NA [-0.28049445]
Populating Buffer 578/1000.:NA [1.1146237]
Populating Buffer 579/1000.:NA [0.36559016]
Populating Buffer 580/1000.:NA [0.77048063]
Populating Buffer 581/1000.:NA [-1.4109144]
Populating Buffer 582/1000.:NA [0.95333487]
Populating Buffer 583/1000.:NA [0.54689455]
Populating Buffer 584/1000.:NA [-1.9699016]
Populating Buffer 585/1000.:NA [1.2872535]
Populating Buffer 586/1000.:NA [0.7017

Populating Buffer 751/1000.:NA [-1.8336669]
Populating Buffer 752/1000.:NA [-1.6990646]
Populating Buffer 753/1000.:NA [-1.8603398]
Populating Buffer 754/1000.:NA [0.8404353]
Populating Buffer 755/1000.:NA [1.0054431]
Populating Buffer 756/1000.:NA [-1.164175]
Populating Buffer 757/1000.:NA [-1.3155698]
Populating Buffer 758/1000.:NA [-0.28700858]
Populating Buffer 759/1000.:NA [0.6529085]
Populating Buffer 760/1000.:NA [0.6811418]
Populating Buffer 761/1000.:NA [1.9091443]
Populating Buffer 762/1000.:NA [-0.9505627]
Populating Buffer 763/1000.:NA [0.47786808]
Populating Buffer 764/1000.:NA [0.02033617]
Populating Buffer 765/1000.:NA [1.3420175]
Populating Buffer 766/1000.:NA [-1.8690997]
Populating Buffer 767/1000.:NA [0.9467019]
Populating Buffer 768/1000.:NA [-1.1498455]
Populating Buffer 769/1000.:NA [-1.9541808]
Populating Buffer 770/1000.:NA [-1.0712165]
Populating Buffer 771/1000.:NA [-0.3464153]
Populating Buffer 772/1000.:NA [1.4406956]
Populating Buffer 773/1000.:NA [0.787334

Populating Buffer 938/1000.:NA [0.69419634]
Populating Buffer 939/1000.:NA [0.7563534]
Populating Buffer 940/1000.:NA [1.4827274]
Populating Buffer 941/1000.:NA [0.17631611]
Populating Buffer 942/1000.:NA [0.7335105]
Populating Buffer 943/1000.:NA [1.7777221]
Populating Buffer 944/1000.:NA [-1.8421688]
Populating Buffer 945/1000.:NA [0.45442212]
Populating Buffer 946/1000.:NA [-0.60327953]
Populating Buffer 947/1000.:NA [-1.7740428]
Populating Buffer 948/1000.:NA [-0.4225938]
Populating Buffer 949/1000.:NA [1.4364979]
Populating Buffer 950/1000.:NA [0.29649642]
Populating Buffer 951/1000.:NA [0.096619]
Populating Buffer 952/1000.:NA [0.4366295]
Populating Buffer 953/1000.:NA [-1.6770214]
Populating Buffer 954/1000.:NA [1.8874038]
Populating Buffer 955/1000.:NA [-1.5803338]
Populating Buffer 956/1000.:NA [1.5691458]
Populating Buffer 957/1000.:NA [0.75784314]
Populating Buffer 958/1000.:NA [-0.4634906]
Populating Buffer 959/1000.:NA [1.6906483]
Populating Buffer 960/1000.:NA [0.94047445

In [185]:
# Train agent
train(agent, env)

tensor([0.6344])
:NA [0.63442016]
tensor([-0.1962])
:NA [-0.19623272]
tensor([0.5402])
:NA [0.5402435]
tensor([-0.8755])
:NA [-0.8754601]
tensor([0.1857])
:NA [0.18568997]
tensor([0.7841])
:NA [0.78411275]
tensor([0.8358])
:NA [0.8357723]
tensor([0.6229])
:NA [0.62286645]
tensor([0.6203])
:NA [0.6202917]
tensor([-0.8157])
:NA [-0.8156866]
tensor([0.2027])
:NA [0.20266503]
tensor([-0.9771])
:NA [-0.9771278]
tensor([0.0870])
:NA [0.08696546]
tensor([0.8104])
:NA [0.81044877]
tensor([-0.6184])
:NA [-0.61838305]
tensor([-0.4501])
:NA [-0.4501456]
tensor([-0.7403])
:NA [-0.74026024]
tensor([-0.1185])
:NA [-0.11852128]
tensor([-0.1133])
:NA [-0.11328401]
tensor([0.3458])
:NA [0.34583384]
tensor([0.7674])
:NA [0.7673752]
tensor([-0.8266])
:NA [-0.82658184]
tensor([-0.4953])
:NA [-0.49533474]
tensor([-0.9276])
:NA [-0.92764527]
tensor([-0.5449])
:NA [-0.5449375]
tensor([-0.0880])
:NA [-0.08800083]
tensor([0.8530])
:NA [0.8530392]
tensor([0.0544])
:NA [0.05437566]
tensor([-0.6377])
:NA [-0.6376

tensor([0.1237])
:NA [0.12369279]
tensor([0.5012])
:NA [0.50123614]
tensor([-0.5076])
:NA [-0.50758]
tensor([-0.2865])
:NA [-0.28648153]
tensor([-0.2908])
:NA [-0.29081148]
tensor([-0.0283])
:NA [-0.02829988]
tensor([-0.8313])
:NA [-0.83128023]
tensor([-0.9708])
:NA [-0.9708061]
tensor([-0.7880])
:NA [-0.7880407]
tensor([-0.7766])
:NA [-0.7765654]
tensor([-0.6604])
:NA [-0.66042846]
tensor([-0.5001])
:NA [-0.5001333]
tensor([-0.7275])
:NA [-0.72754246]
tensor([0.1226])
:NA [0.12263921]
tensor([-0.7234])
:NA [-0.7234162]
tensor([0.5801])
:NA [0.5800723]
tensor([-0.4310])
:NA [-0.4310172]
tensor([-0.9867])
:NA [-0.9866591]
tensor([-0.6991])
:NA [-0.6991199]
tensor([0.2137])
:NA [0.21371411]
tensor([0.7249])
:NA [0.72488654]
tensor([0.1633])
:NA [0.1633187]
tensor([0.7466])
:NA [0.746614]
tensor([0.8064])
:NA [0.8064104]
tensor([0.3213])
:NA [0.3213251]
tensor([0.9379])
:NA [0.9379213]
tensor([0.3917])
:NA [0.39171472]
tensor([0.4900])
:NA [0.48996204]
tensor([-0.2006])
:NA [-0.20056139]


tensor([-0.4761])
:NA [-0.4761285]
tensor([-0.5790])
:NA [-0.5789707]
tensor([-0.7982])
:NA [-0.79818887]
tensor([-0.3506])
:NA [-0.3506274]
tensor([-0.4077])
:NA [-0.40774372]
tensor([-0.6660])
:NA [-0.66600513]
tensor([-0.1930])
:NA [-0.19301105]
tensor([0.5460])
:NA [0.5459671]
tensor([-0.7102])
:NA [-0.7101713]
tensor([0.1487])
:NA [0.14871794]
tensor([-0.5168])
:NA [-0.5167643]
tensor([-0.8893])
:NA [-0.8893392]
tensor([0.4824])
:NA [0.48243797]
tensor([0.8163])
:NA [0.8163221]
tensor([-0.8315])
:NA [-0.8315221]
tensor([0.0881])
:NA [0.08805505]
tensor([0.5447])
:NA [0.54471916]
tensor([0.0218])
:NA [0.02184925]
tensor([-0.5191])
:NA [-0.51912826]
tensor([0.6012])
:NA [0.6012379]
tensor([0.7963])
:NA [0.79625833]
tensor([0.8906])
:NA [0.89060056]
tensor([0.0874])
:NA [0.08739486]
tensor([0.7280])
:NA [0.7279719]
tensor([0.2256])
:NA [0.22558174]
tensor([0.7488])
:NA [0.74884444]
tensor([-0.2376])
:NA [-0.23758325]
tensor([0.1425])
:NA [0.14250296]
tensor([-0.5975])
:NA [-0.5974633

KeyboardInterrupt: 