# SAC

In [1]:
import math
import random
import sys

import gym
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
from tensorboardX import SummaryWriter

from IPython.display import clear_output
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import display

%matplotlib inline

use_cuda = torch.cuda.is_available()
device   = torch.device("cuda" if use_cuda else "cpu")

# Networks

In [2]:
class ValueNetwork(nn.Module):
    def __init__(self, state_dim, hidden_dim, init_w=3e-3):
        super(ValueNetwork, self).__init__()
        
        self.linear1 = nn.Linear(state_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, 1)
        
        self.linear3.weight.data.uniform_(-init_w, init_w)
        self.linear3.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

In [3]:
class SoftQNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions, hidden_size, init_w=3e-3):
        super(SoftQNetwork, self).__init__()
        
        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, 1)
        
        self.linear3.weight.data.uniform_(-init_w, init_w)
        self.linear3.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state, action):
        x = torch.cat([state, action], 1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x

In [4]:
class PolicyNetwork(nn.Module):
    def __init__(self, num_inputs, num_actions, hidden_size, init_w=3e-3, log_std_min=-20, log_std_max=2):
        super(PolicyNetwork, self).__init__()
        
        self.log_std_min = log_std_min
        self.log_std_max = log_std_max
        
        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        
        self.mean_linear = nn.Linear(hidden_size, num_actions)
        self.mean_linear.weight.data.uniform_(-init_w, init_w)
        self.mean_linear.bias.data.uniform_(-init_w, init_w)
        
        self.log_std_linear = nn.Linear(hidden_size, num_actions)
        self.log_std_linear.weight.data.uniform_(-init_w, init_w)
        self.log_std_linear.bias.data.uniform_(-init_w, init_w)
        
    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        
        mean    = self.mean_linear(x)
        log_std = self.log_std_linear(x)
        log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
        
        return mean, log_std
    
    def evaluate(self, state, epsilon=1e-6):
        mean, log_std = self.forward(state)
        std = log_std.exp()
        
        print("Mu", mean)
        
        normal = Normal(0, 1)
        z      = normal.sample()
        action = torch.tanh(mean+ std*z.to(device))
        log_prob = Normal(mean, std).log_prob(mean+ std*z.to(device)) - torch.log(1 - action.pow(2) + epsilon)
        return action, log_prob, z, mean, log_std
        
    
    def get_action(self, state):
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        mean, log_std = self.forward(state)
        std = log_std.exp()
        
        normal = Normal(0, 1)
        z      = normal.sample().to(device)
        action = torch.tanh(mean + std*z)
        
        action  = action.cpu()#.detach().cpu().numpy()
        return action[0]

# Memory

In [5]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []
        self.position = 0
    
    def push(self, state, action, reward, next_state, done):
        if len(self.buffer) < self.capacity:
            self.buffer.append(None)
        
        self.buffer[self.position] = (state, action, reward, next_state, done)
        self.position = (self.position + 1) % self.capacity
    
    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = map(np.stack, zip(*batch))
        return state, action, reward, next_state, done
    
    def __len__(self):
        return len(self.buffer)

In [6]:
class ReplayBufferTest(object):
    """Buffer to store tuples of experience replay"""
    
    def __init__(self, max_size=1000000):
        """
        Args:
            max_size (int): total amount of tuples to store
        """
        
        self.storage = []
        self.max_size = max_size
        self.ptr = 0

    def add(self, data):
        """Add experience tuples to buffer
        
        Args:
            data (tuple): experience replay tuple
        """
        
        if len(self.storage) == self.max_size:
            self.storage[int(self.ptr)] = data
            self.ptr = (self.ptr + 1) % self.max_size
        else:
            self.storage.append(data)

    def sample(self, batch_size):
        """Samples a random amount of experiences from buffer of batch size
        
        Args:
            batch_size (int): size of sample
        """
        
        ind = np.random.randint(0, len(self.storage), size=batch_size)
        states, actions, next_states, rewards, dones = [], [], [], [], []

        for i in ind: 
            s, a, s_, r, d = self.storage[i]
            states.append(np.array(s, copy=False))
            actions.append(np.array(a, copy=False))
            next_states.append(np.array(s_, copy=False))
            rewards.append(np.array(r, copy=False))
            dones.append(np.array(d, copy=False))

        return np.array(states), np.array(actions), np.array(rewards).reshape(-1, 1), np.array(next_states),np.array(dones).reshape(-1, 1)

In [7]:
class NormalizedActions(gym.ActionWrapper):
    def _action(self, action):
        low  = self.action_space.low
        high = self.action_space.high
        
        action = low + (action + 1.0) * 0.5 * (high - low)
        action = np.clip(action, low, high)
        
        return action

    def _reverse_action(self, action):
        low  = self.action_space.low
        high = self.action_space.high
        
        action = 2 * (action - low) / (high - low) - 1
        action = np.clip(action, low, high)
        
        return actions

In [8]:
def plot(frame_idx, rewards):
    clear_output(True)
    plt.figure(figsize=(20,5))
    plt.subplot(131)
    plt.title('frame %s. reward: %s' % (frame_idx, rewards[-1]))
    plt.plot(rewards)
    plt.show()

# Agent

In [9]:
class SAC(object):
    
    def __init__(self, state_dim, action_dim,env):
        
        self.action_dim = action_dim
        self.state_dim  = state_dim
        self.hidden_dim = 256

        self.value_net        = ValueNetwork(self.state_dim, self.hidden_dim).to(device)
        self.target_value_net = ValueNetwork(self.state_dim, self.hidden_dim).to(device)

        self.soft_q_net1 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        self.soft_q_net2 = SoftQNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)
        self.policy_net = PolicyNetwork(self.state_dim, self.action_dim, self.hidden_dim).to(device)

        for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()):
            target_param.data.copy_(param.data)


        self.value_criterion  = nn.MSELoss()
        self.soft_q_criterion1 = nn.MSELoss()
        self.soft_q_criterion2 = nn.MSELoss()

        self.value_lr  = 3e-4
        self.soft_q_lr = 3e-4
        self.policy_lr = 3e-4

        self.value_optimizer  = optim.Adam(self.value_net.parameters(), lr=self.value_lr)
        self.soft_q_optimizer1 = optim.Adam(self.soft_q_net1.parameters(), lr=self.soft_q_lr)
        self.soft_q_optimizer2 = optim.Adam(self.soft_q_net2.parameters(), lr=self.soft_q_lr)
        self.policy_optimizer = optim.Adam(self.policy_net.parameters(), lr=self.policy_lr)


        replay_buffer_size = 1000000
        self.replay_buffer = ReplayBuffer(replay_buffer_size)
        self.test_buffer = ReplayBufferTest(replay_buffer_size)
    
    def select_action(self,state):
        return self.policy_net.get_action(state).detach()
        
        
    def update(self, batch_size, iterations, gamma=0.99,soft_tau=1e-2,):
        
        for _ in range(iterations):
        
            state, action, reward, next_state, done = self.replay_buffer.sample(batch_size)
            
            statet, actiont, rewardt, next_statet, donet = self.test_buffer.sample(batch_size)
            
#             print("Main {}{}{}{}{}".format(state.shape, action.shape, reward.shape, next_state.shape, done.shape))
#             print("Test {}{}{}{}{}".format(statet.shape, actiont.shape, rewardt.shape, next_statet.shape, donet.shape))
            
            state      = torch.FloatTensor(state).to(device)
            next_state = torch.FloatTensor(next_state).to(device)
            action     = torch.FloatTensor(action).to(device)
            reward     = torch.FloatTensor(reward).unsqueeze(1).to(device)
            done       = torch.FloatTensor(np.float32(done)).unsqueeze(1).to(device)

            predicted_q_value1 = self.soft_q_net1(state, action)
            predicted_q_value2 = self.soft_q_net2(state, action)
            predicted_value    = self.value_net(state)
            new_action, log_prob, epsilon, mean, log_std = self.policy_net.evaluate(state)



        # Training Q Function
            target_value = self.target_value_net(next_state)
            target_q_value = reward + (1 - done) * gamma * target_value
            q_value_loss1 = self.soft_q_criterion1(predicted_q_value1, target_q_value.detach())
            q_value_loss2 = self.soft_q_criterion2(predicted_q_value2, target_q_value.detach())


            self.soft_q_optimizer1.zero_grad()
            q_value_loss1.backward()
            self.soft_q_optimizer1.step()
            self.soft_q_optimizer2.zero_grad()
            q_value_loss2.backward()
            self.soft_q_optimizer2.step()  

        # Training Value Function
            predicted_new_q_value = torch.min(self.soft_q_net1(state, new_action),self.soft_q_net2(state, new_action))
            target_value_func = predicted_new_q_value - log_prob
            value_loss = self.value_criterion(predicted_value, target_value_func.detach())


            self.value_optimizer.zero_grad()
            value_loss.backward()
            self.value_optimizer.step()
        # Training Policy Function
            policy_loss = (log_prob - predicted_new_q_value).mean()

            self.policy_optimizer.zero_grad()
            policy_loss.backward()
            self.policy_optimizer.step()


            for target_param, param in zip(self.target_value_net.parameters(), self.value_net.parameters()):
                target_param.data.copy_(
                    target_param.data * (1.0 - soft_tau) + param.data * soft_tau
                )
        
    

# Runner

In [10]:
class Runner():
    """Carries out the environment steps and adds experiences to memory"""
    
    def __init__(self, env, agent):
        
        self.env = env
        self.agent = agent
#         self.replay_buffer = replay_buffer
        self.obs = env.reset()
        self.done = False
        
    def next_step(self, episode_timesteps, noise=0.1):
        
        action = self.agent.select_action(self.obs)
        

        # Perform action
        # TODO: Clean up get action 
        new_obs, reward, done, _ = self.env.step(action.numpy()) 
        done_bool = 0 if episode_timesteps + 1 == 200 else float(done)
    
        # Store data in replay buffer
#         replay_buffer.add((self.obs, new_obs, action, reward, done_bool))
#         print("New State: ",new_obs.shape)
        self.agent.replay_buffer.push(self.obs, action, reward, new_obs, done_bool)
        self.agent.test_buffer.add([self.obs, action, reward, new_obs, done_bool])
        
        self.obs = new_obs
        
        if done:
            self.obs = self.env.reset()
            done = False
            
            return reward, True
        
        return reward, done

# Observe

In [11]:
def observe(env, agent, observation_steps):
    """run episodes while taking random actions and filling replay_buffer
    
        Args:
            env (env): gym environment
            replay_buffer(ReplayBuffer): buffer to store experience replay
            observation_steps (int): how many steps to observe for
    
    """
    
    time_steps = 0
    obs = env.reset()
    done = False

    while time_steps < observation_steps:
        action = env.action_space.sample()
        new_obs, reward, done, _ = env.step(action)

        agent.replay_buffer.push(obs, action, reward, new_obs, done)
        agent.test_buffer.add([obs, action, reward, new_obs, done])

        obs = new_obs
        time_steps += 1

        if done:
            obs = env.reset()
            done = False

        print("\rPopulating Buffer {}/{}.".format(time_steps, observation_steps), end="")
        sys.stdout.flush()

# Train

In [12]:
def train(agent, test_env):
    """Train the agent for exploration steps
    
        Args:
            agent (Agent): agent to use
            env (environment): gym environment
            writer (SummaryWriter): tensorboard writer
            exploration (int): how many training steps to run
    
    """

    total_timesteps = 0
    timesteps_since_eval = 0
    episode_num = 0
    episode_reward = 0
    episode_timesteps = 0
    done = False 
    obs = env.reset()
    evaluations = []
    rewards = []
    best_avg = -2000
    
    writer = SummaryWriter(comment="-TD3_Baseline_HalfCheetah")
    
    while total_timesteps < EXPLORATION:
    
        if done: 

            if total_timesteps != 0: 
                rewards.append(episode_reward)
                
                if total_timesteps % 1000 == 0:
                    plot(len(rewards), rewards)
                
                avg_reward = np.mean(rewards[-100:])
                
                writer.add_scalar("avg_reward", avg_reward, total_timesteps)
                writer.add_scalar("reward_step", reward, total_timesteps)
                writer.add_scalar("episode_reward", episode_reward, total_timesteps)
                
                print("\rTotal T: {:d} Episode Num: {:d} Reward: {:f} Avg Reward: {:f}".format(
                    total_timesteps, episode_num, episode_reward, avg_reward), end="")
                sys.stdout.flush()


                if avg_reward >= REWARD_THRESH:
                    break

#                 agent.update(replay_buffer, episode_timesteps, BATCH_SIZE, GAMMA, TAU, NOISE, NOISE_CLIP, POLICY_FREQUENCY)
                agent.update(100,episode_timesteps)

                episode_reward = 0
                episode_timesteps = 0
                episode_num += 1 

        reward, done = runner.next_step(episode_timesteps)
        episode_reward += reward

        episode_timesteps += 1
        total_timesteps += 1
        timesteps_since_eval += 1
        
#         agent.update(128)
        
        

# Config

In [13]:
ENV = "Pendulum-v0"
SEED = 0
OBSERVATION = 100
EXPLORATION = 40000
BATCH_SIZE = 128
GAMMA = 0.99
TAU = 0.005
NOISE = 0.2
NOISE_CLIP = 0.5
EXPLORE_NOISE = 0.1
POLICY_FREQUENCY = 2
EVAL_FREQUENCY = 5000
REWARD_THRESH = -100

# Main

In [14]:
env = NormalizedActions(gym.make(ENV))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set seeds
env.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0] 
max_action = float(env.action_space.high[0])

agent = SAC(state_dim, action_dim, env)

# replay_buffer = ReplayBuffer()

runner = Runner(env, agent)

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True

In [15]:
# Populate replay buffer
observe(env, agent, OBSERVATION)

Populating Buffer 100/100.



In [16]:
# Train agent
train(agent, env)

Total T: 200 Episode Num: 0 Reward: -1692.689348 Avg Reward: -1692.689348Mu tensor([[ 7.6911e-03],
        [ 4.1492e-03],
        [ 3.3146e-03],
        [ 4.6633e-03],
        [ 1.9896e-03],
        [ 3.4118e-03],
        [ 2.7206e-03],
        [ 2.2402e-03],
        [ 2.1788e-03],
        [ 9.8215e-03],
        [ 3.8939e-03],
        [ 4.6380e-03],
        [ 3.1985e-03],
        [ 8.8270e-03],
        [ 2.7107e-03],
        [ 2.1946e-03],
        [ 6.7770e-03],
        [ 1.4697e-02],
        [ 3.5798e-03],
        [ 3.0652e-03],
        [ 2.0423e-03],
        [ 8.0846e-03],
        [ 3.2138e-03],
        [ 9.4687e-03],
        [ 1.0272e-02],
        [ 1.0355e-04],
        [ 2.5318e-03],
        [ 6.5830e-03],
        [ 1.1018e-02],
        [ 2.4424e-03],
        [ 1.5696e-03],
        [ 1.0285e-02],
        [ 3.8102e-03],
        [ 9.5631e-03],
        [ 1.4450e-02],
        [ 1.2248e-02],
        [ 6.4585e-03],
        [ 8.1978e-03],
        [ 3.7423e-03],
        [ 1.3789e-02],
    

Mu tensor([[0.0277],
        [0.0236],
        [0.0222],
        [0.0385],
        [0.0609],
        [0.0232],
        [0.0434],
        [0.1112],
        [0.0364],
        [0.0962],
        [0.0247],
        [0.1730],
        [0.0241],
        [0.0230],
        [0.0222],
        [0.0252],
        [0.0979],
        [0.0289],
        [0.0272],
        [0.0221],
        [0.0546],
        [0.0579],
        [0.0848],
        [0.0264],
        [0.0653],
        [0.0733],
        [0.0463],
        [0.0236],
        [0.0336],
        [0.0695],
        [0.0552],
        [0.0944],
        [0.0660],
        [0.0467],
        [0.0341],
        [0.0239],
        [0.0859],
        [0.0300],
        [0.0535],
        [0.0398],
        [0.0222],
        [0.1057],
        [0.0266],
        [0.0331],
        [0.0241],
        [0.0230],
        [0.0273],
        [0.0352],
        [0.0228],
        [0.1114],
        [0.0379],
        [0.0370],
        [0.0229],
        [0.0240],
        [0.0222],
       

Mu tensor([[0.0147],
        [0.0404],
        [0.0296],
        [0.0131],
        [0.0530],
        [0.0216],
        [0.0133],
        [0.0132],
        [0.0250],
        [0.0140],
        [0.0195],
        [0.0163],
        [0.0142],
        [0.0179],
        [0.0393],
        [0.0135],
        [0.0280],
        [0.0106],
        [0.0203],
        [0.0311],
        [0.0129],
        [0.0151],
        [0.0130],
        [0.0131],
        [0.0307],
        [0.0145],
        [0.0172],
        [0.0241],
        [0.0513],
        [0.0381],
        [0.0220],
        [0.0292],
        [0.0163],
        [0.0131],
        [0.0131],
        [0.0145],
        [0.0171],
        [0.0161],
        [0.0204],
        [0.0223],
        [0.0265],
        [0.0139],
        [0.0238],
        [0.0195],
        [0.0199],
        [0.0149],
        [0.0215],
        [0.0296],
        [0.0199],
        [0.0130],
        [0.0132],
        [0.0230],
        [0.0251],
        [0.0173],
        [0.0196],
       

Mu tensor([[-0.0069],
        [-0.0070],
        [-0.0071],
        [-0.0058],
        [-0.0054],
        [-0.0168],
        [-0.0314],
        [-0.0094],
        [-0.0074],
        [-0.0070],
        [-0.0092],
        [-0.0066],
        [-0.0339],
        [-0.0082],
        [-0.0362],
        [-0.0075],
        [-0.0082],
        [-0.0134],
        [-0.0246],
        [-0.0128],
        [-0.0081],
        [-0.0078],
        [-0.0058],
        [-0.0168],
        [-0.0059],
        [-0.0185],
        [-0.0591],
        [-0.0058],
        [-0.0050],
        [-0.0143],
        [-0.0066],
        [-0.0383],
        [-0.0070],
        [-0.0063],
        [-0.0080],
        [-0.0061],
        [-0.0075],
        [-0.0080],
        [-0.0131],
        [-0.0095],
        [-0.0059],
        [-0.0094],
        [-0.0226],
        [-0.0123],
        [-0.0057],
        [-0.0060],
        [-0.0373],
        [-0.0065],
        [-0.0056],
        [-0.0106],
        [-0.0099],
        [-0.0062],
        [

Mu tensor([[-0.1181],
        [-0.0719],
        [-0.0210],
        [-0.0154],
        [-0.0838],
        [-0.0627],
        [-0.0196],
        [-0.0166],
        [-0.0174],
        [-0.0163],
        [-0.0188],
        [-0.0216],
        [-0.0157],
        [-0.0488],
        [-0.0366],
        [-0.0178],
        [-0.0314],
        [-0.0166],
        [-0.0272],
        [-0.0197],
        [-0.0165],
        [-0.0696],
        [-0.0247],
        [-0.0180],
        [-0.0225],
        [-0.0227],
        [-0.0576],
        [-0.0184],
        [-0.0354],
        [-0.0164],
        [-0.0604],
        [-0.0997],
        [-0.0205],
        [-0.0166],
        [-0.0418],
        [-0.0234],
        [-0.0165],
        [-0.0302],
        [-0.0157],
        [-0.0304],
        [-0.0440],
        [-0.0153],
        [-0.0502],
        [-0.0277],
        [-0.0194],
        [-0.0202],
        [-0.0388],
        [-0.0485],
        [-0.0270],
        [-0.0284],
        [-0.0157],
        [-0.0268],
        [

Mu tensor([[-0.0285],
        [-0.0389],
        [-0.0714],
        [-0.0589],
        [-0.0331],
        [-0.0237],
        [-0.0251],
        [-0.0534],
        [-0.0288],
        [-0.0234],
        [-0.0236],
        [-0.0244],
        [-0.0324],
        [-0.0317],
        [-0.0449],
        [-0.0231],
        [-0.1475],
        [-0.0231],
        [-0.0284],
        [-0.0258],
        [-0.0774],
        [-0.0249],
        [-0.0250],
        [-0.0278],
        [-0.1157],
        [-0.0401],
        [-0.0615],
        [-0.0557],
        [-0.0377],
        [-0.0330],
        [-0.0252],
        [-0.0679],
        [-0.0268],
        [-0.0242],
        [-0.0336],
        [-0.0438],
        [-0.0201],
        [-0.0396],
        [-0.0228],
        [-0.0213],
        [-0.0241],
        [-0.0924],
        [-0.0830],
        [-0.1220],
        [-0.0226],
        [-0.0905],
        [-0.0383],
        [-0.0216],
        [-0.0231],
        [-0.0400],
        [-0.0260],
        [-0.0381],
        [

Mu tensor([[-0.0020],
        [-0.0002],
        [ 0.0186],
        [-0.0040],
        [-0.0256],
        [ 0.0217],
        [ 0.0030],
        [ 0.0016],
        [ 0.0180],
        [-0.0001],
        [ 0.0020],
        [ 0.0018],
        [ 0.0024],
        [-0.0039],
        [ 0.0157],
        [-0.0028],
        [ 0.0006],
        [ 0.0176],
        [-0.0074],
        [-0.0016],
        [ 0.0022],
        [ 0.0079],
        [ 0.0121],
        [-0.0007],
        [ 0.0011],
        [-0.0011],
        [ 0.0033],
        [-0.0086],
        [-0.0109],
        [-0.0015],
        [-0.0003],
        [-0.0120],
        [ 0.0196],
        [-0.0002],
        [-0.0044],
        [-0.0123],
        [ 0.0053],
        [-0.0095],
        [-0.0005],
        [ 0.0012],
        [ 0.0024],
        [-0.0115],
        [ 0.0006],
        [-0.0026],
        [-0.0009],
        [ 0.0090],
        [ 0.0100],
        [-0.0339],
        [-0.0001],
        [-0.0061],
        [-0.0036],
        [-0.0046],
        [

Mu tensor([[0.0129],
        [0.0201],
        [0.0218],
        [0.0285],
        [0.0825],
        [0.0175],
        [0.0156],
        [0.0174],
        [0.0190],
        [0.0327],
        [0.0213],
        [0.0134],
        [0.0825],
        [0.0123],
        [0.0148],
        [0.0178],
        [0.0201],
        [0.0153],
        [0.0142],
        [0.0147],
        [0.0269],
        [0.0142],
        [0.0163],
        [0.0385],
        [0.0307],
        [0.0153],
        [0.0153],
        [0.0149],
        [0.0693],
        [0.0164],
        [0.0148],
        [0.0825],
        [0.0161],
        [0.0381],
        [0.0194],
        [0.0149],
        [0.0689],
        [0.0150],
        [0.0140],
        [0.0419],
        [0.0298],
        [0.0135],
        [0.0146],
        [0.0212],
        [0.0200],
        [0.0539],
        [0.0784],
        [0.0200],
        [0.0162],
        [0.0133],
        [0.0747],
        [0.0202],
        [0.0124],
        [0.0232],
        [0.0138],
       

Mu tensor([[0.0138],
        [0.0800],
        [0.0117],
        [0.0146],
        [0.0100],
        [0.0215],
        [0.0137],
        [0.0138],
        [0.0197],
        [0.0125],
        [0.0144],
        [0.0829],
        [0.0156],
        [0.0169],
        [0.0136],
        [0.0135],
        [0.0139],
        [0.0150],
        [0.0661],
        [0.0255],
        [0.0127],
        [0.0120],
        [0.0550],
        [0.0305],
        [0.0233],
        [0.0148],
        [0.0174],
        [0.0129],
        [0.0169],
        [0.0128],
        [0.0144],
        [0.0136],
        [0.0176],
        [0.0147],
        [0.0165],
        [0.0277],
        [0.0123],
        [0.0181],
        [0.0144],
        [0.0229],
        [0.0909],
        [0.0129],
        [0.0121],
        [0.0130],
        [0.0164],
        [0.0238],
        [0.0226],
        [0.0137],
        [0.0165],
        [0.0208],
        [0.0196],
        [0.0130],
        [0.0235],
        [0.0132],
        [0.0199],
       

Mu tensor([[0.0350],
        [0.0354],
        [0.0399],
        [0.0370],
        [0.0412],
        [0.0499],
        [0.0674],
        [0.0440],
        [0.1481],
        [0.0402],
        [0.0562],
        [0.0380],
        [0.0376],
        [0.1391],
        [0.1268],
        [0.0332],
        [0.0473],
        [0.0361],
        [0.0399],
        [0.0351],
        [0.0424],
        [0.0362],
        [0.0366],
        [0.0350],
        [0.0596],
        [0.0347],
        [0.0836],
        [0.0353],
        [0.0304],
        [0.0393],
        [0.0612],
        [0.1168],
        [0.1513],
        [0.0389],
        [0.0408],
        [0.0997],
        [0.0359],
        [0.0455],
        [0.0360],
        [0.0489],
        [0.0469],
        [0.0394],
        [0.0351],
        [0.0323],
        [0.0347],
        [0.0350],
        [0.0616],
        [0.0324],
        [0.0446],
        [0.0312],
        [0.1471],
        [0.0338],
        [0.0368],
        [0.0350],
        [0.0348],
       

Mu tensor([[0.0220],
        [0.0300],
        [0.0295],
        [0.0159],
        [0.0358],
        [0.0207],
        [0.0155],
        [0.0129],
        [0.0168],
        [0.0118],
        [0.0996],
        [0.0670],
        [0.0203],
        [0.0410],
        [0.0291],
        [0.0435],
        [0.0835],
        [0.0205],
        [0.0531],
        [0.0155],
        [0.0260],
        [0.0301],
        [0.0187],
        [0.0212],
        [0.0791],
        [0.0316],
        [0.0523],
        [0.0970],
        [0.0335],
        [0.0183],
        [0.0189],
        [0.0279],
        [0.0265],
        [0.0242],
        [0.0220],
        [0.0185],
        [0.0493],
        [0.0161],
        [0.0670],
        [0.0203],
        [0.0145],
        [0.0214],
        [0.0885],
        [0.0248],
        [0.0855],
        [0.0199],
        [0.0210],
        [0.0252],
        [0.0172],
        [0.0154],
        [0.0303],
        [0.0236],
        [0.0665],
        [0.0329],
        [0.0313],
       

Mu tensor([[-0.0105],
        [ 0.0218],
        [ 0.0177],
        [ 0.0122],
        [ 0.0588],
        [-0.0100],
        [-0.0027],
        [ 0.0169],
        [ 0.0107],
        [ 0.0018],
        [ 0.0014],
        [ 0.0455],
        [ 0.0032],
        [ 0.0304],
        [-0.0044],
        [-0.0023],
        [-0.0056],
        [-0.0282],
        [ 0.0287],
        [-0.0100],
        [ 0.0492],
        [-0.0026],
        [ 0.0073],
        [-0.0071],
        [ 0.0088],
        [-0.0064],
        [ 0.0111],
        [-0.0036],
        [ 0.0028],
        [-0.0029],
        [ 0.0330],
        [-0.0105],
        [ 0.0101],
        [-0.0080],
        [-0.0038],
        [ 0.0326],
        [ 0.0086],
        [ 0.0076],
        [-0.0011],
        [ 0.0357],
        [ 0.0037],
        [ 0.0036],
        [ 0.0118],
        [ 0.0047],
        [ 0.0144],
        [-0.0108],
        [ 0.0026],
        [-0.0039],
        [ 0.0438],
        [-0.0021],
        [ 0.0144],
        [ 0.0060],
        [

KeyboardInterrupt: 