In [20]:
import numpy as np
from random import random
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch import optim

import random
import os
import time
import matplotlib.pyplot as plt

from torch.autograd import Variable

In [1]:
class follower:
    
    def __init__(self,X0,dt):
        self.X  = X0
        self.dt = dt
        
    def step(self,u,w):
        
        self.X = self.X + np.array([u*np.cos(self.X[2]),u*np.sin(self.X[2]),w])*dt
        
        return self.X
        
class target:
    
    def __init__(self,X0,dt):
        self.X = X0
        self.dt = dt
        self.t0 = 0
        self.speed = 0
        self.theta = 0
        
    def step(self,a,alpha):
        
        if (self.speed<2):
            self.speed = self.speed + a*self.dt
            
        self.theta = self.theta + alpha*dt
        
        if self.theta>np.pi:
            self.theta = self.theta - 2*np.pi
        if self.theta<-np.pi:
            self.theta = self.theta + 2*np.pi
        
        self.X = self.X + np.array([ self.speed*np.cos(self.theta),self.speed*np.sin(self.theta) ])*dt
        return self.X
    
def wrap_angle(angle):
    if angle>np.pi:
        angle = angle - 2*np.pi
    if angle<-np.pi:
        angle = angle + 2*np.pi
    return angle
    
def compute_reward(F_X,T_X):
    
    FoV = 30*np.pi/180
    max_D = 3
    min_D = 0.3
    beta = np.arctan2(T_X[1]-F_X[1],T_X[0]-F_X[0])
    
    angle_diff = wrap_angle(beta - F_X[2])
    
    distance = np.sqrt( (T_X[0]-F_X[0])**2 + (T_X[1]-F_X[1])**2 )
    
    if np.abs(angle_diff)>FoV:
        reward_angle = -1
    else:
        reward_angle = np.abs(FoV-angle_diff)/FoV
    
    if distance>max_D:
        reward_distance = -1
    elif distance<min_D:
        reward_distance = -1
    else:
        reward_distance = np.abs(distance-min_D)*np.abs(distance-max_D)*4/(max_D-min_D)**2
        
    reward = reward_angle*reward_distance
    
    return reward
    

In [42]:
def decode_action(action):
        if action==0:
            u = 0;v=0             
        elif action==1:
            u = 0;v=0.4
        elif action==2:
            u = 0;v=-0.4
        elif action==3:
            u = 1;v=0
        elif action==4:
            u = 1;v=0.4
        else: # action==5:
            u = 1;v=-0.4
        return u,v

In [69]:
def eval_policy(agent,env_name,eval_episodes=10):
    #initialize
    
    avg_reward = 0.
    for _ in range(eval_episodes):
        agentF = follower(np.array([0,0.2,0]),dt)
        agentT = target(np.array([1,0]),dt)
        done = False
        while not done:
            T = agentT.X
            F = agentF.X    
            #print(T)
            #print(F)
            state = np.array([F[0],F[1],F[2],T[0],T[1]])
    
            action = agent.policy(state)
            u,v = decode_action(action)
            
            T_ns = agentT.step(0.2,0.5)        
            F_ns = agentF.step(u,v)
            reward = compute_reward(F_ns,T_ns)
            
            #print(reward)
            
            if reward<0:
                done = True
                #print("done")
            #print("running")
   
            avg_reward += reward
    avg_reward /= eval_episodes
    #print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
    print("---------------------------------------")
    return avg_reward

In [71]:
class NETWORK(torch.nn.Module):
    def __init__(self, input_dim: int, output_dim: int, hidden_dim: int) -> None:

        super(NETWORK, self).__init__()

        self.layer1 = torch.nn.Sequential(
             torch.nn.Linear(input_dim, hidden_dim),
             torch.nn.ReLU()
         )

        self.layer2 = torch.nn.Sequential(
           torch.nn.Linear(hidden_dim, hidden_dim),
           torch.nn.ReLU()
        )

        self.adv = nn.Linear(hidden_dim,output_dim)
        self.val = nn.Linear(hidden_dim,1)


    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.layer1(x)
        x = self.layer2(x)
        adv = self.adv(x)
        val = self.val(x)
        x = val + adv - adv.mean(1,keepdim=True)
        return x



class DQN(object):
    def __init__(self):
        self.action_dim = 6  # 2 speeds x 3 angular
        self.state_dim = 5 # 3 F + 2 T
        self.hidden_dim = 64

        self.epsilon = 0.1  
        self.epsilon_decay = 0.9995
        self.epsilon_min = 0.01
        self.epsilon_max = 1.0
        self.gamma = 0.98           # discount factor
        self.beta = 0.001           # Learning Rate #0.3 

        # Behaviour Policy
        self.primal_network = NETWORK(self.state_dim, self.action_dim, self.hidden_dim)
        self.optimizer_primal = optim.Adam(self.primal_network.parameters(),self.beta)
        
        # Target Policy
        self.target_network = NETWORK(self.state_dim, self.action_dim, self.hidden_dim)

        self.replay_buffer = 50000     # All dataset
        self.batch_size = 64           # Randomly sample a batch from replay buffer
        self.target_update = 4 # No. of episodes
        self.episode_counter = 0

        self.memory = []

        self.counter = 0

    def select_action(self, states: np.ndarray) -> int:
        
        if (np.random.random()<self.epsilon):
            # Exploration
            with torch.no_grad():
                action = torch.tensor( [ [random.randrange(self.action_dim)] ] )   #int(np.random.choice(2, 1))
                action = action.item()
        else:
                # Exploitation
                action = self.policy(states)
        #print(action,states)
        return action

    def policy(self, states: np.ndarray) -> int:  # policy is primal network?

        states = torch.FloatTensor(states).unsqueeze(0)
        with torch.no_grad():
                action = self.primal_network(states).max(1)[1].view(1,1)
        return action.item()

    def train(self,s0,a0,r,s1,sign):

        if sign==1:
            self.episode_counter += 1
            
        if (len(self.memory)>self.replay_buffer):
            del self.memory[0]

        self.memory.append((s0,a0,r,s1,sign))
        
        if len(self.memory)>=2*self.batch_size:

            if ( (self.episode_counter>=1) and (self.episode_counter % self.target_update == 0) ) :
                self.target_network.load_state_dict(self.primal_network.state_dict())
                self.episode_counter = 0
            
            transitions = random.sample(self.memory, self.batch_size)
            batch_state, batch_action, batch_reward, batch_next_state, batch_terminal = zip(*transitions)
            
            batch_state = torch.FloatTensor(batch_state)
            batch_action = torch.LongTensor(batch_action)
            batch_reward = torch.FloatTensor(batch_reward)
            batch_next_state = torch.FloatTensor(batch_next_state)
            batch_terminal = torch.FloatTensor(batch_terminal)

            Q_values = self.primal_network(batch_state)[range(self.batch_size), batch_action]  #.data

            with torch.no_grad():
                next_action = self.primal_network( batch_next_state ).max(1)[1]
                next_state_values = self.target_network(batch_next_state)[range(self.batch_size), next_action]
                expected_Q_values = batch_reward + self.gamma*(1 - batch_terminal)*next_state_values

            loss = (Q_values - expected_Q_values).pow(2).mean()

            self.optimizer_primal.zero_grad()
            loss.backward()
            self.optimizer_primal.step()

        if sign==1:
         	if self.epsilon > self.epsilon_min*self.epsilon_decay:
         		self.epsilon *= self.epsilon_decay
        	

In [76]:
env_name = 'Target-Follower'
#env = gym.make(env_name)

dt = 0.1

    
agent = DQN() 

start_time = time.time()

reward_plot = []

for i in range(10000):
    #print("start")
    #initialize
    agentF = follower(np.array([0,0,0]),dt)
    agentT = target(np.array([1,0]),dt)
    done = False
    
    episodic_reward = 0
    
    T = agentT.X
    F = agentF.X    
    state = np.array([F[0],F[1],F[2],T[0],T[1]])
    
     #action to input
    
    while not done:
        action = agent.select_action(np.squeeze(state))
        
        u,v = decode_action(action)
        
       
        T_ns = agentT.step(0.2,0.5
                          )        
        F_ns = agentF.step(u,v)
        reward = compute_reward(F_ns,T_ns)
        if reward<0:
            done = True
        #next_state, reward, done, info = env.step(action)
        #env.render()
        episodic_reward += reward      
        sign = 1 if done else 0
        
        state = np.array([F[0],F[1],F[2],T[0],T[1]])
        next_state = np.array([F_ns[0],F_ns[1],F_ns[2],T_ns[0],T_ns[1]])
        
        agent.train(state,action,reward,next_state,sign)
        #print('here')
        next_state = state
    print (f'episode: {i}, reward: {episodic_reward}')         
    if i % 5 == 0:
        eval_reward = eval_policy(agent,env_name,eval_episodes=50)
        reward_plot.append(eval_reward)
        #env.render()
        print(eval_reward)
        if eval_reward >= 100:
            print("Problem solved in {} episodes".format(i + 1))
            break
    #print("done?")
    #print(i)


print('Complete')
#env.render()
#env.close()


print("----------- seconds ------------")
print(time.time() - start_time)

episode: 0, reward: 13.207390283637366
Evaluation over 50 episodes: 8.934
---------------------------------------
tensor(8.9338, dtype=torch.float64)
episode: 1, reward: 12.884303133834594
episode: 2, reward: 13.591184822159741
episode: 3, reward: 13.188842055823251
episode: 4, reward: 13.553672549005645
episode: 5, reward: 12.884303133834594
Evaluation over 50 episodes: 8.934
---------------------------------------
tensor(8.9338, dtype=torch.float64)
episode: 6, reward: 11.57947238227713
episode: 7, reward: 12.849521166738276
episode: 8, reward: 11.458301004175295
episode: 9, reward: 12.884303133834594
episode: 10, reward: 11.953675683319116
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 11, reward: 11.873380001937285
episode: 12, reward: 12.884303133834594
episode: 13, reward: 12.68594124303923
episode: 14, reward: 12.884303133834594
episode: 15, reward: 13.060814485587962
Evaluation over 50 episodes: 15.977
-

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 136, reward: 13.459344430564302
episode: 137, reward: 12.314936167053494
episode: 138, reward: 12.884303133834594
episode: 139, reward: 12.884303133834594
episode: 140, reward: 12.451402081087771
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 141, reward: 12.884303133834594
episode: 142, reward: 12.932462195899031
episode: 143, reward: 12.884303133834594
episode: 144, reward: 13.553672549005645
episode: 145, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 146, reward: 11.140613113368419
episode: 147, reward: 12.884303133834594
episode: 148, reward: 12.884303133834594
episode: 149, reward: 12.872645515522317
episode: 150, reward: 12.446690880065677
Evaluation over 50 episodes: 15.977
----------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 266, reward: 12.884303133834594
episode: 267, reward: 12.884303133834594
episode: 268, reward: 12.884303133834594
episode: 269, reward: 12.174177418483564
episode: 270, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 271, reward: 12.884303133834594
episode: 272, reward: 11.44929622625176
episode: 273, reward: 12.884303133834594
episode: 274, reward: 12.884303133834594
episode: 275, reward: 12.350842967249298
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 276, reward: 12.455287075609922
episode: 277, reward: 12.026271017385255
episode: 278, reward: 13.025434260890536
episode: 279, reward: 13.488243978468917
episode: 280, reward: 12.992503498383027
Evaluation over 50 episodes: 15.977
-----------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 396, reward: 12.884303133834594
episode: 397, reward: 13.823458592630974
episode: 398, reward: 12.18995701467731
episode: 399, reward: 12.884303133834594
episode: 400, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 401, reward: 12.884303133834594
episode: 402, reward: 12.932528274178754
episode: 403, reward: 13.434031721932394
episode: 404, reward: 12.884303133834594
episode: 405, reward: 12.452601542501796
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 406, reward: 11.097511412353587
episode: 407, reward: 13.284353954870534
episode: 408, reward: 12.774150069390963
episode: 409, reward: 11.953675683319116
episode: 410, reward: 11.376098732142532
Evaluation over 50 episodes: 15.977
-----------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 526, reward: 11.920741959222546
episode: 527, reward: 11.761366190088388
episode: 528, reward: 12.884303133834594
episode: 529, reward: 13.129871547840509
episode: 530, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 531, reward: 12.755892668308954
episode: 532, reward: 12.884303133834594
episode: 533, reward: 12.884303133834594
episode: 534, reward: 12.884303133834594
episode: 535, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 536, reward: 13.305308694174293
episode: 537, reward: 12.884303133834594
episode: 538, reward: 12.818890519738453
episode: 539, reward: 12.884303133834594
episode: 540, reward: 12.364109100447216
Evaluation over 50 episodes: 15.977
----------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 656, reward: 12.884303133834594
episode: 657, reward: 12.884303133834594
episode: 658, reward: 12.884303133834594
episode: 659, reward: 12.884303133834594
episode: 660, reward: 13.30560401901003
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 661, reward: 12.885704833899414
episode: 662, reward: 12.79985769406504
episode: 663, reward: 13.247696182617323
episode: 664, reward: 12.884303133834594
episode: 665, reward: 12.267144705811273
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 666, reward: 13.255764624552297
episode: 667, reward: 12.884303133834594
episode: 668, reward: 12.884303133834594
episode: 669, reward: 12.976107021274254
episode: 670, reward: 13.353195730780133
Evaluation over 50 episodes: 15.977
------------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 786, reward: 12.884303133834594
episode: 787, reward: 12.884303133834594
episode: 788, reward: 12.884303133834594
episode: 789, reward: 12.208038338620668
episode: 790, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 791, reward: 12.884303133834594
episode: 792, reward: 13.970777980374576
episode: 793, reward: 13.426846390059264
episode: 794, reward: 12.884303133834594
episode: 795, reward: 12.959733799566765
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 796, reward: 12.884303133834594
episode: 797, reward: 13.781892545161703
episode: 798, reward: 12.884303133834594
episode: 799, reward: 12.316333120624632
episode: 800, reward: 12.872645515522317
Evaluation over 50 episodes: 15.977
----------

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 916, reward: 12.884303133834594
episode: 917, reward: 12.884303133834594
episode: 918, reward: 12.884303133834594
episode: 919, reward: 12.18995701467731
episode: 920, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 921, reward: 13.383052482146386
episode: 922, reward: 13.037091879202814
episode: 923, reward: 12.884303133834594
episode: 924, reward: 12.884303133834594
episode: 925, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 926, reward: 12.488184130177872
episode: 927, reward: 12.884303133834594
episode: 928, reward: 11.770031309229212
episode: 929, reward: 13.118469062757315
episode: 930, reward: 13.15906546975211
Evaluation over 50 episodes: 15.977
------------

episode: 1045, reward: 12.882274283062454
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1046, reward: 13.284353954870534
episode: 1047, reward: 13.04424181688539
episode: 1048, reward: 12.775127685353606
episode: 1049, reward: 13.530612575307082
episode: 1050, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1051, reward: 12.931193382038
episode: 1052, reward: 13.129871547840509
episode: 1053, reward: 11.530289177575572
episode: 1054, reward: 12.884303133834594
episode: 1055, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1056, reward: 12.992503498383027
episode: 1057, reward: 11.619195699520715
episode: 1058, reward: 13.391533701453035
episode: 1059, reward: 13.391533701453035
episode: 1060, reward: 12.884303133

episode: 1171, reward: 12.884303133834594
episode: 1172, reward: 12.884303133834594
episode: 1173, reward: 12.130200932988563
episode: 1174, reward: 12.884303133834594
episode: 1175, reward: 13.284353954870534
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1176, reward: 10.887376884425521
episode: 1177, reward: 12.884303133834594
episode: 1178, reward: 12.605252641835273
episode: 1179, reward: 12.884303133834594
episode: 1180, reward: 13.025434260890536
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1181, reward: 12.884303133834594
episode: 1182, reward: 11.683875634416033
episode: 1183, reward: 12.884303133834594
episode: 1184, reward: 13.334180390015844
episode: 1185, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1186, reward: 12.88430

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1301, reward: 12.884303133834594
episode: 1302, reward: 12.68154317764852
episode: 1303, reward: 12.884303133834594
episode: 1304, reward: 12.422703319523713
episode: 1305, reward: 12.2316104397961
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1306, reward: 12.884303133834594
episode: 1307, reward: 12.79985769406504
episode: 1308, reward: 13.366708226914824
episode: 1309, reward: 12.884303133834594
episode: 1310, reward: 13.284353954870534
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1311, reward: 12.932462195899031
episode: 1312, reward: 12.884303133834594
episode: 1313, reward: 13.530612575307082
episode: 1314, reward: 12.455287075609922
episode: 1315, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1431, reward: 12.884303133834594
episode: 1432, reward: 12.884303133834594
episode: 1433, reward: 12.01937266769822
episode: 1434, reward: 12.884303133834594
episode: 1435, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1436, reward: 12.884303133834594
episode: 1437, reward: 13.358616436081116
episode: 1438, reward: 13.553672549005645
episode: 1439, reward: 12.251749416677653
episode: 1440, reward: 12.135545004355809
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1441, reward: 12.884303133834594
episode: 1442, reward: 12.035973126027992
episode: 1443, reward: 12.884303133834594
episode: 1444, reward: 12.2316104397961
episode: 1445, reward: 12.884303133834594
Evaluation over 50 episodes: 15.97

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1561, reward: 12.884303133834594
episode: 1562, reward: 12.884303133834594
episode: 1563, reward: 13.129871547840509
episode: 1564, reward: 12.884303133834594
episode: 1565, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1566, reward: 12.884303133834594
episode: 1567, reward: 12.884303133834594
episode: 1568, reward: 12.884303133834594
episode: 1569, reward: 11.715535091206917
episode: 1570, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1571, reward: 13.484702074667752
episode: 1572, reward: 11.884014901602614
episode: 1573, reward: 13.484702074667752
episode: 1574, reward: 12.818890519738453
episode: 1575, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1691, reward: 12.884303133834594
episode: 1692, reward: 13.27493170544108
episode: 1693, reward: 12.384159017718604
episode: 1694, reward: 13.591184822159741
episode: 1695, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1696, reward: 12.884303133834594
episode: 1697, reward: 11.74836310741467
episode: 1698, reward: 12.884303133834594
episode: 1699, reward: 12.60954687035512
episode: 1700, reward: 12.980621257963469
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1701, reward: 12.884303133834594
episode: 1702, reward: 12.884303133834594
episode: 1703, reward: 12.884303133834594
episode: 1704, reward: 13.57923502914169
episode: 1705, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1821, reward: 12.884303133834594
episode: 1822, reward: 12.884303133834594
episode: 1823, reward: 12.920639683311311
episode: 1824, reward: 12.884303133834594
episode: 1825, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1826, reward: 12.884303133834594
episode: 1827, reward: 12.174307569531365
episode: 1828, reward: 12.884303133834594
episode: 1829, reward: 11.57947238227713
episode: 1830, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1831, reward: 12.884303133834594
episode: 1832, reward: 12.992503498383027
episode: 1833, reward: 13.025434260890536
episode: 1834, reward: 13.035164465298935
episode: 1835, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1951, reward: 12.734585320429716
episode: 1952, reward: 13.188842055823251
episode: 1953, reward: 12.884303133834594
episode: 1954, reward: 12.884303133834594
episode: 1955, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1956, reward: 12.884303133834594
episode: 1957, reward: 12.884303133834594
episode: 1958, reward: 12.884303133834594
episode: 1959, reward: 12.884303133834594
episode: 1960, reward: 12.68154317764852
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 1961, reward: 12.68154317764852
episode: 1962, reward: 12.949039888206427
episode: 1963, reward: 12.627482202783314
episode: 1964, reward: 13.425278288458706
episode: 1965, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2081, reward: 12.884303133834594
episode: 2082, reward: 12.884303133834594
episode: 2083, reward: 12.884303133834594
episode: 2084, reward: 13.204930451394734
episode: 2085, reward: 13.530612575307082
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2086, reward: 12.18995701467731
episode: 2087, reward: 12.884303133834594
episode: 2088, reward: 12.884303133834594
episode: 2089, reward: 13.30560401901003
episode: 2090, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2091, reward: 13.115206639400526
episode: 2092, reward: 12.960697506518704
episode: 2093, reward: 12.884303133834594
episode: 2094, reward: 12.884303133834594
episode: 2095, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2211, reward: 12.884303133834594
episode: 2212, reward: 12.884303133834594
episode: 2213, reward: 12.884303133834594
episode: 2214, reward: 13.188842055823251
episode: 2215, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2216, reward: 12.884303133834594
episode: 2217, reward: 12.885704833899414
episode: 2218, reward: 12.884303133834594
episode: 2219, reward: 12.884303133834594
episode: 2220, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2221, reward: 12.884303133834594
episode: 2222, reward: 12.884303133834594
episode: 2223, reward: 12.932462195899031
episode: 2224, reward: 11.47071738120917
episode: 2225, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2341, reward: 12.222314218710428
episode: 2342, reward: 12.884303133834594
episode: 2343, reward: 12.884303133834594
episode: 2344, reward: 12.79985769406504
episode: 2345, reward: 12.529305351682977
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2346, reward: 12.884303133834594
episode: 2347, reward: 12.884303133834594
episode: 2348, reward: 12.18995701467731
episode: 2349, reward: 12.884303133834594
episode: 2350, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2351, reward: 12.884303133834594
episode: 2352, reward: 12.07264353345891
episode: 2353, reward: 12.884303133834594
episode: 2354, reward: 12.884303133834594
episode: 2355, reward: 12.884303133834594
Evaluation over 50 episodes: 15.97

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2471, reward: 12.326202149835954
episode: 2472, reward: 12.884303133834594
episode: 2473, reward: 12.884303133834594
episode: 2474, reward: 11.49561089552003
episode: 2475, reward: 12.931193382038
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2476, reward: 12.884303133834594
episode: 2477, reward: 12.980621257963469
episode: 2478, reward: 12.884303133834594
episode: 2479, reward: 12.884303133834594
episode: 2480, reward: 13.577826417581463
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2481, reward: 12.251749416677653
episode: 2482, reward: 12.884303133834594
episode: 2483, reward: 12.884303133834594
episode: 2484, reward: 12.174307569531365
episode: 2485, reward: 13.591184822159741
Evaluation over 50 episodes: 15.977

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2601, reward: 12.884303133834594
episode: 2602, reward: 13.301721891886613
episode: 2603, reward: 13.530612575307082
episode: 2604, reward: 13.229394853900425
episode: 2605, reward: 12.959733799566765
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2606, reward: 12.68154317764852
episode: 2607, reward: 12.884303133834594
episode: 2608, reward: 11.74836310741467
episode: 2609, reward: 12.316333120624632
episode: 2610, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2611, reward: 12.884303133834594
episode: 2612, reward: 12.884303133834594
episode: 2613, reward: 12.403042333511701
episode: 2614, reward: 12.884303133834594
episode: 2615, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2731, reward: 13.553672549005645
episode: 2732, reward: 12.884303133834594
episode: 2733, reward: 12.884303133834594
episode: 2734, reward: 12.884303133834594
episode: 2735, reward: 13.383052482146386
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2736, reward: 12.916976407552498
episode: 2737, reward: 12.884303133834594
episode: 2738, reward: 12.884303133834594
episode: 2739, reward: 12.884303133834594
episode: 2740, reward: 12.980621257963469
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2741, reward: 12.884303133834594
episode: 2742, reward: 12.884303133834594
episode: 2743, reward: 12.884303133834594
episode: 2744, reward: 12.884303133834594
episode: 2745, reward: 12.316333120624632
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2861, reward: 12.884303133834594
episode: 2862, reward: 12.884303133834594
episode: 2863, reward: 12.884303133834594
episode: 2864, reward: 12.884303133834594
episode: 2865, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2866, reward: 12.174307569531365
episode: 2867, reward: 12.884303133834594
episode: 2868, reward: 12.884303133834594
episode: 2869, reward: 12.884303133834594
episode: 2870, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2871, reward: 12.884303133834594
episode: 2872, reward: 12.884303133834594
episode: 2873, reward: 12.960697506518704
episode: 2874, reward: 12.76618824578523
episode: 2875, reward: 11.292850939333958
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2991, reward: 12.884303133834594
episode: 2992, reward: 12.884303133834594
episode: 2993, reward: 12.884303133834594
episode: 2994, reward: 12.884303133834594
episode: 2995, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 2996, reward: 12.884303133834594
episode: 2997, reward: 12.384159017718604
episode: 2998, reward: 12.884303133834594
episode: 2999, reward: 12.884303133834594
episode: 3000, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3001, reward: 12.884303133834594
episode: 3002, reward: 12.884303133834594
episode: 3003, reward: 12.884303133834594
episode: 3004, reward: 12.884303133834594
episode: 3005, reward: 13.305308694174293
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3121, reward: 12.884303133834594
episode: 3122, reward: 12.884303133834594
episode: 3123, reward: 13.30560401901003
episode: 3124, reward: 12.884303133834594
episode: 3125, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3126, reward: 12.884303133834594
episode: 3127, reward: 12.884303133834594
episode: 3128, reward: 12.884303133834594
episode: 3129, reward: 12.960697506518704
episode: 3130, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3131, reward: 12.884303133834594
episode: 3132, reward: 12.884303133834594
episode: 3133, reward: 12.529305351682977
episode: 3134, reward: 12.18995701467731
episode: 3135, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3251, reward: 12.884303133834594
episode: 3252, reward: 12.884303133834594
episode: 3253, reward: 12.884303133834594
episode: 3254, reward: 12.884303133834594
episode: 3255, reward: 12.775127685353606
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3256, reward: 13.0700797228311
episode: 3257, reward: 12.884303133834594
episode: 3258, reward: 12.884303133834594
episode: 3259, reward: 12.884303133834594
episode: 3260, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3261, reward: 12.884303133834594
episode: 3262, reward: 12.884303133834594
episode: 3263, reward: 12.884303133834594
episode: 3264, reward: 12.884303133834594
episode: 3265, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3381, reward: 12.884303133834594
episode: 3382, reward: 12.884303133834594
episode: 3383, reward: 12.884303133834594
episode: 3384, reward: 12.884303133834594
episode: 3385, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3386, reward: 12.884303133834594
episode: 3387, reward: 12.884303133834594
episode: 3388, reward: 12.884303133834594
episode: 3389, reward: 12.884303133834594
episode: 3390, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3391, reward: 12.884303133834594
episode: 3392, reward: 12.884303133834594
episode: 3393, reward: 12.884303133834594
episode: 3394, reward: 12.884303133834594
episode: 3395, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3511, reward: 12.884303133834594
episode: 3512, reward: 13.247696182617323
episode: 3513, reward: 13.383052482146386
episode: 3514, reward: 12.884303133834594
episode: 3515, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3516, reward: 12.884303133834594
episode: 3517, reward: 12.884303133834594
episode: 3518, reward: 12.884303133834594
episode: 3519, reward: 12.884303133834594
episode: 3520, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3521, reward: 12.403042333511701
episode: 3522, reward: 12.884303133834594
episode: 3523, reward: 12.884303133834594
episode: 3524, reward: 12.884303133834594
episode: 3525, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3641, reward: 12.884303133834594
episode: 3642, reward: 13.30560401901003
episode: 3643, reward: 12.884303133834594
episode: 3644, reward: 12.884303133834594
episode: 3645, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3646, reward: 12.884303133834594
episode: 3647, reward: 12.884303133834594
episode: 3648, reward: 12.884303133834594
episode: 3649, reward: 12.884303133834594
episode: 3650, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3651, reward: 12.884303133834594
episode: 3652, reward: 12.884303133834594
episode: 3653, reward: 12.884303133834594
episode: 3654, reward: 12.884303133834594
episode: 3655, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3771, reward: 12.884303133834594
episode: 3772, reward: 12.884303133834594
episode: 3773, reward: 12.884303133834594
episode: 3774, reward: 12.884303133834594
episode: 3775, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3776, reward: 12.884303133834594
episode: 3777, reward: 12.884303133834594
episode: 3778, reward: 12.884303133834594
episode: 3779, reward: 12.884303133834594
episode: 3780, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3781, reward: 12.884303133834594
episode: 3782, reward: 12.884303133834594
episode: 3783, reward: 12.884303133834594
episode: 3784, reward: 12.884303133834594
episode: 3785, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3901, reward: 11.74836310741467
episode: 3902, reward: 12.884303133834594
episode: 3903, reward: 12.884303133834594
episode: 3904, reward: 12.884303133834594
episode: 3905, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3906, reward: 12.884303133834594
episode: 3907, reward: 12.884303133834594
episode: 3908, reward: 12.884303133834594
episode: 3909, reward: 12.884303133834594
episode: 3910, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 3911, reward: 12.884303133834594
episode: 3912, reward: 12.884303133834594
episode: 3913, reward: 12.884303133834594
episode: 3914, reward: 12.884303133834594
episode: 3915, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4031, reward: 12.884303133834594
episode: 4032, reward: 12.884303133834594
episode: 4033, reward: 12.884303133834594
episode: 4034, reward: 12.932462195899031
episode: 4035, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4036, reward: 12.884303133834594
episode: 4037, reward: 12.884303133834594
episode: 4038, reward: 12.884303133834594
episode: 4039, reward: 13.077785522870753
episode: 4040, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4041, reward: 12.884303133834594
episode: 4042, reward: 12.884303133834594
episode: 4043, reward: 12.884303133834594
episode: 4044, reward: 12.884303133834594
episode: 4045, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4161, reward: 12.884303133834594
episode: 4162, reward: 12.884303133834594
episode: 4163, reward: 12.884303133834594
episode: 4164, reward: 12.884303133834594
episode: 4165, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4166, reward: 12.884303133834594
episode: 4167, reward: 12.884303133834594
episode: 4168, reward: 12.884303133834594
episode: 4169, reward: 12.884303133834594
episode: 4170, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4171, reward: 12.884303133834594
episode: 4172, reward: 12.455287075609922
episode: 4173, reward: 12.884303133834594
episode: 4174, reward: 12.884303133834594
episode: 4175, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4291, reward: 12.884303133834594
episode: 4292, reward: 12.884303133834594
episode: 4293, reward: 12.884303133834594
episode: 4294, reward: 12.884303133834594
episode: 4295, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4296, reward: 12.884303133834594
episode: 4297, reward: 12.884303133834594
episode: 4298, reward: 12.884303133834594
episode: 4299, reward: 12.251749416677653
episode: 4300, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4301, reward: 12.884303133834594
episode: 4302, reward: 12.884303133834594
episode: 4303, reward: 13.276650100813677
episode: 4304, reward: 12.884303133834594
episode: 4305, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4421, reward: 12.884303133834594
episode: 4422, reward: 12.884303133834594
episode: 4423, reward: 12.884303133834594
episode: 4424, reward: 12.884303133834594
episode: 4425, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4426, reward: 12.884303133834594
episode: 4427, reward: 13.101807247796291
episode: 4428, reward: 12.884303133834594
episode: 4429, reward: 12.884303133834594
episode: 4430, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4431, reward: 12.884303133834594
episode: 4432, reward: 12.884303133834594
episode: 4433, reward: 12.884303133834594
episode: 4434, reward: 12.884303133834594
episode: 4435, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4551, reward: 12.884303133834594
episode: 4552, reward: 12.884303133834594
episode: 4553, reward: 12.884303133834594
episode: 4554, reward: 12.884303133834594
episode: 4555, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4556, reward: 12.884303133834594
episode: 4557, reward: 12.884303133834594
episode: 4558, reward: 12.884303133834594
episode: 4559, reward: 12.884303133834594
episode: 4560, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4561, reward: 12.884303133834594
episode: 4562, reward: 12.884303133834594
episode: 4563, reward: 12.884303133834594
episode: 4564, reward: 12.884303133834594
episode: 4565, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4681, reward: 12.2316104397961
episode: 4682, reward: 12.884303133834594
episode: 4683, reward: 12.884303133834594
episode: 4684, reward: 12.884303133834594
episode: 4685, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4686, reward: 12.884303133834594
episode: 4687, reward: 12.884303133834594
episode: 4688, reward: 12.884303133834594
episode: 4689, reward: 12.884303133834594
episode: 4690, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4691, reward: 12.884303133834594
episode: 4692, reward: 12.884303133834594
episode: 4693, reward: 12.884303133834594
episode: 4694, reward: 12.884303133834594
episode: 4695, reward: 12.884303133834594
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4811, reward: 12.884303133834594
episode: 4812, reward: 12.884303133834594
episode: 4813, reward: 12.884303133834594
episode: 4814, reward: 12.884303133834594
episode: 4815, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4816, reward: 12.884303133834594
episode: 4817, reward: 12.884303133834594
episode: 4818, reward: 12.884303133834594
episode: 4819, reward: 12.884303133834594
episode: 4820, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4821, reward: 12.884303133834594
episode: 4822, reward: 12.884303133834594
episode: 4823, reward: 12.884303133834594
episode: 4824, reward: 12.884303133834594
episode: 4825, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4941, reward: 12.884303133834594
episode: 4942, reward: 12.884303133834594
episode: 4943, reward: 12.884303133834594
episode: 4944, reward: 12.384159017718604
episode: 4945, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4946, reward: 12.884303133834594
episode: 4947, reward: 12.529305351682977
episode: 4948, reward: 12.884303133834594
episode: 4949, reward: 12.884303133834594
episode: 4950, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 4951, reward: 11.715535091206917
episode: 4952, reward: 12.884303133834594
episode: 4953, reward: 12.884303133834594
episode: 4954, reward: 12.884303133834594
episode: 4955, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5071, reward: 12.884303133834594
episode: 5072, reward: 12.884303133834594
episode: 5073, reward: 12.884303133834594
episode: 5074, reward: 12.884303133834594
episode: 5075, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5076, reward: 12.884303133834594
episode: 5077, reward: 13.194918235209478
episode: 5078, reward: 12.884303133834594
episode: 5079, reward: 12.884303133834594
episode: 5080, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5081, reward: 12.884303133834594
episode: 5082, reward: 12.884303133834594
episode: 5083, reward: 12.884303133834594
episode: 5084, reward: 12.884303133834594
episode: 5085, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5201, reward: 12.884303133834594
episode: 5202, reward: 12.884303133834594
episode: 5203, reward: 12.884303133834594
episode: 5204, reward: 13.598674963625864
episode: 5205, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5206, reward: 12.970032946115198
episode: 5207, reward: 12.884303133834594
episode: 5208, reward: 13.395005384421022
episode: 5209, reward: 12.884303133834594
episode: 5210, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5211, reward: 12.884303133834594
episode: 5212, reward: 12.884303133834594
episode: 5213, reward: 12.775127685353606
episode: 5214, reward: 12.884303133834594
episode: 5215, reward: 13.037091879202814
Evaluation over 50 episodes: 15

episode: 5330, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5331, reward: 12.884303133834594
episode: 5332, reward: 12.884303133834594
episode: 5333, reward: 12.884303133834594
episode: 5334, reward: 12.884303133834594
episode: 5335, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5336, reward: 12.884303133834594
episode: 5337, reward: 12.884303133834594
episode: 5338, reward: 12.884303133834594
episode: 5339, reward: 12.884303133834594
episode: 5340, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5341, reward: 12.68564185059292
episode: 5342, reward: 13.284353954870534
episode: 5343, reward: 12.884303133834594
episode: 5344, reward: 12.884303133834594
episode: 5345, reward: 12.884303

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5461, reward: 12.884303133834594
episode: 5462, reward: 12.884303133834594
episode: 5463, reward: 12.884303133834594
episode: 5464, reward: 12.884303133834594
episode: 5465, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5466, reward: 12.884303133834594
episode: 5467, reward: 12.604720153935707
episode: 5468, reward: 12.884303133834594
episode: 5469, reward: 12.884303133834594
episode: 5470, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5471, reward: 12.884303133834594
episode: 5472, reward: 12.884303133834594
episode: 5473, reward: 12.884303133834594
episode: 5474, reward: 13.459344430564302
episode: 5475, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5591, reward: 12.884303133834594
episode: 5592, reward: 12.884303133834594
episode: 5593, reward: 13.484702074667752
episode: 5594, reward: 13.276650100813677
episode: 5595, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5596, reward: 12.884303133834594
episode: 5597, reward: 12.884303133834594
episode: 5598, reward: 12.884303133834594
episode: 5599, reward: 12.884303133834594
episode: 5600, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5601, reward: 12.885704833899414
episode: 5602, reward: 13.115206639400526
episode: 5603, reward: 12.884303133834594
episode: 5604, reward: 12.884303133834594
episode: 5605, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5721, reward: 12.884303133834594
episode: 5722, reward: 12.884303133834594
episode: 5723, reward: 12.884303133834594
episode: 5724, reward: 12.884303133834594
episode: 5725, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5726, reward: 12.884303133834594
episode: 5727, reward: 12.884303133834594
episode: 5728, reward: 12.884303133834594
episode: 5729, reward: 12.884303133834594
episode: 5730, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5731, reward: 12.884303133834594
episode: 5732, reward: 12.884303133834594
episode: 5733, reward: 12.884303133834594
episode: 5734, reward: 12.884303133834594
episode: 5735, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5851, reward: 12.884303133834594
episode: 5852, reward: 12.884303133834594
episode: 5853, reward: 12.884303133834594
episode: 5854, reward: 12.884303133834594
episode: 5855, reward: 12.326202149835954
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5856, reward: 12.422703319523713
episode: 5857, reward: 12.884303133834594
episode: 5858, reward: 12.174307569531365
episode: 5859, reward: 12.884303133834594
episode: 5860, reward: 12.966708625277198
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5861, reward: 12.884303133834594
episode: 5862, reward: 12.884303133834594
episode: 5863, reward: 12.884303133834594
episode: 5864, reward: 12.884303133834594
episode: 5865, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5981, reward: 12.884303133834594
episode: 5982, reward: 13.025434260890536
episode: 5983, reward: 12.884303133834594
episode: 5984, reward: 12.884303133834594
episode: 5985, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5986, reward: 12.884303133834594
episode: 5987, reward: 12.884303133834594
episode: 5988, reward: 12.884303133834594
episode: 5989, reward: 12.884303133834594
episode: 5990, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 5991, reward: 12.884303133834594
episode: 5992, reward: 12.884303133834594
episode: 5993, reward: 12.884303133834594
episode: 5994, reward: 12.884303133834594
episode: 5995, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6111, reward: 12.884303133834594
episode: 6112, reward: 12.884303133834594
episode: 6113, reward: 12.884303133834594
episode: 6114, reward: 12.884303133834594
episode: 6115, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6116, reward: 12.884303133834594
episode: 6117, reward: 12.884303133834594
episode: 6118, reward: 12.884303133834594
episode: 6119, reward: 12.355942208516488
episode: 6120, reward: 12.960697506518704
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6121, reward: 12.884303133834594
episode: 6122, reward: 12.884303133834594
episode: 6123, reward: 12.884303133834594
episode: 6124, reward: 12.884303133834594
episode: 6125, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6241, reward: 12.884303133834594
episode: 6242, reward: 12.884303133834594
episode: 6243, reward: 12.884303133834594
episode: 6244, reward: 12.884303133834594
episode: 6245, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6246, reward: 12.884303133834594
episode: 6247, reward: 12.884303133834594
episode: 6248, reward: 12.884303133834594
episode: 6249, reward: 12.884303133834594
episode: 6250, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6251, reward: 12.884303133834594
episode: 6252, reward: 12.884303133834594
episode: 6253, reward: 12.884303133834594
episode: 6254, reward: 13.577826417581463
episode: 6255, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6371, reward: 11.619195699520715
episode: 6372, reward: 12.884303133834594
episode: 6373, reward: 12.884303133834594
episode: 6374, reward: 12.884303133834594
episode: 6375, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6376, reward: 12.884303133834594
episode: 6377, reward: 11.873380001937285
episode: 6378, reward: 12.884303133834594
episode: 6379, reward: 12.884303133834594
episode: 6380, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6381, reward: 13.207390283637366
episode: 6382, reward: 12.884303133834594
episode: 6383, reward: 12.884303133834594
episode: 6384, reward: 12.884303133834594
episode: 6385, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6501, reward: 12.884303133834594
episode: 6502, reward: 12.884303133834594
episode: 6503, reward: 12.884303133834594
episode: 6504, reward: 12.884303133834594
episode: 6505, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6506, reward: 12.884303133834594
episode: 6507, reward: 12.884303133834594
episode: 6508, reward: 12.884303133834594
episode: 6509, reward: 12.884303133834594
episode: 6510, reward: 13.358616436081116
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6511, reward: 12.884303133834594
episode: 6512, reward: 12.884303133834594
episode: 6513, reward: 12.884303133834594
episode: 6514, reward: 12.884303133834594
episode: 6515, reward: 12.884303133834594
Evaluation over 50 episodes: 15

episode: 6630, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6631, reward: 12.884303133834594
episode: 6632, reward: 12.884303133834594
episode: 6633, reward: 12.884303133834594
episode: 6634, reward: 12.884303133834594
episode: 6635, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6636, reward: 12.884303133834594
episode: 6637, reward: 12.884303133834594
episode: 6638, reward: 12.884303133834594
episode: 6639, reward: 12.605252641835273
episode: 6640, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6641, reward: 12.884303133834594
episode: 6642, reward: 12.884303133834594
episode: 6643, reward: 12.884303133834594
episode: 6644, reward: 12.884303133834594
episode: 6645, reward: 12.88430

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6761, reward: 12.884303133834594
episode: 6762, reward: 12.884303133834594
episode: 6763, reward: 12.884303133834594
episode: 6764, reward: 12.884303133834594
episode: 6765, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6766, reward: 12.884303133834594
episode: 6767, reward: 12.884303133834594
episode: 6768, reward: 12.884303133834594
episode: 6769, reward: 12.884303133834594
episode: 6770, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6771, reward: 12.884303133834594
episode: 6772, reward: 12.884303133834594
episode: 6773, reward: 12.884303133834594
episode: 6774, reward: 12.026271017385255
episode: 6775, reward: 12.422703319523713
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6891, reward: 12.884303133834594
episode: 6892, reward: 11.886818301732252
episode: 6893, reward: 12.884303133834594
episode: 6894, reward: 12.884303133834594
episode: 6895, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6896, reward: 12.884303133834594
episode: 6897, reward: 12.884303133834594
episode: 6898, reward: 12.884303133834594
episode: 6899, reward: 12.884303133834594
episode: 6900, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 6901, reward: 13.30560401901003
episode: 6902, reward: 12.884303133834594
episode: 6903, reward: 12.884303133834594
episode: 6904, reward: 12.884303133834594
episode: 6905, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7021, reward: 12.884303133834594
episode: 7022, reward: 12.884303133834594
episode: 7023, reward: 12.884303133834594
episode: 7024, reward: 12.884303133834594
episode: 7025, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7026, reward: 12.884303133834594
episode: 7027, reward: 12.884303133834594
episode: 7028, reward: 12.884303133834594
episode: 7029, reward: 12.884303133834594
episode: 7030, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7031, reward: 12.884303133834594
episode: 7032, reward: 12.884303133834594
episode: 7033, reward: 13.366708226914824
episode: 7034, reward: 12.884303133834594
episode: 7035, reward: 11.619195699520715
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7151, reward: 11.376098732142532
episode: 7152, reward: 12.884303133834594
episode: 7153, reward: 12.884303133834594
episode: 7154, reward: 12.884303133834594
episode: 7155, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7156, reward: 12.884303133834594
episode: 7157, reward: 12.884303133834594
episode: 7158, reward: 12.884303133834594
episode: 7159, reward: 12.884303133834594
episode: 7160, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7161, reward: 12.884303133834594
episode: 7162, reward: 12.884303133834594
episode: 7163, reward: 12.884303133834594
episode: 7164, reward: 13.284353954870534
episode: 7165, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7281, reward: 12.884303133834594
episode: 7282, reward: 12.884303133834594
episode: 7283, reward: 12.884303133834594
episode: 7284, reward: 12.884303133834594
episode: 7285, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7286, reward: 12.884303133834594
episode: 7287, reward: 12.884303133834594
episode: 7288, reward: 12.884303133834594
episode: 7289, reward: 13.037091879202814
episode: 7290, reward: 12.627482202783314
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7291, reward: 12.884303133834594
episode: 7292, reward: 12.884303133834594
episode: 7293, reward: 12.884303133834594
episode: 7294, reward: 13.358616436081116
episode: 7295, reward: 12.949039888206427
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7411, reward: 12.884303133834594
episode: 7412, reward: 12.76618824578523
episode: 7413, reward: 12.884303133834594
episode: 7414, reward: 13.207390283637366
episode: 7415, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7416, reward: 12.884303133834594
episode: 7417, reward: 12.884303133834594
episode: 7418, reward: 12.884303133834594
episode: 7419, reward: 12.884303133834594
episode: 7420, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7421, reward: 12.884303133834594
episode: 7422, reward: 12.79985769406504
episode: 7423, reward: 12.884303133834594
episode: 7424, reward: 12.884303133834594
episode: 7425, reward: 12.980621257963469
Evaluation over 50 episodes: 15.9

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7541, reward: 12.884303133834594
episode: 7542, reward: 12.884303133834594
episode: 7543, reward: 12.884303133834594
episode: 7544, reward: 12.884303133834594
episode: 7545, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7546, reward: 12.884303133834594
episode: 7547, reward: 12.884303133834594
episode: 7548, reward: 12.931193382038
episode: 7549, reward: 12.884303133834594
episode: 7550, reward: 13.025434260890536
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7551, reward: 12.884303133834594
episode: 7552, reward: 13.230574268238973
episode: 7553, reward: 12.884303133834594
episode: 7554, reward: 12.884303133834594
episode: 7555, reward: 12.884303133834594
Evaluation over 50 episodes: 15.97

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7671, reward: 12.884303133834594
episode: 7672, reward: 12.884303133834594
episode: 7673, reward: 12.884303133834594
episode: 7674, reward: 12.884303133834594
episode: 7675, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7676, reward: 12.960697506518704
episode: 7677, reward: 12.884303133834594
episode: 7678, reward: 12.18995701467731
episode: 7679, reward: 12.884303133834594
episode: 7680, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7681, reward: 12.884303133834594
episode: 7682, reward: 12.884303133834594
episode: 7683, reward: 12.884303133834594
episode: 7684, reward: 12.884303133834594
episode: 7685, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

episode: 7800, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7801, reward: 12.884303133834594
episode: 7802, reward: 12.884303133834594
episode: 7803, reward: 13.229394853900425
episode: 7804, reward: 12.183547143792383
episode: 7805, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7806, reward: 12.884303133834594
episode: 7807, reward: 12.884303133834594
episode: 7808, reward: 12.884303133834594
episode: 7809, reward: 13.305308694174293
episode: 7810, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7811, reward: 12.884303133834594
episode: 7812, reward: 12.884303133834594
episode: 7813, reward: 12.884303133834594
episode: 7814, reward: 12.884303133834594
episode: 7815, reward: 12.88430

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7931, reward: 12.884303133834594
episode: 7932, reward: 12.884303133834594
episode: 7933, reward: 12.60954687035512
episode: 7934, reward: 12.884303133834594
episode: 7935, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7936, reward: 12.884303133834594
episode: 7937, reward: 12.884303133834594
episode: 7938, reward: 12.884303133834594
episode: 7939, reward: 12.884303133834594
episode: 7940, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 7941, reward: 12.884303133834594
episode: 7942, reward: 12.884303133834594
episode: 7943, reward: 12.884303133834594
episode: 7944, reward: 12.884303133834594
episode: 7945, reward: 12.884303133834594
Evaluation over 50 episodes: 15.

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8061, reward: 12.884303133834594
episode: 8062, reward: 12.884303133834594
episode: 8063, reward: 13.553672549005645
episode: 8064, reward: 12.884303133834594
episode: 8065, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8066, reward: 12.884303133834594
episode: 8067, reward: 12.825245689809913
episode: 8068, reward: 12.884303133834594
episode: 8069, reward: 12.825245689809913
episode: 8070, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8071, reward: 12.884303133834594
episode: 8072, reward: 12.884303133834594
episode: 8073, reward: 12.884303133834594
episode: 8074, reward: 12.884303133834594
episode: 8075, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8191, reward: 12.872645515522317
episode: 8192, reward: 12.884303133834594
episode: 8193, reward: 12.884303133834594
episode: 8194, reward: 12.884303133834594
episode: 8195, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8196, reward: 13.255764624552297
episode: 8197, reward: 12.884303133834594
episode: 8198, reward: 12.884303133834594
episode: 8199, reward: 12.884303133834594
episode: 8200, reward: 13.484702074667752
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8201, reward: 12.884303133834594
episode: 8202, reward: 12.884303133834594
episode: 8203, reward: 12.884303133834594
episode: 8204, reward: 12.884303133834594
episode: 8205, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8321, reward: 12.884303133834594
episode: 8322, reward: 12.884303133834594
episode: 8323, reward: 12.992503498383027
episode: 8324, reward: 12.884303133834594
episode: 8325, reward: 13.035164465298935
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8326, reward: 12.884303133834594
episode: 8327, reward: 13.142587903254059
episode: 8328, reward: 12.884303133834594
episode: 8329, reward: 12.884303133834594
episode: 8330, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8331, reward: 12.884303133834594
episode: 8332, reward: 12.884303133834594
episode: 8333, reward: 12.884303133834594
episode: 8334, reward: 12.884303133834594
episode: 8335, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8451, reward: 12.884303133834594
episode: 8452, reward: 12.884303133834594
episode: 8453, reward: 11.376098732142532
episode: 8454, reward: 12.884303133834594
episode: 8455, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8456, reward: 12.884303133834594
episode: 8457, reward: 12.884303133834594
episode: 8458, reward: 12.884303133834594
episode: 8459, reward: 12.884303133834594
episode: 8460, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8461, reward: 12.884303133834594
episode: 8462, reward: 12.884303133834594
episode: 8463, reward: 12.884303133834594
episode: 8464, reward: 12.884303133834594
episode: 8465, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8581, reward: 12.884303133834594
episode: 8582, reward: 12.884303133834594
episode: 8583, reward: 12.884303133834594
episode: 8584, reward: 12.884303133834594
episode: 8585, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8586, reward: 12.884303133834594
episode: 8587, reward: 12.884303133834594
episode: 8588, reward: 12.884303133834594
episode: 8589, reward: 12.884303133834594
episode: 8590, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8591, reward: 12.884303133834594
episode: 8592, reward: 12.884303133834594
episode: 8593, reward: 12.884303133834594
episode: 8594, reward: 12.884303133834594
episode: 8595, reward: 12.884303133834594
Evaluation over 50 episodes: 15

episode: 8710, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8711, reward: 12.884303133834594
episode: 8712, reward: 12.884303133834594
episode: 8713, reward: 13.383052482146386
episode: 8714, reward: 12.884303133834594
episode: 8715, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8716, reward: 12.884303133834594
episode: 8717, reward: 12.884303133834594
episode: 8718, reward: 12.884303133834594
episode: 8719, reward: 12.884303133834594
episode: 8720, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8721, reward: 13.025434260890536
episode: 8722, reward: 12.884303133834594
episode: 8723, reward: 12.884303133834594
episode: 8724, reward: 12.884303133834594
episode: 8725, reward: 12.88430

episode: 8840, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8841, reward: 12.884303133834594
episode: 8842, reward: 12.884303133834594
episode: 8843, reward: 12.884303133834594
episode: 8844, reward: 12.884303133834594
episode: 8845, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8846, reward: 12.884303133834594
episode: 8847, reward: 12.884303133834594
episode: 8848, reward: 12.884303133834594
episode: 8849, reward: 12.884303133834594
episode: 8850, reward: 12.931193382038
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8851, reward: 12.884303133834594
episode: 8852, reward: 12.884303133834594
episode: 8853, reward: 12.884303133834594
episode: 8854, reward: 12.884303133834594
episode: 8855, reward: 12.88430313

episode: 8970, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8971, reward: 13.459344430564302
episode: 8972, reward: 12.884303133834594
episode: 8973, reward: 12.884303133834594
episode: 8974, reward: 12.884303133834594
episode: 8975, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8976, reward: 12.884303133834594
episode: 8977, reward: 12.884303133834594
episode: 8978, reward: 12.884303133834594
episode: 8979, reward: 12.884303133834594
episode: 8980, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 8981, reward: 12.884303133834594
episode: 8982, reward: 12.884303133834594
episode: 8983, reward: 12.884303133834594
episode: 8984, reward: 12.884303133834594
episode: 8985, reward: 12.88430

episode: 9096, reward: 12.884303133834594
episode: 9097, reward: 12.884303133834594
episode: 9098, reward: 12.884303133834594
episode: 9099, reward: 12.884303133834594
episode: 9100, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9101, reward: 12.884303133834594
episode: 9102, reward: 12.884303133834594
episode: 9103, reward: 12.884303133834594
episode: 9104, reward: 12.884303133834594
episode: 9105, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9106, reward: 12.884303133834594
episode: 9107, reward: 12.884303133834594
episode: 9108, reward: 12.884303133834594
episode: 9109, reward: 12.884303133834594
episode: 9110, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9111, reward: 12.88430

episode: 9225, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9226, reward: 12.884303133834594
episode: 9227, reward: 12.872645515522317
episode: 9228, reward: 12.884303133834594
episode: 9229, reward: 12.884303133834594
episode: 9230, reward: 13.037091879202814
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9231, reward: 12.884303133834594
episode: 9232, reward: 12.884303133834594
episode: 9233, reward: 12.884303133834594
episode: 9234, reward: 12.884303133834594
episode: 9235, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9236, reward: 12.884303133834594
episode: 9237, reward: 12.884303133834594
episode: 9238, reward: 12.884303133834594
episode: 9239, reward: 12.884303133834594
episode: 9240, reward: 12.88430

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9356, reward: 12.884303133834594
episode: 9357, reward: 12.884303133834594
episode: 9358, reward: 12.884303133834594
episode: 9359, reward: 12.884303133834594
episode: 9360, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9361, reward: 12.884303133834594
episode: 9362, reward: 12.884303133834594
episode: 9363, reward: 12.884303133834594
episode: 9364, reward: 12.884303133834594
episode: 9365, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9366, reward: 12.884303133834594
episode: 9367, reward: 12.884303133834594
episode: 9368, reward: 12.884303133834594
episode: 9369, reward: 12.884303133834594
episode: 9370, reward: 12.884303133834594
Evaluation over 50 episodes: 15

episode: 9485, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9486, reward: 12.884303133834594
episode: 9487, reward: 12.884303133834594
episode: 9488, reward: 12.884303133834594
episode: 9489, reward: 12.884303133834594
episode: 9490, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9491, reward: 12.884303133834594
episode: 9492, reward: 12.884303133834594
episode: 9493, reward: 12.884303133834594
episode: 9494, reward: 12.884303133834594
episode: 9495, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9496, reward: 12.884303133834594
episode: 9497, reward: 12.884303133834594
episode: 9498, reward: 12.884303133834594
episode: 9499, reward: 12.884303133834594
episode: 9500, reward: 12.88430

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9616, reward: 12.884303133834594
episode: 9617, reward: 12.884303133834594
episode: 9618, reward: 12.884303133834594
episode: 9619, reward: 12.884303133834594
episode: 9620, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9621, reward: 12.884303133834594
episode: 9622, reward: 12.884303133834594
episode: 9623, reward: 12.884303133834594
episode: 9624, reward: 12.884303133834594
episode: 9625, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9626, reward: 12.884303133834594
episode: 9627, reward: 12.884303133834594
episode: 9628, reward: 12.884303133834594
episode: 9629, reward: 12.884303133834594
episode: 9630, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9746, reward: 12.884303133834594
episode: 9747, reward: 12.884303133834594
episode: 9748, reward: 12.884303133834594
episode: 9749, reward: 12.884303133834594
episode: 9750, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9751, reward: 12.884303133834594
episode: 9752, reward: 12.884303133834594
episode: 9753, reward: 12.884303133834594
episode: 9754, reward: 12.884303133834594
episode: 9755, reward: 13.247696182617323
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9756, reward: 11.619195699520715
episode: 9757, reward: 12.884303133834594
episode: 9758, reward: 12.884303133834594
episode: 9759, reward: 12.884303133834594
episode: 9760, reward: 12.884303133834594
Evaluation over 50 episodes: 15

Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9876, reward: 12.884303133834594
episode: 9877, reward: 12.884303133834594
episode: 9878, reward: 12.970032946115198
episode: 9879, reward: 12.884303133834594
episode: 9880, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9881, reward: 12.884303133834594
episode: 9882, reward: 12.884303133834594
episode: 9883, reward: 12.884303133834594
episode: 9884, reward: 12.884303133834594
episode: 9885, reward: 12.884303133834594
Evaluation over 50 episodes: 15.977
---------------------------------------
tensor(15.9769, dtype=torch.float64)
episode: 9886, reward: 12.884303133834594
episode: 9887, reward: 12.884303133834594
episode: 9888, reward: 12.884303133834594
episode: 9889, reward: 12.884303133834594
episode: 9890, reward: 12.884303133834594
Evaluation over 50 episodes: 15

In [7]:
a*b

array([ 2,  6, 12])

In [24]:
u=2,v=3

SyntaxError: cannot assign to literal (<ipython-input-24-dccffd348434>, line 1)

In [26]:
u=2;v=3

In [27]:
v

3

In [58]:
x = torch.tensor(0.4435, dtype=torch.float64)

In [59]:
x


tensor(0.4435, dtype=torch.float64)

In [60]:
x>0.2

tensor(True)

In [61]:
x<0.2

tensor(False)

In [63]:
if (x>0.2):
    print("wrong")

wrong
