In [1]:
import os
import argparse
import math
import gym
import numpy as np
import itertools
import torch
from PIL import Image
from SAC.sac import SAC
from tensorboardX import SummaryWriter
from SAC.normalized_actions import NormalizedActions
from SAC.replay_memory import ReplayMemory

'''
parser = argparse.ArgumentParser(description='PyTorch REINFORCE example')
parser.add_argument('--env-name', default="BipedalWalker-v2",
                    help='name of the environment to run')
parser.add_argument('--policy', default="Gaussian",
                    help='algorithm to use: Gaussian | Deterministic')
parser.add_argument('--eval', type=bool, default=False,
                    help='Evaluate a policy (default:False)')
parser.add_argument('--gamma', type=float, default=0.99, metavar='G',
                    help='discount factor for reward (default: 0.99)')
parser.add_argument('--tau', type=float, default=0.005, metavar='G',
                    help='target smoothing coefficient(τ) (default: 0.005)')
parser.add_argument('--lr', type=float, default=0.0003, metavar='G',
                    help='learning rate (default: 0.0003)')
parser.add_argument('--alpha', type=float, default=0.2, metavar='G',
                    help='Temperature parameter α determines the relative importance of the entropy term against the reward (default: 0.2)')
parser.add_argument('--seed', type=int, default=543, metavar='N',
                    help='random seed (default: 543)')
parser.add_argument('--batch_size', type=int, default=256, metavar='N',
                    help='batch size (default: 256)')
parser.add_argument('--num_steps', type=int, default=1000000, metavar='N',
                    help='maximum number of steps (default: 1000000)')
parser.add_argument('--hidden_size', type=int, default=256, metavar='N',
                    help='hidden size (default: 256)')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N',
                    help='model updates per simulator step (default: 1)')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N',
                    help='Value target update per no. of updates per step (default: 1)')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N',
                    help='size of replay buffer (default: 10000000)')
args = parser.parse_args()
'''

args = {
    "env_name": "BipedalWalker-v2",
    "policy": "Gaussian",
    "eval": False,
    "gamma": 0.99,
    "tau": 0.001,
    "lr": 0.0001,
    "alpha": 0.2,
    "seed": 333,
    "batch_size": 1024,
    "num_steps": 5000000,
    "steps_in_episode": 2000,
    "hidden_size": 256,
    "updates_per_step": 1,
    "target_update_interval": 1,
    "replay_size": 20000
}    


# Environment
env = NormalizedActions(gym.make(args['env_name']))
env.seed(args['seed'])
torch.manual_seed(args['seed'])
np.random.seed(args['seed'])

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

writer = SummaryWriter()

# Memory
memory = ReplayMemory(args['replay_size'])

# Training Loop
rewards = []
total_numsteps = 0
updates = 0

for i_episode in itertools.count():
    state = env.reset()

    episode_reward = 0
    for t in range(args['steps_in_episode']):
        action = agent.select_action(state)  # Sample action from policy
        next_state, reward, done, _ = env.step(action)  # Step
        mask = not done  # 1 for not done and 0 for done
        memory.push(state, action, reward, next_state, mask)  # Append transition to memory
        if len(memory) > args['batch_size']:
            for i in range(args['updates_per_step']): # Number of updates per step in environment
                # Sample a batch from memory
                state_batch, action_batch, reward_batch, next_state_batch, mask_batch = memory.sample(args['batch_size'])
                # Update parameters of all the networks
                value_loss, critic_1_loss, critic_2_loss, policy_loss = agent.update_parameters(state_batch, action_batch, 
                                                                                                reward_batch, next_state_batch, 
                                                                                                mask_batch, updates)

                writer.add_scalar('loss/value', value_loss, updates)
                writer.add_scalar('loss/critic_1', critic_1_loss, updates)
                writer.add_scalar('loss/critic_2', critic_2_loss, updates)
                writer.add_scalar('loss/policy', policy_loss, updates)
                updates += 1

        state = next_state
        total_numsteps += 1
        episode_reward += reward

        if done:
            break

    if total_numsteps > args['num_steps']:
        break

    writer.add_scalar('reward/train', episode_reward, i_episode)
    rewards.append(episode_reward)
    print("Episode: {}, total numsteps: {}, reward: {}, average reward: {}".format(i_episode, total_numsteps, np.round(rewards[-1],2),
                                                                                np.round(np.mean(rewards[-100:]),2)))




[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: <class 'SAC.normalized_actions.NormalizedActions'> doesn't implement 'action' method. Maybe it implements deprecated '_action' method.[0m
Episode: 0, total numsteps: 74, reward: -105.35, average reward: -105.35
Episode: 1, total numsteps: 147, reward: -103.44, average reward: -104.4
Episode: 2, total numsteps: 1747, reward: -109.67, average reward: -106.15
Episode: 3, total numsteps: 2408, reward: -173.73, average reward: -123.05
Episode: 4, total numsteps: 2449, reward: -109.1, average reward: -120.26
Episode: 5, total numsteps: 4049, reward: -82.46, average reward: -113.96
Episode: 6, total numsteps: 4118, reward: -112.13, average reward: -113.7
Episode: 7, total numsteps: 4187, reward: -107.05, average reward: -112.87
Episode: 8, total numsteps: 4248, reward:

Episode: 104, total numsteps: 75067, reward: -78.76, average reward: -100.15
Episode: 105, total numsteps: 76667, reward: -74.73, average reward: -100.07
Episode: 106, total numsteps: 78267, reward: -74.85, average reward: -99.7
Episode: 107, total numsteps: 79867, reward: -88.35, average reward: -99.51
Episode: 108, total numsteps: 81467, reward: -72.04, average reward: -99.09
Episode: 109, total numsteps: 83067, reward: -75.55, average reward: -98.7
Episode: 110, total numsteps: 84667, reward: -77.9, average reward: -98.33
Episode: 111, total numsteps: 86267, reward: -74.13, average reward: -97.82
Episode: 112, total numsteps: 87867, reward: -78.64, average reward: -97.37
Episode: 113, total numsteps: 89467, reward: -81.57, average reward: -97.04
Episode: 114, total numsteps: 91067, reward: -75.53, average reward: -96.59
Episode: 115, total numsteps: 92667, reward: -76.13, average reward: -96.24
Episode: 116, total numsteps: 94267, reward: -72.74, average reward: -95.81
Episode: 117,

Episode: 211, total numsteps: 246267, reward: -65.06, average reward: -74.21
Episode: 212, total numsteps: 247867, reward: -70.44, average reward: -74.13
Episode: 213, total numsteps: 249467, reward: -60.37, average reward: -73.92
Episode: 214, total numsteps: 251067, reward: -64.81, average reward: -73.81
Episode: 215, total numsteps: 252667, reward: -64.27, average reward: -73.69
Episode: 216, total numsteps: 254267, reward: -68.22, average reward: -73.65
Episode: 217, total numsteps: 255867, reward: -64.93, average reward: -73.52
Episode: 218, total numsteps: 257467, reward: -60.89, average reward: -73.39
Episode: 219, total numsteps: 259067, reward: -60.68, average reward: -73.23
Episode: 220, total numsteps: 260667, reward: -64.17, average reward: -73.07
Episode: 221, total numsteps: 262267, reward: -67.99, average reward: -72.96
Episode: 222, total numsteps: 263867, reward: -68.6, average reward: -72.85
Episode: 223, total numsteps: 265467, reward: -68.05, average reward: -72.82


Episode: 318, total numsteps: 417467, reward: -67.28, average reward: -67.61
Episode: 319, total numsteps: 419067, reward: -72.28, average reward: -67.72
Episode: 320, total numsteps: 420667, reward: -70.11, average reward: -67.78
Episode: 321, total numsteps: 422267, reward: -68.14, average reward: -67.78
Episode: 322, total numsteps: 423867, reward: -66.79, average reward: -67.77
Episode: 323, total numsteps: 425467, reward: -72.72, average reward: -67.81
Episode: 324, total numsteps: 427067, reward: -72.82, average reward: -67.89
Episode: 325, total numsteps: 428667, reward: -70.4, average reward: -67.91
Episode: 326, total numsteps: 430267, reward: -70.0, average reward: -67.99
Episode: 327, total numsteps: 431867, reward: -67.58, average reward: -68.01
Episode: 328, total numsteps: 433467, reward: -70.29, average reward: -68.05
Episode: 329, total numsteps: 435067, reward: -71.89, average reward: -68.1
Episode: 330, total numsteps: 436667, reward: -74.69, average reward: -68.13
Ep

Episode: 425, total numsteps: 515871, reward: -111.69, average reward: -89.53
Episode: 426, total numsteps: 515925, reward: -112.79, average reward: -89.95
Episode: 427, total numsteps: 515985, reward: -121.61, average reward: -90.49
Episode: 428, total numsteps: 516027, reward: -109.66, average reward: -90.89
Episode: 429, total numsteps: 516090, reward: -123.0, average reward: -91.4
Episode: 430, total numsteps: 516152, reward: -114.03, average reward: -91.79
Episode: 431, total numsteps: 516212, reward: -118.35, average reward: -92.25
Episode: 432, total numsteps: 516275, reward: -120.7, average reward: -92.69
Episode: 433, total numsteps: 516326, reward: -113.05, average reward: -93.04
Episode: 434, total numsteps: 516358, reward: -106.31, average reward: -93.37
Episode: 435, total numsteps: 516403, reward: -110.24, average reward: -93.23
Episode: 436, total numsteps: 516457, reward: -112.37, average reward: -93.67
Episode: 437, total numsteps: 516506, reward: -111.32, average rewa

Episode: 530, total numsteps: 533818, reward: -99.84, average reward: -107.88
Episode: 531, total numsteps: 533981, reward: -100.77, average reward: -107.71
Episode: 532, total numsteps: 534029, reward: -101.45, average reward: -107.51
Episode: 533, total numsteps: 534122, reward: -103.43, average reward: -107.42
Episode: 534, total numsteps: 534171, reward: -100.46, average reward: -107.36
Episode: 535, total numsteps: 534237, reward: -98.41, average reward: -107.24
Episode: 536, total numsteps: 534308, reward: -104.81, average reward: -107.16
Episode: 537, total numsteps: 534379, reward: -99.58, average reward: -107.05
Episode: 538, total numsteps: 534494, reward: -99.28, average reward: -106.93
Episode: 539, total numsteps: 534558, reward: -99.71, average reward: -106.84
Episode: 540, total numsteps: 534812, reward: -117.61, average reward: -106.96
Episode: 541, total numsteps: 535167, reward: -146.82, average reward: -107.35
Episode: 542, total numsteps: 535231, reward: -102.58, av

Episode: 636, total numsteps: 609656, reward: -124.47, average reward: -89.96
Episode: 637, total numsteps: 609705, reward: -120.78, average reward: -90.17
Episode: 638, total numsteps: 609770, reward: -121.77, average reward: -90.39
Episode: 639, total numsteps: 611370, reward: -81.08, average reward: -90.21
Episode: 640, total numsteps: 612970, reward: -71.85, average reward: -89.75
Episode: 641, total numsteps: 613040, reward: -109.25, average reward: -89.37
Episode: 642, total numsteps: 613148, reward: -104.73, average reward: -89.4
Episode: 643, total numsteps: 614748, reward: -71.1, average reward: -89.09
Episode: 644, total numsteps: 614788, reward: -112.41, average reward: -89.15
Episode: 645, total numsteps: 614831, reward: -112.25, average reward: -88.94
Episode: 646, total numsteps: 615090, reward: -105.7, average reward: -89.0
Episode: 647, total numsteps: 615131, reward: -112.78, average reward: -89.1
Episode: 648, total numsteps: 615185, reward: -112.07, average reward: -

Episode: 743, total numsteps: 762657, reward: -74.0, average reward: -74.43
Episode: 744, total numsteps: 764257, reward: -65.99, average reward: -73.96
Episode: 745, total numsteps: 765857, reward: -67.65, average reward: -73.52
Episode: 746, total numsteps: 767457, reward: -68.21, average reward: -73.14
Episode: 747, total numsteps: 769057, reward: -72.45, average reward: -72.74
Episode: 748, total numsteps: 770657, reward: -64.49, average reward: -72.26
Episode: 749, total numsteps: 772257, reward: -63.49, average reward: -71.73
Episode: 750, total numsteps: 773857, reward: -67.17, average reward: -71.29
Episode: 751, total numsteps: 775457, reward: -64.02, average reward: -70.73
Episode: 752, total numsteps: 777057, reward: -64.27, average reward: -70.72
Episode: 753, total numsteps: 778657, reward: -71.26, average reward: -70.75
Episode: 754, total numsteps: 780257, reward: -66.14, average reward: -70.7
Episode: 755, total numsteps: 781857, reward: -64.97, average reward: -70.62
E

Episode: 850, total numsteps: 929233, reward: -63.91, average reward: -67.71
Episode: 851, total numsteps: 930833, reward: -69.7, average reward: -67.76
Episode: 852, total numsteps: 932433, reward: -67.22, average reward: -67.79
Episode: 853, total numsteps: 934033, reward: -69.99, average reward: -67.78
Episode: 854, total numsteps: 935633, reward: -66.58, average reward: -67.79
Episode: 855, total numsteps: 937233, reward: -64.83, average reward: -67.78
Episode: 856, total numsteps: 938833, reward: -70.92, average reward: -67.76
Episode: 857, total numsteps: 940433, reward: -68.97, average reward: -67.77
Episode: 858, total numsteps: 942033, reward: -69.63, average reward: -67.79
Episode: 859, total numsteps: 943633, reward: -66.39, average reward: -67.73
Episode: 860, total numsteps: 945233, reward: -65.26, average reward: -67.73
Episode: 861, total numsteps: 946833, reward: -63.91, average reward: -67.61
Episode: 862, total numsteps: 948433, reward: -73.21, average reward: -67.59


Episode: 956, total numsteps: 1097302, reward: -73.48, average reward: -69.39
Episode: 957, total numsteps: 1098902, reward: -70.01, average reward: -69.4
Episode: 958, total numsteps: 1100502, reward: -67.4, average reward: -69.38
Episode: 959, total numsteps: 1102102, reward: -73.44, average reward: -69.45
Episode: 960, total numsteps: 1103702, reward: -63.24, average reward: -69.43
Episode: 961, total numsteps: 1105302, reward: -66.67, average reward: -69.46
Episode: 962, total numsteps: 1106902, reward: -67.57, average reward: -69.4
Episode: 963, total numsteps: 1106966, reward: -109.31, average reward: -69.79
Episode: 964, total numsteps: 1108566, reward: -64.31, average reward: -69.7
Episode: 965, total numsteps: 1110166, reward: -66.51, average reward: -69.66
Episode: 966, total numsteps: 1111766, reward: -61.16, average reward: -69.61
Episode: 967, total numsteps: 1113366, reward: -69.02, average reward: -69.66
Episode: 968, total numsteps: 1114966, reward: -67.91, average rewa

Episode: 1061, total numsteps: 1263766, reward: -70.71, average reward: -72.3
Episode: 1062, total numsteps: 1265366, reward: -70.09, average reward: -72.32
Episode: 1063, total numsteps: 1265545, reward: -129.23, average reward: -72.52
Episode: 1064, total numsteps: 1267145, reward: -78.28, average reward: -72.66
Episode: 1065, total numsteps: 1268745, reward: -77.79, average reward: -72.78
Episode: 1066, total numsteps: 1270345, reward: -70.16, average reward: -72.87
Episode: 1067, total numsteps: 1271945, reward: -68.25, average reward: -72.86
Episode: 1068, total numsteps: 1273545, reward: -68.96, average reward: -72.87
Episode: 1069, total numsteps: 1275145, reward: -70.56, average reward: -72.96
Episode: 1070, total numsteps: 1276745, reward: -71.43, average reward: -73.0
Episode: 1071, total numsteps: 1278345, reward: -68.92, average reward: -73.03
Episode: 1072, total numsteps: 1279945, reward: -66.67, average reward: -73.02
Episode: 1073, total numsteps: 1281545, reward: -67.5

Episode: 1165, total numsteps: 1364302, reward: -99.56, average reward: -83.74
Episode: 1166, total numsteps: 1364376, reward: -102.75, average reward: -84.06
Episode: 1167, total numsteps: 1364431, reward: -102.34, average reward: -84.4
Episode: 1168, total numsteps: 1364495, reward: -102.89, average reward: -84.74
Episode: 1169, total numsteps: 1364574, reward: -99.65, average reward: -85.03
Episode: 1170, total numsteps: 1364643, reward: -100.29, average reward: -85.32
Episode: 1171, total numsteps: 1364708, reward: -100.16, average reward: -85.63
Episode: 1172, total numsteps: 1364773, reward: -99.9, average reward: -85.97
Episode: 1173, total numsteps: 1364842, reward: -99.52, average reward: -86.29
Episode: 1174, total numsteps: 1364905, reward: -100.02, average reward: -86.6
Episode: 1175, total numsteps: 1364958, reward: -103.06, average reward: -86.98
Episode: 1176, total numsteps: 1365017, reward: -101.4, average reward: -87.35
Episode: 1177, total numsteps: 1365077, reward: 

Episode: 1268, total numsteps: 1370266, reward: -101.07, average reward: -101.05
Episode: 1269, total numsteps: 1370325, reward: -100.83, average reward: -101.06
Episode: 1270, total numsteps: 1370371, reward: -102.14, average reward: -101.08
Episode: 1271, total numsteps: 1370433, reward: -102.05, average reward: -101.1
Episode: 1272, total numsteps: 1370484, reward: -101.85, average reward: -101.12
Episode: 1273, total numsteps: 1370549, reward: -99.54, average reward: -101.12
Episode: 1274, total numsteps: 1370609, reward: -99.17, average reward: -101.11
Episode: 1275, total numsteps: 1370666, reward: -98.97, average reward: -101.07
Episode: 1276, total numsteps: 1370725, reward: -99.4, average reward: -101.05
Episode: 1277, total numsteps: 1370793, reward: -99.42, average reward: -101.04
Episode: 1278, total numsteps: 1370851, reward: -99.29, average reward: -101.03
Episode: 1279, total numsteps: 1370915, reward: -98.93, average reward: -101.01
Episode: 1280, total numsteps: 137096

Episode: 1370, total numsteps: 1376457, reward: -101.96, average reward: -101.22
Episode: 1371, total numsteps: 1376525, reward: -98.02, average reward: -101.18
Episode: 1372, total numsteps: 1376597, reward: -98.42, average reward: -101.14
Episode: 1373, total numsteps: 1376655, reward: -103.48, average reward: -101.18
Episode: 1374, total numsteps: 1376729, reward: -98.91, average reward: -101.18
Episode: 1375, total numsteps: 1376804, reward: -98.19, average reward: -101.17
Episode: 1376, total numsteps: 1376870, reward: -99.02, average reward: -101.17
Episode: 1377, total numsteps: 1376942, reward: -96.77, average reward: -101.14
Episode: 1378, total numsteps: 1377012, reward: -98.48, average reward: -101.13
Episode: 1379, total numsteps: 1377085, reward: -100.55, average reward: -101.15
Episode: 1380, total numsteps: 1377158, reward: -96.8, average reward: -101.11
Episode: 1381, total numsteps: 1377224, reward: -100.46, average reward: -101.11
Episode: 1382, total numsteps: 137729

Episode: 1473, total numsteps: 1383783, reward: -97.82, average reward: -99.15
Episode: 1474, total numsteps: 1383853, reward: -98.01, average reward: -99.14
Episode: 1475, total numsteps: 1383943, reward: -99.85, average reward: -99.15
Episode: 1476, total numsteps: 1384019, reward: -103.16, average reward: -99.2
Episode: 1477, total numsteps: 1384091, reward: -98.85, average reward: -99.22
Episode: 1478, total numsteps: 1384164, reward: -100.78, average reward: -99.24
Episode: 1479, total numsteps: 1384240, reward: -101.31, average reward: -99.25
Episode: 1480, total numsteps: 1384320, reward: -104.19, average reward: -99.32
Episode: 1481, total numsteps: 1384397, reward: -102.49, average reward: -99.34
Episode: 1482, total numsteps: 1384469, reward: -96.6, average reward: -99.32
Episode: 1483, total numsteps: 1384537, reward: -99.65, average reward: -99.34
Episode: 1484, total numsteps: 1384602, reward: -98.81, average reward: -99.32
Episode: 1485, total numsteps: 1384666, reward: -

Episode: 1577, total numsteps: 1392414, reward: -98.29, average reward: -98.62
Episode: 1578, total numsteps: 1392494, reward: -97.23, average reward: -98.59
Episode: 1579, total numsteps: 1392578, reward: -99.84, average reward: -98.57
Episode: 1580, total numsteps: 1392660, reward: -99.06, average reward: -98.52
Episode: 1581, total numsteps: 1392750, reward: -98.69, average reward: -98.48
Episode: 1582, total numsteps: 1392825, reward: -98.66, average reward: -98.5
Episode: 1583, total numsteps: 1392924, reward: -100.18, average reward: -98.51
Episode: 1584, total numsteps: 1393011, reward: -95.75, average reward: -98.48
Episode: 1585, total numsteps: 1393096, reward: -98.4, average reward: -98.49
Episode: 1586, total numsteps: 1393195, reward: -96.05, average reward: -98.49
Episode: 1587, total numsteps: 1393287, reward: -96.82, average reward: -98.44
Episode: 1588, total numsteps: 1393364, reward: -99.83, average reward: -98.44
Episode: 1589, total numsteps: 1393460, reward: -100.

Episode: 1681, total numsteps: 1432811, reward: -87.61, average reward: -101.39
Episode: 1682, total numsteps: 1434411, reward: -86.74, average reward: -101.27
Episode: 1683, total numsteps: 1436011, reward: -88.44, average reward: -101.15
Episode: 1684, total numsteps: 1437611, reward: -87.0, average reward: -101.06
Episode: 1685, total numsteps: 1439211, reward: -88.29, average reward: -100.96
Episode: 1686, total numsteps: 1440811, reward: -84.31, average reward: -100.84
Episode: 1687, total numsteps: 1442411, reward: -84.28, average reward: -100.72
Episode: 1688, total numsteps: 1444011, reward: -83.23, average reward: -100.55
Episode: 1689, total numsteps: 1445611, reward: -77.06, average reward: -100.32
Episode: 1690, total numsteps: 1447211, reward: -84.17, average reward: -100.19
Episode: 1691, total numsteps: 1448811, reward: -79.41, average reward: -99.88
Episode: 1692, total numsteps: 1450411, reward: -84.36, average reward: -99.72
Episode: 1693, total numsteps: 1452011, rew

Episode: 1785, total numsteps: 1597666, reward: -76.16, average reward: -73.53
Episode: 1786, total numsteps: 1599266, reward: -75.82, average reward: -73.45
Episode: 1787, total numsteps: 1600866, reward: -69.95, average reward: -73.31
Episode: 1788, total numsteps: 1602466, reward: -75.02, average reward: -73.22
Episode: 1789, total numsteps: 1604066, reward: -73.79, average reward: -73.19
Episode: 1790, total numsteps: 1605666, reward: -72.28, average reward: -73.07
Episode: 1791, total numsteps: 1607266, reward: -74.25, average reward: -73.02
Episode: 1792, total numsteps: 1608866, reward: -65.02, average reward: -72.83
Episode: 1793, total numsteps: 1610466, reward: -71.92, average reward: -72.77
Episode: 1794, total numsteps: 1612066, reward: -66.57, average reward: -72.62
Episode: 1795, total numsteps: 1613666, reward: -71.08, average reward: -72.57
Episode: 1796, total numsteps: 1615266, reward: -72.29, average reward: -72.48
Episode: 1797, total numsteps: 1616866, reward: -71.

Episode: 1889, total numsteps: 1701260, reward: -98.94, average reward: -85.27
Episode: 1890, total numsteps: 1701337, reward: -99.29, average reward: -85.54
Episode: 1891, total numsteps: 1701419, reward: -100.94, average reward: -85.81
Episode: 1892, total numsteps: 1701497, reward: -99.19, average reward: -86.15
Episode: 1893, total numsteps: 1701592, reward: -103.25, average reward: -86.46
Episode: 1894, total numsteps: 1701658, reward: -98.44, average reward: -86.78
Episode: 1895, total numsteps: 1701728, reward: -99.88, average reward: -87.07
Episode: 1896, total numsteps: 1701805, reward: -102.11, average reward: -87.37
Episode: 1897, total numsteps: 1701871, reward: -99.08, average reward: -87.64
Episode: 1898, total numsteps: 1701945, reward: -98.88, average reward: -87.97
Episode: 1899, total numsteps: 1702022, reward: -102.53, average reward: -88.33
Episode: 1900, total numsteps: 1702092, reward: -98.7, average reward: -88.61
Episode: 1901, total numsteps: 1702171, reward: -

Episode: 1992, total numsteps: 1740673, reward: -118.59, average reward: -109.61
Episode: 1993, total numsteps: 1740717, reward: -112.28, average reward: -109.7
Episode: 1994, total numsteps: 1740763, reward: -115.37, average reward: -109.87
Episode: 1995, total numsteps: 1740801, reward: -111.64, average reward: -109.99
Episode: 1996, total numsteps: 1740839, reward: -112.26, average reward: -110.09
Episode: 1997, total numsteps: 1740879, reward: -113.5, average reward: -110.23
Episode: 1998, total numsteps: 1740925, reward: -112.84, average reward: -110.37
Episode: 1999, total numsteps: 1742525, reward: -88.54, average reward: -110.23
Episode: 2000, total numsteps: 1742601, reward: -127.9, average reward: -110.52
Episode: 2001, total numsteps: 1744201, reward: -80.5, average reward: -110.29
Episode: 2002, total numsteps: 1745801, reward: -79.6, average reward: -110.09
Episode: 2003, total numsteps: 1747401, reward: -74.43, average reward: -109.85
Episode: 2004, total numsteps: 174900

Episode: 2095, total numsteps: 1831402, reward: -104.9, average reward: -91.65
Episode: 2096, total numsteps: 1831497, reward: -105.45, average reward: -91.58
Episode: 2097, total numsteps: 1831547, reward: -103.27, average reward: -91.48
Episode: 2098, total numsteps: 1831606, reward: -101.74, average reward: -91.37
Episode: 2099, total numsteps: 1831661, reward: -101.81, average reward: -91.5
Episode: 2100, total numsteps: 1831706, reward: -102.31, average reward: -91.25
Episode: 2101, total numsteps: 1831755, reward: -102.51, average reward: -91.47
Episode: 2102, total numsteps: 1831803, reward: -102.2, average reward: -91.69
Episode: 2103, total numsteps: 1831857, reward: -102.88, average reward: -91.98
Episode: 2104, total numsteps: 1831903, reward: -102.54, average reward: -92.27
Episode: 2105, total numsteps: 1831964, reward: -100.05, average reward: -92.49
Episode: 2106, total numsteps: 1832022, reward: -100.79, average reward: -92.74
Episode: 2107, total numsteps: 1833622, rew

Episode: 2199, total numsteps: 1896307, reward: -104.83, average reward: -100.57
Episode: 2200, total numsteps: 1896383, reward: -103.91, average reward: -100.58
Episode: 2201, total numsteps: 1896438, reward: -106.33, average reward: -100.62
Episode: 2202, total numsteps: 1896506, reward: -105.07, average reward: -100.65
Episode: 2203, total numsteps: 1896557, reward: -108.07, average reward: -100.7
Episode: 2204, total numsteps: 1896618, reward: -106.65, average reward: -100.74
Episode: 2205, total numsteps: 1896687, reward: -105.03, average reward: -100.79
Episode: 2206, total numsteps: 1898287, reward: -79.48, average reward: -100.58
Episode: 2207, total numsteps: 1898392, reward: -105.87, average reward: -100.93
Episode: 2208, total numsteps: 1898560, reward: -138.57, average reward: -101.31
Episode: 2209, total numsteps: 1898628, reward: -107.14, average reward: -101.37
Episode: 2210, total numsteps: 1898708, reward: -104.64, average reward: -101.41
Episode: 2211, total numsteps:

Episode: 2301, total numsteps: 1916859, reward: -103.89, average reward: -108.54
Episode: 2302, total numsteps: 1916970, reward: -103.12, average reward: -108.52
Episode: 2303, total numsteps: 1917041, reward: -105.35, average reward: -108.5
Episode: 2304, total numsteps: 1917135, reward: -105.8, average reward: -108.49
Episode: 2305, total numsteps: 1918735, reward: -70.61, average reward: -108.14
Episode: 2306, total numsteps: 1920335, reward: -73.05, average reward: -108.08
Episode: 2307, total numsteps: 1921033, reward: -169.56, average reward: -108.72
Episode: 2308, total numsteps: 1921147, reward: -104.48, average reward: -108.38
Episode: 2309, total numsteps: 1922747, reward: -77.6, average reward: -108.08
Episode: 2310, total numsteps: 1922838, reward: -129.48, average reward: -108.33
Episode: 2311, total numsteps: 1922889, reward: -101.7, average reward: -108.28
Episode: 2312, total numsteps: 1924489, reward: -75.14, average reward: -107.97
Episode: 2313, total numsteps: 19260

Episode: 2404, total numsteps: 2017268, reward: -104.77, average reward: -91.93
Episode: 2405, total numsteps: 2017346, reward: -110.76, average reward: -92.33
Episode: 2406, total numsteps: 2017406, reward: -105.62, average reward: -92.66
Episode: 2407, total numsteps: 2017531, reward: -110.4, average reward: -92.06
Episode: 2408, total numsteps: 2017607, reward: -113.53, average reward: -92.15
Episode: 2409, total numsteps: 2017725, reward: -109.89, average reward: -92.48
Episode: 2410, total numsteps: 2017803, reward: -111.03, average reward: -92.29
Episode: 2411, total numsteps: 2017884, reward: -110.77, average reward: -92.38
Episode: 2412, total numsteps: 2017964, reward: -110.74, average reward: -92.74
Episode: 2413, total numsteps: 2018017, reward: -109.81, average reward: -92.79
Episode: 2414, total numsteps: 2018116, reward: -110.79, average reward: -92.81
Episode: 2415, total numsteps: 2018196, reward: -112.96, average reward: -92.94
Episode: 2416, total numsteps: 2018276, r

Episode: 2507, total numsteps: 2023728, reward: -101.47, average reward: -106.83
Episode: 2508, total numsteps: 2023786, reward: -101.46, average reward: -106.71
Episode: 2509, total numsteps: 2023839, reward: -101.68, average reward: -106.63
Episode: 2510, total numsteps: 2023888, reward: -103.44, average reward: -106.56
Episode: 2511, total numsteps: 2023942, reward: -101.68, average reward: -106.46
Episode: 2512, total numsteps: 2023994, reward: -101.77, average reward: -106.37
Episode: 2513, total numsteps: 2024043, reward: -102.25, average reward: -106.3
Episode: 2514, total numsteps: 2024100, reward: -101.55, average reward: -106.21
Episode: 2515, total numsteps: 2024153, reward: -103.37, average reward: -106.11
Episode: 2516, total numsteps: 2024211, reward: -102.6, average reward: -106.02
Episode: 2517, total numsteps: 2024272, reward: -101.67, average reward: -105.95
Episode: 2518, total numsteps: 2024323, reward: -103.0, average reward: -105.91
Episode: 2519, total numsteps: 

Episode: 2609, total numsteps: 2030852, reward: -100.64, average reward: -101.03
Episode: 2610, total numsteps: 2030921, reward: -98.78, average reward: -100.99
Episode: 2611, total numsteps: 2030993, reward: -99.67, average reward: -100.97
Episode: 2612, total numsteps: 2031066, reward: -100.48, average reward: -100.95
Episode: 2613, total numsteps: 2031143, reward: -100.4, average reward: -100.94
Episode: 2614, total numsteps: 2031216, reward: -100.55, average reward: -100.93
Episode: 2615, total numsteps: 2031293, reward: -99.58, average reward: -100.89
Episode: 2616, total numsteps: 2031366, reward: -100.17, average reward: -100.86
Episode: 2617, total numsteps: 2031445, reward: -99.62, average reward: -100.84
Episode: 2618, total numsteps: 2031514, reward: -100.55, average reward: -100.82
Episode: 2619, total numsteps: 2031597, reward: -102.02, average reward: -100.8
Episode: 2620, total numsteps: 2031674, reward: -99.96, average reward: -100.78
Episode: 2621, total numsteps: 2031

Episode: 2711, total numsteps: 2038625, reward: -101.96, average reward: -100.66
Episode: 2712, total numsteps: 2038692, reward: -101.1, average reward: -100.67
Episode: 2713, total numsteps: 2038769, reward: -99.97, average reward: -100.66
Episode: 2714, total numsteps: 2038848, reward: -101.69, average reward: -100.67
Episode: 2715, total numsteps: 2038938, reward: -103.97, average reward: -100.72
Episode: 2716, total numsteps: 2039006, reward: -99.68, average reward: -100.71
Episode: 2717, total numsteps: 2039065, reward: -100.23, average reward: -100.72
Episode: 2718, total numsteps: 2039139, reward: -99.59, average reward: -100.71
Episode: 2719, total numsteps: 2039218, reward: -101.0, average reward: -100.7
Episode: 2720, total numsteps: 2039285, reward: -101.54, average reward: -100.72
Episode: 2721, total numsteps: 2039375, reward: -102.05, average reward: -100.73
Episode: 2722, total numsteps: 2039431, reward: -100.57, average reward: -100.74
Episode: 2723, total numsteps: 203

Episode: 2813, total numsteps: 2046413, reward: -100.49, average reward: -100.75
Episode: 2814, total numsteps: 2046492, reward: -101.71, average reward: -100.75
Episode: 2815, total numsteps: 2046570, reward: -99.91, average reward: -100.71
Episode: 2816, total numsteps: 2046648, reward: -99.63, average reward: -100.71
Episode: 2817, total numsteps: 2046757, reward: -102.35, average reward: -100.73
Episode: 2818, total numsteps: 2046812, reward: -101.53, average reward: -100.75
Episode: 2819, total numsteps: 2046919, reward: -104.21, average reward: -100.78
Episode: 2820, total numsteps: 2047007, reward: -103.23, average reward: -100.8
Episode: 2821, total numsteps: 2047130, reward: -103.97, average reward: -100.82
Episode: 2822, total numsteps: 2047206, reward: -100.95, average reward: -100.82
Episode: 2823, total numsteps: 2047322, reward: -105.29, average reward: -100.83
Episode: 2824, total numsteps: 2047398, reward: -100.11, average reward: -100.83
Episode: 2825, total numsteps: 

Episode: 2915, total numsteps: 2061005, reward: -79.21, average reward: -103.54
Episode: 2916, total numsteps: 2062605, reward: -78.44, average reward: -103.33
Episode: 2917, total numsteps: 2062730, reward: -101.47, average reward: -103.32
Episode: 2918, total numsteps: 2062839, reward: -101.98, average reward: -103.32
Episode: 2919, total numsteps: 2062917, reward: -99.47, average reward: -103.27
Episode: 2920, total numsteps: 2063005, reward: -117.32, average reward: -103.41
Episode: 2921, total numsteps: 2063078, reward: -101.07, average reward: -103.39
Episode: 2922, total numsteps: 2063174, reward: -117.1, average reward: -103.55
Episode: 2923, total numsteps: 2063284, reward: -99.99, average reward: -103.49
Episode: 2924, total numsteps: 2063362, reward: -100.28, average reward: -103.5
Episode: 2925, total numsteps: 2063441, reward: -101.31, average reward: -103.51
Episode: 2926, total numsteps: 2065041, reward: -78.6, average reward: -103.29
Episode: 2927, total numsteps: 20653

Episode: 3017, total numsteps: 2126759, reward: -91.5, average reward: -107.68
Episode: 3018, total numsteps: 2128359, reward: -90.89, average reward: -107.57
Episode: 3019, total numsteps: 2129959, reward: -89.98, average reward: -107.47
Episode: 3020, total numsteps: 2131559, reward: -90.3, average reward: -107.2
Episode: 3021, total numsteps: 2133159, reward: -86.33, average reward: -107.06
Episode: 3022, total numsteps: 2133228, reward: -105.07, average reward: -106.93
Episode: 3023, total numsteps: 2133344, reward: -101.3, average reward: -106.95
Episode: 3024, total numsteps: 2133410, reward: -107.41, average reward: -107.02
Episode: 3025, total numsteps: 2133532, reward: -114.54, average reward: -107.15
Episode: 3026, total numsteps: 2133601, reward: -111.34, average reward: -107.48
Episode: 3027, total numsteps: 2135201, reward: -91.88, average reward: -107.22
Episode: 3028, total numsteps: 2136801, reward: -93.19, average reward: -107.15
Episode: 3029, total numsteps: 2138401,

Episode: 3120, total numsteps: 2201953, reward: -86.26, average reward: -101.99
Episode: 3121, total numsteps: 2203553, reward: -92.32, average reward: -102.05
Episode: 3122, total numsteps: 2205153, reward: -93.25, average reward: -101.93
Episode: 3123, total numsteps: 2205198, reward: -121.21, average reward: -102.13
Episode: 3124, total numsteps: 2205243, reward: -120.3, average reward: -102.26
Episode: 3125, total numsteps: 2206843, reward: -90.89, average reward: -102.03
Episode: 3126, total numsteps: 2208443, reward: -85.77, average reward: -101.77
Episode: 3127, total numsteps: 2208688, reward: -122.53, average reward: -102.08
Episode: 3128, total numsteps: 2208817, reward: -134.85, average reward: -102.49
Episode: 3129, total numsteps: 2208909, reward: -107.9, average reward: -102.68
Episode: 3130, total numsteps: 2208948, reward: -116.24, average reward: -102.84
Episode: 3131, total numsteps: 2209060, reward: -111.7, average reward: -102.87
Episode: 3132, total numsteps: 22106

Episode: 3222, total numsteps: 2220109, reward: -109.22, average reward: -118.68
Episode: 3223, total numsteps: 2220183, reward: -109.89, average reward: -118.56
Episode: 3224, total numsteps: 2220255, reward: -108.97, average reward: -118.45
Episode: 3225, total numsteps: 2220313, reward: -107.86, average reward: -118.62
Episode: 3226, total numsteps: 2220577, reward: -132.99, average reward: -119.09
Episode: 3227, total numsteps: 2220682, reward: -117.75, average reward: -119.04
Episode: 3228, total numsteps: 2221064, reward: -135.44, average reward: -119.05
Episode: 3229, total numsteps: 2221478, reward: -131.2, average reward: -119.28
Episode: 3230, total numsteps: 2221532, reward: -106.64, average reward: -119.19
Episode: 3231, total numsteps: 2221616, reward: -108.84, average reward: -119.16
Episode: 3232, total numsteps: 2221671, reward: -108.45, average reward: -119.37
Episode: 3233, total numsteps: 2221727, reward: -107.44, average reward: -119.22
Episode: 3234, total numsteps

Episode: 3324, total numsteps: 2236635, reward: -118.85, average reward: -114.95
Episode: 3325, total numsteps: 2236745, reward: -101.81, average reward: -114.89
Episode: 3326, total numsteps: 2236803, reward: -118.74, average reward: -114.75
Episode: 3327, total numsteps: 2236881, reward: -101.85, average reward: -114.59
Episode: 3328, total numsteps: 2236961, reward: -121.55, average reward: -114.45
Episode: 3329, total numsteps: 2237030, reward: -119.08, average reward: -114.33
Episode: 3330, total numsteps: 2237082, reward: -116.67, average reward: -114.43
Episode: 3331, total numsteps: 2237150, reward: -119.62, average reward: -114.53
Episode: 3332, total numsteps: 2237205, reward: -118.69, average reward: -114.64
Episode: 3333, total numsteps: 2237265, reward: -118.35, average reward: -114.75
Episode: 3334, total numsteps: 2237316, reward: -117.07, average reward: -114.85
Episode: 3335, total numsteps: 2237359, reward: -115.89, average reward: -114.93
Episode: 3336, total numstep

Episode: 3426, total numsteps: 2247618, reward: -118.62, average reward: -116.83
Episode: 3427, total numsteps: 2247670, reward: -115.81, average reward: -116.97
Episode: 3428, total numsteps: 2247713, reward: -115.8, average reward: -116.91
Episode: 3429, total numsteps: 2247751, reward: -114.46, average reward: -116.86
Episode: 3430, total numsteps: 2247799, reward: -113.73, average reward: -116.83
Episode: 3431, total numsteps: 2247840, reward: -114.65, average reward: -116.78
Episode: 3432, total numsteps: 2247888, reward: -114.19, average reward: -116.74
Episode: 3433, total numsteps: 2247938, reward: -113.72, average reward: -116.69
Episode: 3434, total numsteps: 2247987, reward: -115.36, average reward: -116.67
Episode: 3435, total numsteps: 2248021, reward: -113.2, average reward: -116.65
Episode: 3436, total numsteps: 2248059, reward: -113.85, average reward: -116.62
Episode: 3437, total numsteps: 2248093, reward: -112.56, average reward: -116.53
Episode: 3438, total numsteps:

Episode: 3528, total numsteps: 2260627, reward: -118.22, average reward: -115.87
Episode: 3529, total numsteps: 2260670, reward: -112.66, average reward: -115.85
Episode: 3530, total numsteps: 2260776, reward: -112.98, average reward: -115.85
Episode: 3531, total numsteps: 2260832, reward: -114.51, average reward: -115.84
Episode: 3532, total numsteps: 2260909, reward: -118.42, average reward: -115.89
Episode: 3533, total numsteps: 2261009, reward: -122.72, average reward: -115.98
Episode: 3534, total numsteps: 2261124, reward: -117.09, average reward: -115.99
Episode: 3535, total numsteps: 2261268, reward: -120.16, average reward: -116.06
Episode: 3536, total numsteps: 2261434, reward: -127.66, average reward: -116.2
Episode: 3537, total numsteps: 2261555, reward: -115.05, average reward: -116.23
Episode: 3538, total numsteps: 2261644, reward: -104.55, average reward: -116.11
Episode: 3539, total numsteps: 2263244, reward: -103.01, average reward: -116.02
Episode: 3540, total numsteps

Episode: 3630, total numsteps: 2317419, reward: -129.27, average reward: -109.79
Episode: 3631, total numsteps: 2319019, reward: -72.15, average reward: -109.37
Episode: 3632, total numsteps: 2320619, reward: -78.57, average reward: -108.97
Episode: 3633, total numsteps: 2322219, reward: -76.87, average reward: -108.51
Episode: 3634, total numsteps: 2323819, reward: -82.43, average reward: -108.16
Episode: 3635, total numsteps: 2325419, reward: -77.15, average reward: -107.73
Episode: 3636, total numsteps: 2327019, reward: -71.3, average reward: -107.17
Episode: 3637, total numsteps: 2327129, reward: -128.89, average reward: -107.31
Episode: 3638, total numsteps: 2328729, reward: -65.34, average reward: -106.92
Episode: 3639, total numsteps: 2330329, reward: -75.0, average reward: -106.64
Episode: 3640, total numsteps: 2331929, reward: -74.9, average reward: -106.21
Episode: 3641, total numsteps: 2332052, reward: -127.17, average reward: -106.44
Episode: 3642, total numsteps: 2333652, 

Episode: 3732, total numsteps: 2367730, reward: -123.8, average reward: -112.81
Episode: 3733, total numsteps: 2369330, reward: -62.28, average reward: -112.67
Episode: 3734, total numsteps: 2370930, reward: -57.56, average reward: -112.42
Episode: 3735, total numsteps: 2372530, reward: -52.4, average reward: -112.17
Episode: 3736, total numsteps: 2374130, reward: -52.45, average reward: -111.98
Episode: 3737, total numsteps: 2375730, reward: -78.77, average reward: -111.48
Episode: 3738, total numsteps: 2375890, reward: -100.34, average reward: -111.83
Episode: 3739, total numsteps: 2376365, reward: -141.78, average reward: -112.5
Episode: 3740, total numsteps: 2377965, reward: -65.57, average reward: -112.4
Episode: 3741, total numsteps: 2379565, reward: -98.7, average reward: -112.12
Episode: 3742, total numsteps: 2380336, reward: -154.34, average reward: -112.93
Episode: 3743, total numsteps: 2381015, reward: -160.36, average reward: -113.76
Episode: 3744, total numsteps: 2381594, 

Episode: 3834, total numsteps: 2409627, reward: -105.08, average reward: -102.26
Episode: 3835, total numsteps: 2409703, reward: -102.48, average reward: -102.76
Episode: 3836, total numsteps: 2409803, reward: -105.01, average reward: -103.28
Episode: 3837, total numsteps: 2409848, reward: -102.35, average reward: -103.52
Episode: 3838, total numsteps: 2409913, reward: -101.0, average reward: -103.52
Episode: 3839, total numsteps: 2410002, reward: -104.08, average reward: -103.15
Episode: 3840, total numsteps: 2410116, reward: -105.93, average reward: -103.55
Episode: 3841, total numsteps: 2410182, reward: -100.26, average reward: -103.57
Episode: 3842, total numsteps: 2410262, reward: -102.16, average reward: -103.04
Episode: 3843, total numsteps: 2410346, reward: -104.59, average reward: -102.49
Episode: 3844, total numsteps: 2410446, reward: -102.5, average reward: -102.3
Episode: 3845, total numsteps: 2410545, reward: -105.01, average reward: -102.05
Episode: 3846, total numsteps: 

Episode: 3936, total numsteps: 2419425, reward: -105.04, average reward: -104.25
Episode: 3937, total numsteps: 2419501, reward: -100.86, average reward: -104.23
Episode: 3938, total numsteps: 2419590, reward: -103.5, average reward: -104.26
Episode: 3939, total numsteps: 2419648, reward: -102.58, average reward: -104.24
Episode: 3940, total numsteps: 2419712, reward: -110.82, average reward: -104.29
Episode: 3941, total numsteps: 2419767, reward: -101.28, average reward: -104.3
Episode: 3942, total numsteps: 2419842, reward: -102.76, average reward: -104.31
Episode: 3943, total numsteps: 2419896, reward: -101.96, average reward: -104.28
Episode: 3944, total numsteps: 2419964, reward: -103.32, average reward: -104.29
Episode: 3945, total numsteps: 2420067, reward: -102.57, average reward: -104.26
Episode: 3946, total numsteps: 2420140, reward: -101.23, average reward: -104.25
Episode: 3947, total numsteps: 2420202, reward: -100.82, average reward: -104.24
Episode: 3948, total numsteps:

Episode: 4038, total numsteps: 2426691, reward: -100.32, average reward: -103.57
Episode: 4039, total numsteps: 2426750, reward: -109.55, average reward: -103.64
Episode: 4040, total numsteps: 2426848, reward: -114.88, average reward: -103.68
Episode: 4041, total numsteps: 2426917, reward: -102.66, average reward: -103.69
Episode: 4042, total numsteps: 2427020, reward: -100.71, average reward: -103.67
Episode: 4043, total numsteps: 2427103, reward: -100.53, average reward: -103.66
Episode: 4044, total numsteps: 2427201, reward: -98.7, average reward: -103.61
Episode: 4045, total numsteps: 2427302, reward: -111.32, average reward: -103.7
Episode: 4046, total numsteps: 2427351, reward: -107.06, average reward: -103.75
Episode: 4047, total numsteps: 2427506, reward: -104.56, average reward: -103.79
Episode: 4048, total numsteps: 2427568, reward: -108.57, average reward: -103.73
Episode: 4049, total numsteps: 2427638, reward: -107.45, average reward: -103.72
Episode: 4050, total numsteps: 

Episode: 4140, total numsteps: 2457676, reward: -118.5, average reward: -115.86
Episode: 4141, total numsteps: 2457792, reward: -111.26, average reward: -115.94
Episode: 4142, total numsteps: 2457892, reward: -114.51, average reward: -116.08
Episode: 4143, total numsteps: 2457970, reward: -120.81, average reward: -116.28
Episode: 4144, total numsteps: 2458099, reward: -117.04, average reward: -116.47
Episode: 4145, total numsteps: 2458235, reward: -108.23, average reward: -116.44
Episode: 4146, total numsteps: 2459835, reward: -86.02, average reward: -116.23
Episode: 4147, total numsteps: 2461435, reward: -83.81, average reward: -116.02
Episode: 4148, total numsteps: 2461551, reward: -130.11, average reward: -116.23
Episode: 4149, total numsteps: 2461713, reward: -114.33, average reward: -116.3
Episode: 4150, total numsteps: 2462037, reward: -136.38, average reward: -116.62
Episode: 4151, total numsteps: 2463637, reward: -87.07, average reward: -116.24
Episode: 4152, total numsteps: 24

KeyboardInterrupt: 

In [None]:
def test(agent):   
    random_seed = 0
    episodes = 3
    max_timesteps = 2000
    render = True
    save_gif = True
     
    for i_episode in range(1, episodes):
        state = env.reset()
        episode_reward = 0
        for t in range(10000):  # Don't infinite loop while learning
            action = agent.select_action(state)  # Sample action from policy
            next_state, reward, done, _ = env.step(action)  # Step
           
            if render:
                env.render()  
                if save_gif:
                    dirname = './gif/sac/{}'.format(i_episode)
                    if not os.path.isdir(dirname):
                        os.mkdir(dirname)
                    img = env.render(mode = 'rgb_array')
                    img = Image.fromarray(img)
                    img.save('./gif/sac/{}/{}.jpg'.format(i_episode,t))

            state = next_state            
            episode_reward += reward

            if done:
                break    
   
            
        print('Episode: {}\tReward: {}'.format(i_episode, int(episode_reward)))
        running_reward = 0
        env.close()        
                
test(agent)