In [2]:
import numpy as np
from agents.ppo_agent import PPOAgent
from env.wavefront_env import WavefrontEnv
from train.train import train_ppo
from utils.plotting import plot_training_curve, visualize_best_mask

def main(run):
    """Train a PPO agent on the wavefront-shaping environment and save the results.

    Builds a ``WavefrontEnv`` around a fixed, seeded random ``phi`` array,
    trains a ``PPOAgent`` on it with ``train_ppo``, saves the trained model,
    and then plots the training curve and the best mask found.

    Args:
        run: Identifier for this run. It is embedded in the saved model's
            filename and forwarded to the plotting helpers.

    Returns:
        None. Progress and the best intensity are printed; the model is
        written to ``results/run_{run}_wavefront_ppo_model.pth``.
    """
    import os

    model_path = f'results/run_{run}_wavefront_ppo_model.pth'
    # Create the output directory before the (long) training run so a missing
    # folder cannot cost us the trained model at save time.
    # NOTE(review): presumably agent.save does not create parent directories
    # itself -- confirm against PPOAgent.save.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)

    # Seed numpy's global RNG so the same phi is generated on every run.
    np.random.seed(42)
    slm_dim = 64
    phi = np.random.rand(slm_dim, slm_dim)

    # Create environment
    env = WavefrontEnv(
        slm_dim1=slm_dim,
        slm_dim2=slm_dim,
        num_pix_per_block=32,
        alpha=0.3,
        noise_sigma=0.05,
        k=0.45,
        phi=phi,
    )

    # Create PPO agent
    # NOTE(review): `lr` is passed alongside separate `actor_lr`/`critic_lr`;
    # confirm which of these PPOAgent actually uses.
    agent = PPOAgent(
        state_dim=env.state_dim,
        action_dim=env.action_space,
        lr=3e-4,
        actor_lr=3e-4,
        critic_lr=1e-4,
        gamma=0.95,
        clip_ratio=0.2,
        value_coef=0.7,
        entropy_coef=0.001,
        gae_lambda=0.95,
    )

    # Train agent
    print("Starting training...")
    results = train_ppo(
        env=env,
        agent=agent,
        max_episodes=3000,  # Adjust based on your time constraints
        steps_per_episode=128,
        update_interval=5,
        eval_interval=25,
    )

    # Save the trained model
    agent.save(model_path)

    # Visualize results
    plot_training_curve(results, run)
    visualize_best_mask(env, results['best_mask'], run)

    print(f"Training completed. Best intensity achieved: {results['best_intensity']:.4f}")


# Kick off training for run 20; the run id ends up in the saved model's
# filename and is passed on to the plotting helpers.
main(run=20)

Starting training...


  0%|          | 5/3000 [00:01<22:23,  2.23it/s]

Episode 5, Reward: 18.7200
Policy Loss: -0.0408, Value Loss: 0.1322, Entropy Loss: -88.6117
Current Intensity: 0.0999, Max Intensity: 1.8183
---


  0%|          | 10/3000 [00:03<21:48,  2.28it/s]

Episode 10, Reward: 18.0386
Policy Loss: -0.1274, Value Loss: 0.0408, Entropy Loss: -88.5899
Current Intensity: 0.3949, Max Intensity: 1.8393
---


  0%|          | 15/3000 [00:05<20:39,  2.41it/s]

Episode 15, Reward: 18.0124
Policy Loss: -0.1464, Value Loss: 0.0578, Entropy Loss: -88.5687
Current Intensity: 0.3678, Max Intensity: 2.1942
---


  1%|          | 20/3000 [00:06<21:33,  2.30it/s]

Episode 20, Reward: 17.4378
Policy Loss: -0.1415, Value Loss: 0.0554, Entropy Loss: -88.5449
Current Intensity: 0.3008, Max Intensity: 2.2551
---


  1%|          | 24/3000 [00:07<13:54,  3.57it/s]

Episode 25, Reward: 19.0118
Policy Loss: -0.1109, Value Loss: 0.0602, Entropy Loss: -88.5367
Current Intensity: 0.2585, Max Intensity: 2.5129
---


  1%|          | 26/3000 [00:09<28:58,  1.71it/s]

Evaluation at episode 25: Average Reward = 32.3419


  1%|          | 30/3000 [00:11<23:02,  2.15it/s]

Episode 30, Reward: 17.9794
Policy Loss: -0.0618, Value Loss: 0.0600, Entropy Loss: -88.5377
Current Intensity: 0.0248, Max Intensity: 2.3979
---


  1%|          | 35/3000 [00:13<21:35,  2.29it/s]

Episode 35, Reward: 19.7025
Policy Loss: -0.1258, Value Loss: 0.0615, Entropy Loss: -88.5198
Current Intensity: 0.9448, Max Intensity: 2.1238
---


  1%|▏         | 40/3000 [00:14<20:40,  2.39it/s]

Episode 40, Reward: 19.1203
Policy Loss: -0.0812, Value Loss: 0.0544, Entropy Loss: -88.4829
Current Intensity: 0.5641, Max Intensity: 2.1615
---


  2%|▏         | 45/3000 [00:16<22:33,  2.18it/s]

Episode 45, Reward: 17.6061
Policy Loss: -0.2284, Value Loss: 0.0575, Entropy Loss: -88.4327
Current Intensity: 0.0708, Max Intensity: 2.3149
---


  2%|▏         | 49/3000 [00:17<14:00,  3.51it/s]

Episode 50, Reward: 19.2633
Policy Loss: -0.1067, Value Loss: 0.0501, Entropy Loss: -88.4249
Current Intensity: 0.2883, Max Intensity: 2.1279
---


  2%|▏         | 50/3000 [00:19<36:46,  1.34it/s]

Evaluation at episode 50: Average Reward = 24.5589


  2%|▏         | 55/3000 [00:20<22:53,  2.14it/s]

Episode 55, Reward: 19.0336
Policy Loss: -0.0741, Value Loss: 0.0557, Entropy Loss: -88.3592
Current Intensity: 0.3163, Max Intensity: 1.9930
---


  2%|▏         | 60/3000 [00:22<21:20,  2.30it/s]

Episode 60, Reward: 20.9016
Policy Loss: -0.2755, Value Loss: 0.0416, Entropy Loss: -88.3475
Current Intensity: 0.4368, Max Intensity: 2.2674
---


  2%|▏         | 65/3000 [00:24<22:12,  2.20it/s]

Episode 65, Reward: 16.9259
Policy Loss: -0.0802, Value Loss: 0.0398, Entropy Loss: -88.3172
Current Intensity: 0.3459, Max Intensity: 2.1712
---


  2%|▏         | 70/3000 [00:26<26:24,  1.85it/s]

Episode 70, Reward: 17.2424
Policy Loss: -0.1598, Value Loss: 0.0687, Entropy Loss: -88.2841
Current Intensity: 0.2853, Max Intensity: 2.1447
---


  2%|▏         | 74/3000 [00:27<16:59,  2.87it/s]

Episode 75, Reward: 17.0424
Policy Loss: -0.1832, Value Loss: 0.0409, Entropy Loss: -88.2519
Current Intensity: 0.4531, Max Intensity: 1.8745
---


  2%|▎         | 75/3000 [00:29<45:25,  1.07it/s]

Evaluation at episode 75: Average Reward = 37.3041


  3%|▎         | 80/3000 [00:32<29:27,  1.65it/s]

Episode 80, Reward: 17.6908
Policy Loss: -0.1497, Value Loss: 0.0311, Entropy Loss: -88.2535
Current Intensity: 0.4023, Max Intensity: 2.5289
---


  3%|▎         | 85/3000 [00:34<26:15,  1.85it/s]

Episode 85, Reward: 18.0154
Policy Loss: -0.1145, Value Loss: 0.0440, Entropy Loss: -88.2603
Current Intensity: 0.5333, Max Intensity: 2.4320
---


  3%|▎         | 90/3000 [00:36<26:17,  1.84it/s]

Episode 90, Reward: 16.7338
Policy Loss: -0.1575, Value Loss: 0.0515, Entropy Loss: -88.2434
Current Intensity: 0.5621, Max Intensity: 2.5942
---


  3%|▎         | 95/3000 [00:38<25:39,  1.89it/s]

Episode 95, Reward: 18.2019
Policy Loss: -0.1369, Value Loss: 0.0443, Entropy Loss: -88.2194
Current Intensity: 0.0994, Max Intensity: 2.5749
---


  3%|▎         | 99/3000 [00:39<16:31,  2.92it/s]

Episode 100, Reward: 19.2591
Policy Loss: -0.0759, Value Loss: 0.0299, Entropy Loss: -88.1332
Current Intensity: 0.1642, Max Intensity: 2.5287
---


  3%|▎         | 100/3000 [00:41<43:56,  1.10it/s]

Evaluation at episode 100: Average Reward = 52.6534


  4%|▎         | 105/3000 [00:44<29:14,  1.65it/s]

Episode 105, Reward: 18.1783
Policy Loss: -0.0556, Value Loss: 0.0456, Entropy Loss: -88.0961
Current Intensity: 0.6107, Max Intensity: 2.3866
---


  4%|▎         | 110/3000 [00:46<25:38,  1.88it/s]

Episode 110, Reward: 18.9148
Policy Loss: -0.1915, Value Loss: 0.0345, Entropy Loss: -88.1304
Current Intensity: 0.3056, Max Intensity: 2.2077
---


  4%|▍         | 115/3000 [00:48<24:45,  1.94it/s]

Episode 115, Reward: 18.9223
Policy Loss: -0.0463, Value Loss: 0.0295, Entropy Loss: -88.0789
Current Intensity: 0.2438, Max Intensity: 2.1812
---


  4%|▍         | 120/3000 [00:50<25:15,  1.90it/s]

Episode 120, Reward: 18.7569
Policy Loss: -0.2218, Value Loss: 0.0387, Entropy Loss: -88.0797
Current Intensity: 0.0574, Max Intensity: 2.8437
---


  4%|▍         | 124/3000 [00:51<16:08,  2.97it/s]

Episode 125, Reward: 17.8124
Policy Loss: -0.0404, Value Loss: 0.0476, Entropy Loss: -88.0211
Current Intensity: 0.6236, Max Intensity: 2.5008
---


  4%|▍         | 125/3000 [00:53<44:33,  1.08it/s]

Evaluation at episode 125: Average Reward = 50.4823


  4%|▍         | 130/3000 [00:55<28:04,  1.70it/s]

Episode 130, Reward: 18.9936
Policy Loss: -0.1011, Value Loss: 0.0315, Entropy Loss: -88.0099
Current Intensity: 0.2896, Max Intensity: 1.9358
---


  4%|▍         | 135/3000 [00:57<25:19,  1.89it/s]

Episode 135, Reward: 19.3818
Policy Loss: -0.2170, Value Loss: 0.0370, Entropy Loss: -88.0456
Current Intensity: 0.2491, Max Intensity: 2.5359
---


  5%|▍         | 140/3000 [00:59<25:31,  1.87it/s]

Episode 140, Reward: 17.2256
Policy Loss: -0.2486, Value Loss: 0.0450, Entropy Loss: -88.0205
Current Intensity: 0.9222, Max Intensity: 2.4481
---


  5%|▍         | 145/3000 [01:02<25:25,  1.87it/s]

Episode 145, Reward: 20.4253
Policy Loss: -0.2717, Value Loss: 0.0330, Entropy Loss: -87.9667
Current Intensity: 0.3682, Max Intensity: 2.4931
---


  5%|▍         | 149/3000 [01:03<16:12,  2.93it/s]

Episode 150, Reward: 16.0579
Policy Loss: -0.0462, Value Loss: 0.0363, Entropy Loss: -87.8724
Current Intensity: 0.2539, Max Intensity: 2.4800
---


  5%|▌         | 150/3000 [01:05<43:05,  1.10it/s]

Evaluation at episode 150: Average Reward = 55.0453


  5%|▌         | 155/3000 [01:07<28:32,  1.66it/s]

Episode 155, Reward: 20.9947
Policy Loss: -0.2945, Value Loss: 0.0391, Entropy Loss: -87.8649
Current Intensity: 0.4162, Max Intensity: 2.2921
---


  5%|▌         | 160/3000 [01:09<24:25,  1.94it/s]

Episode 160, Reward: 20.2746
Policy Loss: -0.1061, Value Loss: 0.0485, Entropy Loss: -87.7496
Current Intensity: 0.2917, Max Intensity: 2.3950
---


  6%|▌         | 165/3000 [01:11<25:49,  1.83it/s]

Episode 165, Reward: 19.4127
Policy Loss: -0.0951, Value Loss: 0.0458, Entropy Loss: -87.7814
Current Intensity: 1.1189, Max Intensity: 2.3312
---


  6%|▌         | 170/3000 [01:13<23:42,  1.99it/s]

Episode 170, Reward: 16.6867
Policy Loss: -0.2633, Value Loss: 0.0330, Entropy Loss: -87.7634
Current Intensity: 0.2166, Max Intensity: 2.3212
---


  6%|▌         | 174/3000 [01:14<15:18,  3.08it/s]

Episode 175, Reward: 19.8112
Policy Loss: -0.1020, Value Loss: 0.0517, Entropy Loss: -87.7191
Current Intensity: 0.2011, Max Intensity: 2.2094
---


  6%|▌         | 175/3000 [01:17<45:14,  1.04it/s]

Evaluation at episode 175: Average Reward = 60.7342


  6%|▌         | 180/3000 [01:19<26:32,  1.77it/s]

Episode 180, Reward: 18.5575
Policy Loss: -0.0702, Value Loss: 0.0388, Entropy Loss: -87.6759
Current Intensity: 0.6272, Max Intensity: 3.9991
---


  6%|▌         | 185/3000 [01:21<25:43,  1.82it/s]

Episode 185, Reward: 20.9910
Policy Loss: 0.0765, Value Loss: 0.0306, Entropy Loss: -87.6412
Current Intensity: 0.7367, Max Intensity: 3.8812
---


  6%|▋         | 190/3000 [01:23<25:45,  1.82it/s]

Episode 190, Reward: 16.4596
Policy Loss: -0.2266, Value Loss: 0.0371, Entropy Loss: -87.6514
Current Intensity: 0.3139, Max Intensity: 3.9646
---


  6%|▋         | 195/3000 [01:25<24:27,  1.91it/s]

Episode 195, Reward: 20.0275
Policy Loss: -0.1827, Value Loss: 0.0275, Entropy Loss: -87.6406
Current Intensity: 0.2268, Max Intensity: 3.9961
---


  7%|▋         | 199/3000 [01:27<17:38,  2.65it/s]

Episode 200, Reward: 19.4547
Policy Loss: -0.2807, Value Loss: 0.0249, Entropy Loss: -87.6607
Current Intensity: 0.4170, Max Intensity: 3.7247
---


  7%|▋         | 200/3000 [01:29<46:01,  1.01it/s]

Evaluation at episode 200: Average Reward = 76.4515


  7%|▋         | 205/3000 [01:31<28:43,  1.62it/s]

Episode 205, Reward: 19.5085
Policy Loss: -0.0542, Value Loss: 0.0318, Entropy Loss: -87.5382
Current Intensity: 0.0111, Max Intensity: 3.9329
---


  7%|▋         | 210/3000 [01:33<25:58,  1.79it/s]

Episode 210, Reward: 17.1759
Policy Loss: -0.2345, Value Loss: 0.0270, Entropy Loss: -87.4843
Current Intensity: 0.5630, Max Intensity: 2.9849
---


  7%|▋         | 215/3000 [01:35<25:13,  1.84it/s]

Episode 215, Reward: 17.7124
Policy Loss: -0.1955, Value Loss: 0.0348, Entropy Loss: -87.4869
Current Intensity: 0.4911, Max Intensity: 3.8214
---


  7%|▋         | 220/3000 [01:38<27:27,  1.69it/s]

Episode 220, Reward: 19.1904
Policy Loss: -0.1325, Value Loss: 0.0337, Entropy Loss: -87.3515
Current Intensity: 0.3571, Max Intensity: 3.8605
---


  7%|▋         | 224/3000 [01:39<16:07,  2.87it/s]

Episode 225, Reward: 21.9749
Policy Loss: -0.1110, Value Loss: 0.0489, Entropy Loss: -87.3192
Current Intensity: 0.4026, Max Intensity: 3.8569
---


  8%|▊         | 225/3000 [01:41<42:52,  1.08it/s]

Evaluation at episode 225: Average Reward = 63.7480


  8%|▊         | 230/3000 [01:44<29:21,  1.57it/s]

Episode 230, Reward: 20.2123
Policy Loss: -0.2081, Value Loss: 0.0438, Entropy Loss: -87.3151
Current Intensity: 0.1586, Max Intensity: 2.9576
---


  8%|▊         | 235/3000 [01:46<25:17,  1.82it/s]

Episode 235, Reward: 17.6023
Policy Loss: -0.2856, Value Loss: 0.0221, Entropy Loss: -87.2543
Current Intensity: 0.4043, Max Intensity: 3.3619
---


  8%|▊         | 240/3000 [01:48<26:48,  1.72it/s]

Episode 240, Reward: 18.5820
Policy Loss: -0.1950, Value Loss: 0.0459, Entropy Loss: -87.2359
Current Intensity: 0.5527, Max Intensity: 3.3276
---


  8%|▊         | 245/3000 [01:50<24:28,  1.88it/s]

Episode 245, Reward: 17.4580
Policy Loss: -0.2659, Value Loss: 0.0508, Entropy Loss: -87.2630
Current Intensity: 0.5738, Max Intensity: 3.2761
---


  8%|▊         | 249/3000 [01:51<15:41,  2.92it/s]

Episode 250, Reward: 19.1892
Policy Loss: -0.1749, Value Loss: 0.0362, Entropy Loss: -87.2263
Current Intensity: 0.7770, Max Intensity: 3.3495
---


  8%|▊         | 250/3000 [01:54<47:46,  1.04s/it]

Evaluation at episode 250: Average Reward = 70.5701


  8%|▊         | 255/3000 [01:56<31:44,  1.44it/s]

Episode 255, Reward: 19.1874
Policy Loss: -0.1482, Value Loss: 0.0446, Entropy Loss: -87.2344
Current Intensity: 0.1366, Max Intensity: 3.3562
---


  9%|▊         | 260/3000 [01:59<28:27,  1.60it/s]

Episode 260, Reward: 20.4528
Policy Loss: -0.2655, Value Loss: 0.0299, Entropy Loss: -87.1435
Current Intensity: 0.2446, Max Intensity: 3.2806
---


  9%|▉         | 265/3000 [02:01<25:12,  1.81it/s]

Episode 265, Reward: 17.2163
Policy Loss: -0.0165, Value Loss: 0.0263, Entropy Loss: -87.0206
Current Intensity: 0.8447, Max Intensity: 3.3731
---


  9%|▉         | 270/3000 [02:04<29:52,  1.52it/s]

Episode 270, Reward: 17.4146
Policy Loss: -0.1893, Value Loss: 0.0439, Entropy Loss: -86.9231
Current Intensity: 0.1874, Max Intensity: 3.3114
---


  9%|▉         | 274/3000 [02:05<16:40,  2.72it/s]

Episode 275, Reward: 17.9174
Policy Loss: -0.1312, Value Loss: 0.0247, Entropy Loss: -86.9992
Current Intensity: -0.0389, Max Intensity: 3.3375
---


  9%|▉         | 275/3000 [02:07<45:21,  1.00it/s]

Evaluation at episode 275: Average Reward = 78.9927


  9%|▉         | 280/3000 [02:10<29:37,  1.53it/s]

Episode 280, Reward: 17.9834
Policy Loss: -0.1957, Value Loss: 0.0306, Entropy Loss: -86.9114
Current Intensity: 0.1221, Max Intensity: 3.7718
---


 10%|▉         | 285/3000 [02:12<26:05,  1.73it/s]

Episode 285, Reward: 18.8216
Policy Loss: -0.0884, Value Loss: 0.0358, Entropy Loss: -86.8686
Current Intensity: 0.2171, Max Intensity: 3.8149
---


 10%|▉         | 290/3000 [02:14<27:11,  1.66it/s]

Episode 290, Reward: 20.7020
Policy Loss: -0.0872, Value Loss: 0.0350, Entropy Loss: -87.0744
Current Intensity: 0.2352, Max Intensity: 3.7698
---


 10%|▉         | 295/3000 [02:16<24:56,  1.81it/s]

Episode 295, Reward: 19.5297
Policy Loss: -0.0349, Value Loss: 0.0504, Entropy Loss: -86.9582
Current Intensity: 0.0875, Max Intensity: 3.7111
---


 10%|▉         | 299/3000 [02:18<17:00,  2.65it/s]

Episode 300, Reward: 19.3837
Policy Loss: -0.1622, Value Loss: 0.0297, Entropy Loss: -86.9215
Current Intensity: 0.3629, Max Intensity: 3.8184
---


 10%|█         | 300/3000 [02:20<45:27,  1.01s/it]

Evaluation at episode 300: Average Reward = 74.4973


 10%|█         | 305/3000 [02:23<31:31,  1.42it/s]

Episode 305, Reward: 21.3880
Policy Loss: -0.2119, Value Loss: 0.0548, Entropy Loss: -86.7375
Current Intensity: 0.3388, Max Intensity: 3.6497
---


 10%|█         | 310/3000 [02:25<26:51,  1.67it/s]

Episode 310, Reward: 19.9619
Policy Loss: -0.2755, Value Loss: 0.0336, Entropy Loss: -86.5593
Current Intensity: 0.2538, Max Intensity: 3.2584
---


 10%|█         | 315/3000 [02:27<25:05,  1.78it/s]

Episode 315, Reward: 21.0598
Policy Loss: -0.2131, Value Loss: 0.0378, Entropy Loss: -86.6584
Current Intensity: 0.0476, Max Intensity: 3.7933
---


 11%|█         | 320/3000 [02:29<25:56,  1.72it/s]

Episode 320, Reward: 21.9162
Policy Loss: -0.3172, Value Loss: 0.0641, Entropy Loss: -86.6338
Current Intensity: 0.3980, Max Intensity: 3.7444
---


 11%|█         | 324/3000 [02:31<16:33,  2.69it/s]

Episode 325, Reward: 19.7495
Policy Loss: -0.0904, Value Loss: 0.0470, Entropy Loss: -86.3940
Current Intensity: 0.0803, Max Intensity: 3.9536
---


 11%|█         | 325/3000 [02:33<46:06,  1.03s/it]

Evaluation at episode 325: Average Reward = 79.1265


 11%|█         | 330/3000 [02:36<29:15,  1.52it/s]

Episode 330, Reward: 18.8181
Policy Loss: -0.1927, Value Loss: 0.0387, Entropy Loss: -86.5070
Current Intensity: 0.0467, Max Intensity: 3.6917
---


 11%|█         | 335/3000 [02:38<26:01,  1.71it/s]

Episode 335, Reward: 18.0862
Policy Loss: -0.1089, Value Loss: 0.0362, Entropy Loss: -86.3961
Current Intensity: 0.1004, Max Intensity: 3.7649
---


 11%|█▏        | 340/3000 [02:40<25:36,  1.73it/s]

Episode 340, Reward: 20.6319
Policy Loss: -0.2344, Value Loss: 0.0372, Entropy Loss: -86.3982
Current Intensity: 0.5306, Max Intensity: 3.8241
---


 12%|█▏        | 345/3000 [02:43<26:17,  1.68it/s]

Episode 345, Reward: 20.8071
Policy Loss: -0.2212, Value Loss: 0.0461, Entropy Loss: -86.2877
Current Intensity: 0.0989, Max Intensity: 3.1774
---


 12%|█▏        | 349/3000 [02:44<16:07,  2.74it/s]

Episode 350, Reward: 22.1967
Policy Loss: -0.3625, Value Loss: 0.0406, Entropy Loss: -86.0781
Current Intensity: 0.5220, Max Intensity: 3.7499
---


 12%|█▏        | 350/3000 [02:46<44:14,  1.00s/it]

Evaluation at episode 350: Average Reward = 80.6120


 12%|█▏        | 355/3000 [02:49<28:58,  1.52it/s]

Episode 355, Reward: 17.2840
Policy Loss: -0.1880, Value Loss: 0.0611, Entropy Loss: -86.0112
Current Intensity: 0.0035, Max Intensity: 3.7693
---


 12%|█▏        | 360/3000 [02:51<26:12,  1.68it/s]

Episode 360, Reward: 20.8227
Policy Loss: -0.0060, Value Loss: 0.0365, Entropy Loss: -85.9753
Current Intensity: 0.3759, Max Intensity: 3.6302
---


 12%|█▏        | 365/3000 [02:53<26:09,  1.68it/s]

Episode 365, Reward: 20.5701
Policy Loss: -0.2215, Value Loss: 0.0384, Entropy Loss: -85.9207
Current Intensity: 0.1275, Max Intensity: 3.6435
---


 12%|█▏        | 370/3000 [02:56<25:35,  1.71it/s]

Episode 370, Reward: 24.1263
Policy Loss: -0.1032, Value Loss: 0.0482, Entropy Loss: -85.8566
Current Intensity: 0.1310, Max Intensity: 3.6768
---


 12%|█▏        | 374/3000 [02:57<16:47,  2.61it/s]

Episode 375, Reward: 20.8598
Policy Loss: -0.1369, Value Loss: 0.0415, Entropy Loss: -85.8509
Current Intensity: 0.0924, Max Intensity: 3.7181
---


 12%|█▎        | 375/3000 [02:59<46:10,  1.06s/it]

Evaluation at episode 375: Average Reward = 90.8920


 13%|█▎        | 380/3000 [03:02<29:25,  1.48it/s]

Episode 380, Reward: 19.6923
Policy Loss: -0.1724, Value Loss: 0.0341, Entropy Loss: -85.8655
Current Intensity: 0.9714, Max Intensity: 3.7802
---


 13%|█▎        | 385/3000 [03:04<25:42,  1.70it/s]

Episode 385, Reward: 23.9172
Policy Loss: -0.1605, Value Loss: 0.0398, Entropy Loss: -85.6540
Current Intensity: 0.6541, Max Intensity: 3.8105
---


 13%|█▎        | 390/3000 [03:06<26:28,  1.64it/s]

Episode 390, Reward: 21.0595
Policy Loss: 0.0071, Value Loss: 0.0409, Entropy Loss: -85.5531
Current Intensity: 0.4683, Max Intensity: 2.9647
---


 13%|█▎        | 395/3000 [03:09<25:44,  1.69it/s]

Episode 395, Reward: 19.3407
Policy Loss: -0.2333, Value Loss: 0.0627, Entropy Loss: -85.4388
Current Intensity: 0.0687, Max Intensity: 3.8783
---


 13%|█▎        | 399/3000 [03:10<16:02,  2.70it/s]

Episode 400, Reward: 20.3330
Policy Loss: -0.2376, Value Loss: 0.0517, Entropy Loss: -85.3391
Current Intensity: 0.1947, Max Intensity: 4.1472
---


 13%|█▎        | 400/3000 [03:12<44:07,  1.02s/it]

Evaluation at episode 400: Average Reward = 92.2628


 14%|█▎        | 405/3000 [03:15<28:03,  1.54it/s]

Episode 405, Reward: 25.1417
Policy Loss: -0.2628, Value Loss: 0.0420, Entropy Loss: -85.1760
Current Intensity: 1.0801, Max Intensity: 5.6007
---


 14%|█▎        | 410/3000 [03:17<26:30,  1.63it/s]

Episode 410, Reward: 21.0305
Policy Loss: -0.1501, Value Loss: 0.0348, Entropy Loss: -84.9304
Current Intensity: 0.3918, Max Intensity: 5.5894
---


 14%|█▍        | 415/3000 [03:19<25:15,  1.71it/s]

Episode 415, Reward: 21.2420
Policy Loss: -0.2641, Value Loss: 0.0516, Entropy Loss: -84.8782
Current Intensity: 0.7329, Max Intensity: 5.5023
---


 14%|█▍        | 420/3000 [03:22<25:01,  1.72it/s]

Episode 420, Reward: 23.9181
Policy Loss: -0.1213, Value Loss: 0.0514, Entropy Loss: -85.0025
Current Intensity: 0.1673, Max Intensity: 5.4664
---


 14%|█▍        | 424/3000 [03:23<16:08,  2.66it/s]

Episode 425, Reward: 19.3577
Policy Loss: -0.1326, Value Loss: 0.0400, Entropy Loss: -84.8230
Current Intensity: -0.0001, Max Intensity: 5.5242
---


 14%|█▍        | 425/3000 [03:26<43:40,  1.02s/it]

Evaluation at episode 425: Average Reward = 100.5525


 14%|█▍        | 430/3000 [03:28<28:11,  1.52it/s]

Episode 430, Reward: 23.5291
Policy Loss: -0.1722, Value Loss: 0.0819, Entropy Loss: -84.8313
Current Intensity: 0.7197, Max Intensity: 6.3583
---


 14%|█▍        | 435/3000 [03:30<24:57,  1.71it/s]

Episode 435, Reward: 22.3812
Policy Loss: -0.0926, Value Loss: 0.0519, Entropy Loss: -84.8324
Current Intensity: 0.1537, Max Intensity: 6.3217
---


 15%|█▍        | 440/3000 [03:33<25:42,  1.66it/s]

Episode 440, Reward: 23.7888
Policy Loss: -0.1440, Value Loss: 0.0613, Entropy Loss: -84.9023
Current Intensity: 0.7301, Max Intensity: 6.3996
---


 15%|█▍        | 445/3000 [03:35<25:34,  1.66it/s]

Episode 445, Reward: 22.7106
Policy Loss: -0.2324, Value Loss: 0.0604, Entropy Loss: -84.5541
Current Intensity: 0.3878, Max Intensity: 6.3237
---


 15%|█▍        | 449/3000 [03:36<15:20,  2.77it/s]

Episode 450, Reward: 23.5363
Policy Loss: -0.1032, Value Loss: 0.0608, Entropy Loss: -84.4971
Current Intensity: 0.4088, Max Intensity: 6.3533
---


 15%|█▌        | 450/3000 [03:39<44:04,  1.04s/it]

Evaluation at episode 450: Average Reward = 104.6190


 15%|█▌        | 455/3000 [03:41<26:18,  1.61it/s]

Episode 455, Reward: 25.2317
Policy Loss: -0.1976, Value Loss: 0.0438, Entropy Loss: -84.3162
Current Intensity: 0.5704, Max Intensity: 6.4705
---


 15%|█▌        | 460/3000 [03:43<25:29,  1.66it/s]

Episode 460, Reward: 23.9777
Policy Loss: -0.1623, Value Loss: 0.0508, Entropy Loss: -84.5766
Current Intensity: -0.0397, Max Intensity: 6.3120
---


 16%|█▌        | 465/3000 [03:46<23:49,  1.77it/s]

Episode 465, Reward: 21.9107
Policy Loss: -0.1175, Value Loss: 0.0430, Entropy Loss: -84.4665
Current Intensity: 0.5150, Max Intensity: 6.3336
---


 16%|█▌        | 470/3000 [03:48<26:10,  1.61it/s]

Episode 470, Reward: 22.6160
Policy Loss: -0.2667, Value Loss: 0.0581, Entropy Loss: -84.4788
Current Intensity: 0.7519, Max Intensity: 6.4478
---


 16%|█▌        | 474/3000 [03:49<17:03,  2.47it/s]

Episode 475, Reward: 20.7728
Policy Loss: -0.1053, Value Loss: 0.0468, Entropy Loss: -84.6839
Current Intensity: 0.3066, Max Intensity: 6.4369
---


 16%|█▌        | 475/3000 [03:52<42:04,  1.00it/s]

Evaluation at episode 475: Average Reward = 108.5711


 16%|█▌        | 480/3000 [03:54<29:13,  1.44it/s]

Episode 480, Reward: 19.9387
Policy Loss: -0.1882, Value Loss: 0.0400, Entropy Loss: -84.7554
Current Intensity: 0.4971, Max Intensity: 5.6904
---


 16%|█▌        | 485/3000 [03:56<23:27,  1.79it/s]

Episode 485, Reward: 17.8145
Policy Loss: -0.2241, Value Loss: 0.0569, Entropy Loss: -84.5141
Current Intensity: 0.1385, Max Intensity: 6.4379
---


 16%|█▋        | 490/3000 [03:59<25:38,  1.63it/s]

Episode 490, Reward: 25.5706
Policy Loss: -0.1746, Value Loss: 0.0649, Entropy Loss: -84.5154
Current Intensity: 0.1845, Max Intensity: 6.3864
---


 16%|█▋        | 495/3000 [04:01<23:19,  1.79it/s]

Episode 495, Reward: 23.0405
Policy Loss: -0.2973, Value Loss: 0.0520, Entropy Loss: -84.4312
Current Intensity: 0.7849, Max Intensity: 5.6225
---


 17%|█▋        | 499/3000 [04:02<14:47,  2.82it/s]

Episode 500, Reward: 27.2279
Policy Loss: -0.1984, Value Loss: 0.0610, Entropy Loss: -84.0896
Current Intensity: 0.3959, Max Intensity: 6.3081
---


 17%|█▋        | 500/3000 [04:05<49:53,  1.20s/it]

Evaluation at episode 500: Average Reward = 106.3771


 17%|█▋        | 505/3000 [04:08<27:50,  1.49it/s]

Episode 505, Reward: 21.6376
Policy Loss: -0.1456, Value Loss: 0.0309, Entropy Loss: -83.8479
Current Intensity: 0.1334, Max Intensity: 5.9950
---


 17%|█▋        | 510/3000 [04:10<26:35,  1.56it/s]

Episode 510, Reward: 20.3014
Policy Loss: -0.1414, Value Loss: 0.0440, Entropy Loss: -84.0853
Current Intensity: 0.3495, Max Intensity: 5.7715
---


 17%|█▋        | 515/3000 [04:12<23:15,  1.78it/s]

Episode 515, Reward: 25.1013
Policy Loss: -0.1725, Value Loss: 0.0429, Entropy Loss: -83.7191
Current Intensity: 0.9318, Max Intensity: 6.0542
---


 17%|█▋        | 520/3000 [04:15<26:36,  1.55it/s]

Episode 520, Reward: 23.4293
Policy Loss: 0.0453, Value Loss: 0.0308, Entropy Loss: -83.5945
Current Intensity: 0.6722, Max Intensity: 6.0559
---


 17%|█▋        | 524/3000 [04:16<15:55,  2.59it/s]

Episode 525, Reward: 23.2904
Policy Loss: -0.2748, Value Loss: 0.0677, Entropy Loss: -83.6998
Current Intensity: 0.7569, Max Intensity: 6.1531
---


 18%|█▊        | 525/3000 [04:19<40:48,  1.01it/s]

Evaluation at episode 525: Average Reward = 109.1273


 18%|█▊        | 530/3000 [04:21<27:13,  1.51it/s]

Episode 530, Reward: 22.7825
Policy Loss: -0.0621, Value Loss: 0.0513, Entropy Loss: -83.8074
Current Intensity: 0.0124, Max Intensity: 6.0522
---


 18%|█▊        | 535/3000 [04:23<23:37,  1.74it/s]

Episode 535, Reward: 27.0628
Policy Loss: -0.0905, Value Loss: 0.0531, Entropy Loss: -83.5746
Current Intensity: 0.4110, Max Intensity: 6.0149
---


 18%|█▊        | 540/3000 [04:26<25:28,  1.61it/s]

Episode 540, Reward: 22.9279
Policy Loss: -0.1395, Value Loss: 0.0509, Entropy Loss: -83.9558
Current Intensity: 0.0897, Max Intensity: 6.0612
---


 18%|█▊        | 545/3000 [04:28<23:34,  1.74it/s]

Episode 545, Reward: 26.0517
Policy Loss: -0.3892, Value Loss: 0.0730, Entropy Loss: -83.5432
Current Intensity: 0.5166, Max Intensity: 5.0520
---


 18%|█▊        | 549/3000 [04:30<15:57,  2.56it/s]

Episode 550, Reward: 24.4256
Policy Loss: -0.4555, Value Loss: 0.0888, Entropy Loss: -83.6121
Current Intensity: 1.1820, Max Intensity: 6.0888
---


 18%|█▊        | 550/3000 [04:32<42:44,  1.05s/it]

Evaluation at episode 550: Average Reward = 107.5654


 18%|█▊        | 555/3000 [04:35<29:32,  1.38it/s]

Episode 555, Reward: 28.2633
Policy Loss: -0.2358, Value Loss: 0.0615, Entropy Loss: -83.3637
Current Intensity: 1.5354, Max Intensity: 4.5368
---


 19%|█▊        | 560/3000 [04:37<23:52,  1.70it/s]

Episode 560, Reward: 27.4025
Policy Loss: -0.2120, Value Loss: 0.0804, Entropy Loss: -83.1350
Current Intensity: 0.4710, Max Intensity: 5.9593
---


 19%|█▉        | 565/3000 [04:39<23:50,  1.70it/s]

Episode 565, Reward: 26.3888
Policy Loss: -0.0573, Value Loss: 0.0566, Entropy Loss: -83.0186
Current Intensity: 0.6003, Max Intensity: 6.0584
---


 19%|█▉        | 570/3000 [04:42<23:54,  1.69it/s]

Episode 570, Reward: 27.1083
Policy Loss: -0.1686, Value Loss: 0.0604, Entropy Loss: -83.0423
Current Intensity: 0.5768, Max Intensity: 6.0069
---


 19%|█▉        | 574/3000 [04:43<14:59,  2.70it/s]

Episode 575, Reward: 26.4857
Policy Loss: -0.1973, Value Loss: 0.0629, Entropy Loss: -82.5702
Current Intensity: 0.2485, Max Intensity: 6.0855
---


 19%|█▉        | 575/3000 [04:46<43:11,  1.07s/it]

Evaluation at episode 575: Average Reward = 109.7161


 19%|█▉        | 580/3000 [04:48<26:22,  1.53it/s]

Episode 580, Reward: 26.6722
Policy Loss: -0.1712, Value Loss: 0.0589, Entropy Loss: -82.8106
Current Intensity: 0.1474, Max Intensity: 4.2339
---


 20%|█▉        | 585/3000 [04:50<24:55,  1.61it/s]

Episode 585, Reward: 24.5033
Policy Loss: -0.1367, Value Loss: 0.0751, Entropy Loss: -82.5775
Current Intensity: 0.0753, Max Intensity: 4.0961
---


 20%|█▉        | 590/3000 [04:53<23:42,  1.69it/s]

Episode 590, Reward: 26.4063
Policy Loss: -0.0836, Value Loss: 0.0713, Entropy Loss: -82.4662
Current Intensity: 0.2184, Max Intensity: 5.1594
---


 20%|█▉        | 595/3000 [04:55<24:48,  1.62it/s]

Episode 595, Reward: 23.5709
Policy Loss: -0.1050, Value Loss: 0.0850, Entropy Loss: -82.6755
Current Intensity: 0.0717, Max Intensity: 5.2686
---


 20%|█▉        | 599/3000 [04:57<16:17,  2.46it/s]

Episode 600, Reward: 27.7993
Policy Loss: -0.1777, Value Loss: 0.0554, Entropy Loss: -82.6786
Current Intensity: 0.1052, Max Intensity: 5.1928
---


 20%|██        | 600/3000 [04:59<40:34,  1.01s/it]

Evaluation at episode 600: Average Reward = 101.6685


 20%|██        | 605/3000 [05:02<29:10,  1.37it/s]

Episode 605, Reward: 28.9376
Policy Loss: -0.0257, Value Loss: 0.0770, Entropy Loss: -82.7884
Current Intensity: 0.7914, Max Intensity: 4.0810
---


 20%|██        | 610/3000 [05:04<23:13,  1.71it/s]

Episode 610, Reward: 27.0687
Policy Loss: -0.1860, Value Loss: 0.0814, Entropy Loss: -82.6860
Current Intensity: 1.1734, Max Intensity: 4.0447
---


 20%|██        | 615/3000 [05:07<26:45,  1.49it/s]

Episode 615, Reward: 27.4670
Policy Loss: -0.1744, Value Loss: 0.0553, Entropy Loss: -82.8254
Current Intensity: 0.4095, Max Intensity: 4.1453
---


 21%|██        | 620/3000 [05:09<23:46,  1.67it/s]

Episode 620, Reward: 26.9318
Policy Loss: -0.2249, Value Loss: 0.0543, Entropy Loss: -82.9751
Current Intensity: 0.8357, Max Intensity: 4.0255
---


 21%|██        | 624/3000 [05:10<15:47,  2.51it/s]

Episode 625, Reward: 24.8443
Policy Loss: -0.1719, Value Loss: 0.0705, Entropy Loss: -83.0314
Current Intensity: 0.2373, Max Intensity: 4.0231
---


 21%|██        | 625/3000 [05:13<43:03,  1.09s/it]

Evaluation at episode 625: Average Reward = 91.9705


 21%|██        | 630/3000 [05:16<27:38,  1.43it/s]

Episode 630, Reward: 23.7719
Policy Loss: -0.0752, Value Loss: 0.0746, Entropy Loss: -83.0427
Current Intensity: 0.3133, Max Intensity: 4.4673
---


 21%|██        | 635/3000 [05:18<23:53,  1.65it/s]

Episode 635, Reward: 25.6806
Policy Loss: -0.1697, Value Loss: 0.0593, Entropy Loss: -83.1516
Current Intensity: 1.1099, Max Intensity: 4.5912
---


 21%|██▏       | 640/3000 [05:20<23:23,  1.68it/s]

Episode 640, Reward: 26.6547
Policy Loss: -0.2393, Value Loss: 0.0466, Entropy Loss: -83.3519
Current Intensity: 0.1072, Max Intensity: 4.4860
---


 22%|██▏       | 645/3000 [05:23<23:52,  1.64it/s]

Episode 645, Reward: 25.4353
Policy Loss: -0.2121, Value Loss: 0.0744, Entropy Loss: -83.0545
Current Intensity: 0.9991, Max Intensity: 4.4028
---


 22%|██▏       | 649/3000 [05:24<15:55,  2.46it/s]

Episode 650, Reward: 29.6930
Policy Loss: -0.1497, Value Loss: 0.0575, Entropy Loss: -83.4431
Current Intensity: 0.0720, Max Intensity: 4.3791
---


 22%|██▏       | 650/3000 [05:27<41:49,  1.07s/it]

Evaluation at episode 650: Average Reward = 94.1631


 22%|██▏       | 655/3000 [05:29<27:10,  1.44it/s]

Episode 655, Reward: 28.1003
Policy Loss: -0.2463, Value Loss: 0.0564, Entropy Loss: -83.1468
Current Intensity: 1.0324, Max Intensity: 4.4289
---


 22%|██▏       | 660/3000 [05:32<24:35,  1.59it/s]

Episode 660, Reward: 28.3709
Policy Loss: -0.1339, Value Loss: 0.0563, Entropy Loss: -82.9010
Current Intensity: 1.3187, Max Intensity: 4.4280
---


 22%|██▏       | 665/3000 [05:34<24:30,  1.59it/s]

Episode 665, Reward: 31.4150
Policy Loss: -0.2719, Value Loss: 0.0834, Entropy Loss: -83.0202
Current Intensity: 0.8574, Max Intensity: 4.5260
---


 22%|██▏       | 670/3000 [05:37<23:21,  1.66it/s]

Episode 670, Reward: 30.3567
Policy Loss: -0.1528, Value Loss: 0.0714, Entropy Loss: -82.5046
Current Intensity: 0.1753, Max Intensity: 4.5269
---


 22%|██▏       | 674/3000 [05:38<14:34,  2.66it/s]

Episode 675, Reward: 23.1070
Policy Loss: -0.0841, Value Loss: 0.0688, Entropy Loss: -82.8938
Current Intensity: 1.4236, Max Intensity: 4.4476
---


 22%|██▎       | 675/3000 [05:40<40:58,  1.06s/it]

Evaluation at episode 675: Average Reward = 101.5735


 23%|██▎       | 680/3000 [05:43<24:47,  1.56it/s]

Episode 680, Reward: 24.7083
Policy Loss: -0.0610, Value Loss: 0.0984, Entropy Loss: -82.2444
Current Intensity: 0.1282, Max Intensity: 4.5019
---


 23%|██▎       | 685/3000 [05:45<23:12,  1.66it/s]

Episode 685, Reward: 28.7677
Policy Loss: -0.0554, Value Loss: 0.1015, Entropy Loss: -82.6129
Current Intensity: 0.1481, Max Intensity: 4.4517
---


 23%|██▎       | 690/3000 [05:48<22:21,  1.72it/s]

Episode 690, Reward: 29.9135
Policy Loss: -0.0421, Value Loss: 0.0517, Entropy Loss: -81.9883
Current Intensity: 1.0409, Max Intensity: 3.7607
---


 23%|██▎       | 695/3000 [05:50<22:39,  1.70it/s]

Episode 695, Reward: 31.6954
Policy Loss: -0.1877, Value Loss: 0.0932, Entropy Loss: -81.9857
Current Intensity: 0.3109, Max Intensity: 4.4034
---


 23%|██▎       | 699/3000 [05:51<14:57,  2.56it/s]

Episode 700, Reward: 30.8672
Policy Loss: -0.2275, Value Loss: 0.0685, Entropy Loss: -82.2051
Current Intensity: 0.3203, Max Intensity: 4.4455
---


 23%|██▎       | 700/3000 [05:54<40:18,  1.05s/it]

Evaluation at episode 700: Average Reward = 102.2747


 24%|██▎       | 705/3000 [05:56<26:47,  1.43it/s]

Episode 705, Reward: 28.1732
Policy Loss: -0.1991, Value Loss: 0.0754, Entropy Loss: -82.6865
Current Intensity: 0.5264, Max Intensity: 4.9896
---


 24%|██▎       | 710/3000 [05:59<22:19,  1.71it/s]

Episode 710, Reward: 29.5380
Policy Loss: -0.4135, Value Loss: 0.0610, Entropy Loss: -82.7297
Current Intensity: 0.5820, Max Intensity: 4.9578
---


 24%|██▍       | 715/3000 [06:01<24:41,  1.54it/s]

Episode 715, Reward: 27.0966
Policy Loss: -0.1091, Value Loss: 0.0642, Entropy Loss: -82.3899
Current Intensity: 0.2098, Max Intensity: 4.9136
---


 24%|██▍       | 720/3000 [06:04<21:09,  1.80it/s]

Episode 720, Reward: 32.9870
Policy Loss: -0.1383, Value Loss: 0.0705, Entropy Loss: -82.2798
Current Intensity: 0.6789, Max Intensity: 4.9068
---


 24%|██▍       | 724/3000 [06:05<14:54,  2.54it/s]

Episode 725, Reward: 31.4278
Policy Loss: -0.0375, Value Loss: 0.0732, Entropy Loss: -82.3215
Current Intensity: 0.2390, Max Intensity: 4.9367
---


 24%|██▍       | 725/3000 [06:08<39:34,  1.04s/it]

Evaluation at episode 725: Average Reward = 107.2001


 24%|██▍       | 730/3000 [06:10<26:09,  1.45it/s]

Episode 730, Reward: 28.3697
Policy Loss: -0.2262, Value Loss: 0.0842, Entropy Loss: -82.4728
Current Intensity: 0.8143, Max Intensity: 4.9879
---


 24%|██▍       | 735/3000 [06:12<23:37,  1.60it/s]

Episode 735, Reward: 28.8160
Policy Loss: -0.1581, Value Loss: 0.0659, Entropy Loss: -82.6084
Current Intensity: 0.2727, Max Intensity: 4.8711
---


 25%|██▍       | 740/3000 [06:15<23:08,  1.63it/s]

Episode 740, Reward: 30.4749
Policy Loss: -0.1146, Value Loss: 0.0827, Entropy Loss: -82.1140
Current Intensity: 0.6190, Max Intensity: 4.8977
---


 25%|██▍       | 745/3000 [06:17<22:50,  1.65it/s]

Episode 745, Reward: 30.0236
Policy Loss: -0.2189, Value Loss: 0.1536, Entropy Loss: -82.1817
Current Intensity: 0.5091, Max Intensity: 4.9178
---


 25%|██▍       | 749/3000 [06:19<13:17,  2.82it/s]

Episode 750, Reward: 29.2909
Policy Loss: -0.0625, Value Loss: 0.0741, Entropy Loss: -82.0219
Current Intensity: 0.4939, Max Intensity: 4.8300
---


 25%|██▌       | 750/3000 [06:21<39:03,  1.04s/it]

Evaluation at episode 750: Average Reward = 110.4890


 25%|██▌       | 755/3000 [06:24<24:35,  1.52it/s]

Episode 755, Reward: 34.2047
Policy Loss: -0.2224, Value Loss: 0.1157, Entropy Loss: -81.7692
Current Intensity: 0.5865, Max Intensity: 5.3010
---


 25%|██▌       | 760/3000 [06:26<23:49,  1.57it/s]

Episode 760, Reward: 32.9690
Policy Loss: -0.1049, Value Loss: 0.1242, Entropy Loss: -81.8499
Current Intensity: 0.0418, Max Intensity: 5.2557
---


 26%|██▌       | 765/3000 [06:28<21:22,  1.74it/s]

Episode 765, Reward: 31.2824
Policy Loss: -0.2557, Value Loss: 0.0834, Entropy Loss: -82.1318
Current Intensity: 0.4199, Max Intensity: 5.1609
---


 26%|██▌       | 770/3000 [06:31<23:07,  1.61it/s]

Episode 770, Reward: 31.1278
Policy Loss: -0.1606, Value Loss: 0.0674, Entropy Loss: -82.2035
Current Intensity: 1.1192, Max Intensity: 5.1055
---


 26%|██▌       | 774/3000 [06:32<14:44,  2.52it/s]

Episode 775, Reward: 33.0284
Policy Loss: -0.1302, Value Loss: 0.0920, Entropy Loss: -81.9086
Current Intensity: 0.2518, Max Intensity: 5.1854
---


 26%|██▌       | 775/3000 [06:35<38:01,  1.03s/it]

Evaluation at episode 775: Average Reward = 112.0008


 26%|██▌       | 780/3000 [06:37<27:31,  1.34it/s]

Episode 780, Reward: 34.5596
Policy Loss: -0.2896, Value Loss: 0.1198, Entropy Loss: -82.0534
Current Intensity: 0.2665, Max Intensity: 4.2803
---


 26%|██▌       | 785/3000 [06:40<20:46,  1.78it/s]

Episode 785, Reward: 29.3211
Policy Loss: -0.1098, Value Loss: 0.0657, Entropy Loss: -82.1262
Current Intensity: 0.2047, Max Intensity: 5.2819
---


 26%|██▋       | 790/3000 [06:42<24:22,  1.51it/s]

Episode 790, Reward: 34.4611
Policy Loss: -0.2016, Value Loss: 0.0857, Entropy Loss: -81.8572
Current Intensity: 1.1360, Max Intensity: 5.1019
---


 26%|██▋       | 795/3000 [06:44<20:47,  1.77it/s]

Episode 795, Reward: 34.8743
Policy Loss: -0.1871, Value Loss: 0.0910, Entropy Loss: -80.9843
Current Intensity: 1.8495, Max Intensity: 5.1704
---


 27%|██▋       | 799/3000 [06:46<14:23,  2.55it/s]

Episode 800, Reward: 29.4582
Policy Loss: -0.1995, Value Loss: 0.0821, Entropy Loss: -81.4334
Current Intensity: 0.0749, Max Intensity: 5.1539
---


 27%|██▋       | 800/3000 [06:49<40:42,  1.11s/it]

Evaluation at episode 800: Average Reward = 111.1260


 27%|██▋       | 805/3000 [06:51<24:57,  1.47it/s]

Episode 805, Reward: 31.8029
Policy Loss: -0.0875, Value Loss: 0.0972, Entropy Loss: -81.3171
Current Intensity: 0.0462, Max Intensity: 5.4078
---


 27%|██▋       | 810/3000 [06:54<23:16,  1.57it/s]

Episode 810, Reward: 30.2172
Policy Loss: -0.1514, Value Loss: 0.0952, Entropy Loss: -81.1430
Current Intensity: 0.3560, Max Intensity: 5.4392
---


 27%|██▋       | 815/3000 [06:56<22:00,  1.65it/s]

Episode 815, Reward: 31.7983
Policy Loss: -0.0380, Value Loss: 0.0756, Entropy Loss: -80.7769
Current Intensity: 0.8697, Max Intensity: 5.5228
---


 27%|██▋       | 820/3000 [06:58<22:37,  1.61it/s]

Episode 820, Reward: 32.6874
Policy Loss: -0.1859, Value Loss: 0.1153, Entropy Loss: -80.9955
Current Intensity: 0.5626, Max Intensity: 5.4440
---


 27%|██▋       | 824/3000 [07:00<13:54,  2.61it/s]

Episode 825, Reward: 29.9324
Policy Loss: -0.1982, Value Loss: 0.0750, Entropy Loss: -81.1333
Current Intensity: 0.0960, Max Intensity: 5.4854
---


 28%|██▊       | 825/3000 [07:02<39:56,  1.10s/it]

Evaluation at episode 825: Average Reward = 112.4737


 28%|██▊       | 830/3000 [07:05<23:33,  1.53it/s]

Episode 830, Reward: 33.1840
Policy Loss: -0.2215, Value Loss: 0.1114, Entropy Loss: -81.0745
Current Intensity: 0.3508, Max Intensity: 6.0048
---


 28%|██▊       | 835/3000 [07:07<22:53,  1.58it/s]

Episode 835, Reward: 37.8327
Policy Loss: -0.0719, Value Loss: 0.1013, Entropy Loss: -81.0000
Current Intensity: 0.2115, Max Intensity: 6.1277
---


 28%|██▊       | 840/3000 [07:10<20:43,  1.74it/s]

Episode 840, Reward: 32.6884
Policy Loss: -0.1256, Value Loss: 0.0947, Entropy Loss: -81.0977
Current Intensity: 0.1210, Max Intensity: 6.1093
---


 28%|██▊       | 845/3000 [07:12<22:21,  1.61it/s]

Episode 845, Reward: 34.7071
Policy Loss: -0.2629, Value Loss: 0.1058, Entropy Loss: -80.6872
Current Intensity: 0.9123, Max Intensity: 6.1121
---


 28%|██▊       | 849/3000 [07:14<16:08,  2.22it/s]

Episode 850, Reward: 37.9292
Policy Loss: -0.1781, Value Loss: 0.1012, Entropy Loss: -80.2555
Current Intensity: 0.3877, Max Intensity: 6.1508
---


 28%|██▊       | 850/3000 [07:16<39:03,  1.09s/it]

Evaluation at episode 850: Average Reward = 113.9628


 28%|██▊       | 855/3000 [07:19<25:54,  1.38it/s]

Episode 855, Reward: 34.1715
Policy Loss: -0.1795, Value Loss: 0.0985, Entropy Loss: -80.4274
Current Intensity: 0.1750, Max Intensity: 6.3414
---


 29%|██▊       | 860/3000 [07:21<21:25,  1.66it/s]

Episode 860, Reward: 35.7558
Policy Loss: -0.1969, Value Loss: 0.1149, Entropy Loss: -79.8966
Current Intensity: 0.4973, Max Intensity: 6.2806
---


 29%|██▉       | 865/3000 [07:24<22:44,  1.56it/s]

Episode 865, Reward: 38.9552
Policy Loss: -0.1572, Value Loss: 0.1121, Entropy Loss: -79.9301
Current Intensity: 0.3284, Max Intensity: 5.1938
---


 29%|██▉       | 870/3000 [07:26<21:06,  1.68it/s]

Episode 870, Reward: 35.7759
Policy Loss: -0.0720, Value Loss: 0.0840, Entropy Loss: -79.8998
Current Intensity: 0.3006, Max Intensity: 6.2145
---


 29%|██▉       | 874/3000 [07:28<14:33,  2.43it/s]

Episode 875, Reward: 37.3644
Policy Loss: -0.2660, Value Loss: 0.0758, Entropy Loss: -79.4578
Current Intensity: 0.0203, Max Intensity: 6.2881
---


 29%|██▉       | 875/3000 [07:30<39:11,  1.11s/it]

Evaluation at episode 875: Average Reward = 106.3167


 29%|██▉       | 880/3000 [07:33<24:47,  1.43it/s]

Episode 880, Reward: 37.4508
Policy Loss: -0.1405, Value Loss: 0.0654, Entropy Loss: -79.8594
Current Intensity: 0.7722, Max Intensity: 6.6100
---


 30%|██▉       | 885/3000 [07:35<23:09,  1.52it/s]

Episode 885, Reward: 36.0077
Policy Loss: -0.1445, Value Loss: 0.0963, Entropy Loss: -79.4247
Current Intensity: 0.7499, Max Intensity: 6.5926
---


 30%|██▉       | 890/3000 [07:38<21:46,  1.61it/s]

Episode 890, Reward: 37.9995
Policy Loss: -0.1380, Value Loss: 0.1008, Entropy Loss: -79.7498
Current Intensity: 0.4464, Max Intensity: 6.5756
---


 30%|██▉       | 895/3000 [07:40<21:32,  1.63it/s]

Episode 895, Reward: 37.6610
Policy Loss: -0.1915, Value Loss: 0.1092, Entropy Loss: -79.8980
Current Intensity: 0.3940, Max Intensity: 6.6459
---


 30%|██▉       | 899/3000 [07:42<14:18,  2.45it/s]

Episode 900, Reward: 40.4592
Policy Loss: -0.1398, Value Loss: 0.1207, Entropy Loss: -79.7069
Current Intensity: 0.9274, Max Intensity: 6.6084
---


 30%|███       | 900/3000 [07:44<38:17,  1.09s/it]

Evaluation at episode 900: Average Reward = 109.3538


 30%|███       | 905/3000 [07:47<23:51,  1.46it/s]

Episode 905, Reward: 35.6723
Policy Loss: -0.1293, Value Loss: 0.0748, Entropy Loss: -79.9084
Current Intensity: 1.2965, Max Intensity: 6.6597
---


 30%|███       | 910/3000 [07:49<22:54,  1.52it/s]

Episode 910, Reward: 37.3921
Policy Loss: -0.1487, Value Loss: 0.1140, Entropy Loss: -79.8850
Current Intensity: 0.6215, Max Intensity: 6.4941
---


 30%|███       | 915/3000 [07:52<21:36,  1.61it/s]

Episode 915, Reward: 35.8637
Policy Loss: -0.2510, Value Loss: 0.1222, Entropy Loss: -79.8002
Current Intensity: 0.5920, Max Intensity: 6.6000
---


 31%|███       | 920/3000 [07:54<22:02,  1.57it/s]

Episode 920, Reward: 33.6502
Policy Loss: -0.1555, Value Loss: 0.0956, Entropy Loss: -79.6430
Current Intensity: 0.0879, Max Intensity: 6.5797
---


 31%|███       | 924/3000 [07:56<14:22,  2.41it/s]

Episode 925, Reward: 45.2284
Policy Loss: -0.0058, Value Loss: 0.1451, Entropy Loss: -79.7339
Current Intensity: 1.4312, Max Intensity: 6.5206
---


 31%|███       | 925/3000 [07:58<39:15,  1.14s/it]

Evaluation at episode 925: Average Reward = 115.8117


 31%|███       | 930/3000 [08:01<24:01,  1.44it/s]

Episode 930, Reward: 32.5993
Policy Loss: -0.0593, Value Loss: 0.1094, Entropy Loss: -80.1268
Current Intensity: 1.5088, Max Intensity: 5.7571
---


 31%|███       | 935/3000 [08:03<21:56,  1.57it/s]

Episode 935, Reward: 42.0799
Policy Loss: -0.2563, Value Loss: 0.0857, Entropy Loss: -79.9657
Current Intensity: 0.6705, Max Intensity: 5.8929
---


 31%|███▏      | 940/3000 [08:06<20:14,  1.70it/s]

Episode 940, Reward: 38.3903
Policy Loss: -0.0341, Value Loss: 0.1255, Entropy Loss: -79.7840
Current Intensity: 1.1573, Max Intensity: 5.7864
---


 32%|███▏      | 945/3000 [08:08<21:04,  1.62it/s]

Episode 945, Reward: 35.9540
Policy Loss: -0.1390, Value Loss: 0.1670, Entropy Loss: -79.6650
Current Intensity: 0.8880, Max Intensity: 5.7674
---


 32%|███▏      | 949/3000 [08:09<13:08,  2.60it/s]

Episode 950, Reward: 42.8703
Policy Loss: -0.1637, Value Loss: 0.0990, Entropy Loss: -79.3899
Current Intensity: 0.3527, Max Intensity: 4.5021
---


 32%|███▏      | 950/3000 [08:12<36:40,  1.07s/it]

Evaluation at episode 950: Average Reward = 118.5435


 32%|███▏      | 955/3000 [08:15<23:34,  1.45it/s]

Episode 955, Reward: 38.2930
Policy Loss: -0.2458, Value Loss: 0.1462, Entropy Loss: -79.4332
Current Intensity: 1.2056, Max Intensity: 7.5915
---


 32%|███▏      | 960/3000 [08:17<21:47,  1.56it/s]

Episode 960, Reward: 37.2513
Policy Loss: -0.1891, Value Loss: 0.1516, Entropy Loss: -80.1563
Current Intensity: 0.6480, Max Intensity: 7.5563
---


 32%|███▏      | 965/3000 [08:19<20:37,  1.64it/s]

Episode 965, Reward: 40.5793
Policy Loss: -0.0977, Value Loss: 0.1180, Entropy Loss: -79.6432
Current Intensity: 0.7714, Max Intensity: 7.4775
---


 32%|███▏      | 970/3000 [08:22<21:21,  1.58it/s]

Episode 970, Reward: 39.2045
Policy Loss: -0.2281, Value Loss: 0.0941, Entropy Loss: -79.4053
Current Intensity: 0.7607, Max Intensity: 6.6371
---


 32%|███▏      | 974/3000 [08:23<13:57,  2.42it/s]

Episode 975, Reward: 40.2507
Policy Loss: -0.1097, Value Loss: 0.1353, Entropy Loss: -79.1698
Current Intensity: 1.3482, Max Intensity: 7.5251
---


 32%|███▎      | 975/3000 [08:26<35:43,  1.06s/it]

Evaluation at episode 975: Average Reward = 121.2233


 33%|███▎      | 980/3000 [08:29<23:44,  1.42it/s]

Episode 980, Reward: 41.2698
Policy Loss: -0.1968, Value Loss: 0.1037, Entropy Loss: -78.8199
Current Intensity: 0.3561, Max Intensity: 8.3240
---


 33%|███▎      | 985/3000 [08:31<19:38,  1.71it/s]

Episode 985, Reward: 36.3617
Policy Loss: -0.1762, Value Loss: 0.1353, Entropy Loss: -79.0458
Current Intensity: 1.1897, Max Intensity: 6.7334
---


 33%|███▎      | 990/3000 [08:33<22:19,  1.50it/s]

Episode 990, Reward: 40.3415
Policy Loss: 0.0357, Value Loss: 0.0859, Entropy Loss: -78.8171
Current Intensity: 0.5102, Max Intensity: 8.3437
---


 33%|███▎      | 995/3000 [08:36<21:05,  1.58it/s]

Episode 995, Reward: 51.8367
Policy Loss: -0.1886, Value Loss: 0.1602, Entropy Loss: -78.7030
Current Intensity: 0.6600, Max Intensity: 8.2154
---


 33%|███▎      | 999/3000 [08:38<14:45,  2.26it/s]

Episode 1000, Reward: 41.8166
Policy Loss: -0.0364, Value Loss: 0.1317, Entropy Loss: -78.3994
Current Intensity: 0.1886, Max Intensity: 8.2632
---


 33%|███▎      | 1000/3000 [08:40<37:51,  1.14s/it]

Evaluation at episode 1000: Average Reward = 122.7507


 34%|███▎      | 1005/3000 [08:43<24:02,  1.38it/s]

Episode 1005, Reward: 42.0043
Policy Loss: 0.0052, Value Loss: 0.1603, Entropy Loss: -78.6466
Current Intensity: 0.4953, Max Intensity: 7.1980
---


 34%|███▎      | 1010/3000 [08:45<20:39,  1.61it/s]

Episode 1010, Reward: 40.0382
Policy Loss: -0.2059, Value Loss: 0.0995, Entropy Loss: -78.4245
Current Intensity: 0.1844, Max Intensity: 7.4613
---


 34%|███▍      | 1015/3000 [08:48<20:17,  1.63it/s]

Episode 1015, Reward: 45.4446
Policy Loss: -0.2250, Value Loss: 0.1940, Entropy Loss: -78.4212
Current Intensity: 0.7463, Max Intensity: 8.2222
---


 34%|███▍      | 1020/3000 [08:50<20:06,  1.64it/s]

Episode 1020, Reward: 44.6729
Policy Loss: -0.1553, Value Loss: 0.1569, Entropy Loss: -78.2191
Current Intensity: 0.4784, Max Intensity: 8.1900
---


 34%|███▍      | 1024/3000 [08:51<12:24,  2.66it/s]

Episode 1025, Reward: 45.0350
Policy Loss: -0.1946, Value Loss: 0.1402, Entropy Loss: -78.3381
Current Intensity: 0.4410, Max Intensity: 8.2184
---


 34%|███▍      | 1025/3000 [08:54<35:56,  1.09s/it]

Evaluation at episode 1025: Average Reward = 121.5939


 34%|███▍      | 1030/3000 [08:57<21:38,  1.52it/s]

Episode 1030, Reward: 44.2106
Policy Loss: -0.3024, Value Loss: 0.1269, Entropy Loss: -78.4858
Current Intensity: 1.5508, Max Intensity: 6.5472
---


 34%|███▍      | 1035/3000 [08:59<21:49,  1.50it/s]

Episode 1035, Reward: 43.9950
Policy Loss: -0.1010, Value Loss: 0.1352, Entropy Loss: -78.3798
Current Intensity: 0.1730, Max Intensity: 6.5301
---


 35%|███▍      | 1040/3000 [09:01<18:53,  1.73it/s]

Episode 1040, Reward: 45.0864
Policy Loss: -0.0483, Value Loss: 0.1108, Entropy Loss: -77.8844
Current Intensity: 1.6728, Max Intensity: 6.4586
---


 35%|███▍      | 1045/3000 [09:04<21:23,  1.52it/s]

Episode 1045, Reward: 49.2213
Policy Loss: -0.3171, Value Loss: 0.1514, Entropy Loss: -77.7564
Current Intensity: 0.7404, Max Intensity: 6.5157
---


 35%|███▍      | 1049/3000 [09:05<12:43,  2.56it/s]

Episode 1050, Reward: 45.9949
Policy Loss: -0.1830, Value Loss: 0.1037, Entropy Loss: -77.8078
Current Intensity: 0.7540, Max Intensity: 6.6137
---


 35%|███▌      | 1050/3000 [09:08<31:52,  1.02it/s]

Evaluation at episode 1050: Average Reward = 121.9635


 35%|███▌      | 1055/3000 [09:10<22:47,  1.42it/s]

Episode 1055, Reward: 45.0771
Policy Loss: -0.1672, Value Loss: 0.1147, Entropy Loss: -77.8624
Current Intensity: 0.1562, Max Intensity: 6.4707
---


 35%|███▌      | 1060/3000 [09:12<18:35,  1.74it/s]

Episode 1060, Reward: 44.1850
Policy Loss: -0.1737, Value Loss: 0.1403, Entropy Loss: -77.3985
Current Intensity: 0.2631, Max Intensity: 6.6974
---


 36%|███▌      | 1065/3000 [09:15<21:07,  1.53it/s]

Episode 1065, Reward: 43.7675
Policy Loss: -0.1821, Value Loss: 0.1241, Entropy Loss: -76.8965
Current Intensity: 1.4688, Max Intensity: 6.5413
---


 36%|███▌      | 1070/3000 [09:17<18:43,  1.72it/s]

Episode 1070, Reward: 45.6206
Policy Loss: -0.1984, Value Loss: 0.1540, Entropy Loss: -76.9374
Current Intensity: 0.8666, Max Intensity: 5.8954
---


 36%|███▌      | 1074/3000 [09:18<11:36,  2.76it/s]

Episode 1075, Reward: 43.7232
Policy Loss: -0.2431, Value Loss: 0.1669, Entropy Loss: -76.8589
Current Intensity: 0.0355, Max Intensity: 6.6574
---


 36%|███▌      | 1075/3000 [09:21<35:17,  1.10s/it]

Evaluation at episode 1075: Average Reward = 124.9165


 36%|███▌      | 1080/3000 [09:23<21:00,  1.52it/s]

Episode 1080, Reward: 49.3675
Policy Loss: -0.1747, Value Loss: 0.1054, Entropy Loss: -75.6276
Current Intensity: 1.0528, Max Intensity: 6.5313
---


 36%|███▌      | 1085/3000 [09:26<20:17,  1.57it/s]

Episode 1085, Reward: 49.2022
Policy Loss: 0.0287, Value Loss: 0.1219, Entropy Loss: -75.9550
Current Intensity: 0.2096, Max Intensity: 6.5335
---


 36%|███▋      | 1090/3000 [09:28<18:26,  1.73it/s]

Episode 1090, Reward: 57.0715
Policy Loss: -0.0866, Value Loss: 0.2246, Entropy Loss: -75.8620
Current Intensity: 0.5776, Max Intensity: 6.4758
---


 36%|███▋      | 1095/3000 [09:31<20:48,  1.53it/s]

Episode 1095, Reward: 47.9628
Policy Loss: -0.1949, Value Loss: 0.1315, Entropy Loss: -75.2675
Current Intensity: 1.2130, Max Intensity: 6.6731
---


 37%|███▋      | 1099/3000 [09:32<11:58,  2.65it/s]

Episode 1100, Reward: 42.8121
Policy Loss: -0.2090, Value Loss: 0.1601, Entropy Loss: -75.2742
Current Intensity: 0.6718, Max Intensity: 6.6587
---


 37%|███▋      | 1100/3000 [09:35<33:02,  1.04s/it]

Evaluation at episode 1100: Average Reward = 122.8250


 37%|███▋      | 1105/3000 [09:37<21:22,  1.48it/s]

Episode 1105, Reward: 51.3434
Policy Loss: -0.0107, Value Loss: 0.1408, Entropy Loss: -75.1010
Current Intensity: 0.4053, Max Intensity: 5.1607
---


 37%|███▋      | 1110/3000 [09:40<19:43,  1.60it/s]

Episode 1110, Reward: 52.3052
Policy Loss: -0.1801, Value Loss: 0.1572, Entropy Loss: -75.1992
Current Intensity: 1.8205, Max Intensity: 6.0054
---


 37%|███▋      | 1115/3000 [09:42<18:10,  1.73it/s]

Episode 1115, Reward: 53.5818
Policy Loss: -0.2128, Value Loss: 0.1564, Entropy Loss: -74.6233
Current Intensity: 0.4857, Max Intensity: 6.0178
---


 37%|███▋      | 1120/3000 [09:44<17:34,  1.78it/s]

Episode 1120, Reward: 56.5609
Policy Loss: -0.1423, Value Loss: 0.1456, Entropy Loss: -74.8280
Current Intensity: 1.3085, Max Intensity: 5.2462
---


 37%|███▋      | 1124/3000 [09:46<12:28,  2.51it/s]

Episode 1125, Reward: 55.8932
Policy Loss: -0.1916, Value Loss: 0.1567, Entropy Loss: -73.9777
Current Intensity: 0.8991, Max Intensity: 5.9783
---


 38%|███▊      | 1125/3000 [09:48<32:45,  1.05s/it]

Evaluation at episode 1125: Average Reward = 123.9628


 38%|███▊      | 1130/3000 [09:51<21:17,  1.46it/s]

Episode 1130, Reward: 54.1997
Policy Loss: -0.1732, Value Loss: 0.0998, Entropy Loss: -74.3035
Current Intensity: 1.3169, Max Intensity: 5.2642
---


 38%|███▊      | 1135/3000 [09:53<17:48,  1.75it/s]

Episode 1135, Reward: 52.8897
Policy Loss: -0.3169, Value Loss: 0.1375, Entropy Loss: -74.0793
Current Intensity: 0.7316, Max Intensity: 5.2063
---


 38%|███▊      | 1140/3000 [09:55<19:01,  1.63it/s]

Episode 1140, Reward: 52.0241
Policy Loss: -0.0373, Value Loss: 0.1237, Entropy Loss: -73.9482
Current Intensity: 0.6857, Max Intensity: 5.9274
---


 38%|███▊      | 1145/3000 [09:58<18:12,  1.70it/s]

Episode 1145, Reward: 53.9643
Policy Loss: -0.2074, Value Loss: 0.1191, Entropy Loss: -73.4794
Current Intensity: 1.2749, Max Intensity: 5.9365
---


 38%|███▊      | 1149/3000 [09:59<11:31,  2.68it/s]

Episode 1150, Reward: 55.5248
Policy Loss: -0.1553, Value Loss: 0.1485, Entropy Loss: -73.5850
Current Intensity: 1.5158, Max Intensity: 5.8989
---


 38%|███▊      | 1150/3000 [10:02<33:27,  1.09s/it]

Evaluation at episode 1150: Average Reward = 124.2425


 38%|███▊      | 1155/3000 [10:04<21:17,  1.44it/s]

Episode 1155, Reward: 56.1607
Policy Loss: -0.2959, Value Loss: 0.1687, Entropy Loss: -72.8928
Current Intensity: 1.3780, Max Intensity: 7.0842
---


 39%|███▊      | 1160/3000 [10:06<19:24,  1.58it/s]

Episode 1160, Reward: 53.4017
Policy Loss: -0.1428, Value Loss: 0.2165, Entropy Loss: -73.6566
Current Intensity: 1.0590, Max Intensity: 6.0433
---


 39%|███▉      | 1165/3000 [10:09<17:23,  1.76it/s]

Episode 1165, Reward: 54.9295
Policy Loss: -0.0675, Value Loss: 0.1618, Entropy Loss: -73.5648
Current Intensity: 0.3987, Max Intensity: 7.0183
---


 39%|███▉      | 1170/3000 [10:11<18:09,  1.68it/s]

Episode 1170, Reward: 60.1414
Policy Loss: -0.0924, Value Loss: 0.1787, Entropy Loss: -73.1591
Current Intensity: 1.0245, Max Intensity: 6.3877
---


 39%|███▉      | 1174/3000 [10:12<11:55,  2.55it/s]

Episode 1175, Reward: 56.2609
Policy Loss: -0.2131, Value Loss: 0.1123, Entropy Loss: -73.3598
Current Intensity: 1.5547, Max Intensity: 5.4520
---


 39%|███▉      | 1175/3000 [10:15<32:07,  1.06s/it]

Evaluation at episode 1175: Average Reward = 125.1452


 39%|███▉      | 1180/3000 [10:17<20:24,  1.49it/s]

Episode 1180, Reward: 50.8976
Policy Loss: -0.1853, Value Loss: 0.1195, Entropy Loss: -73.5591
Current Intensity: 0.6308, Max Intensity: 8.0580
---


 40%|███▉      | 1185/3000 [10:20<18:03,  1.68it/s]

Episode 1185, Reward: 54.5561
Policy Loss: -0.2396, Value Loss: 0.2367, Entropy Loss: -73.5294
Current Intensity: 0.5148, Max Intensity: 8.0894
---


 40%|███▉      | 1190/3000 [10:22<18:10,  1.66it/s]

Episode 1190, Reward: 59.7369
Policy Loss: 0.0429, Value Loss: 0.2022, Entropy Loss: -73.0666
Current Intensity: 1.4481, Max Intensity: 8.1186
---


 40%|███▉      | 1195/3000 [10:25<18:10,  1.65it/s]

Episode 1195, Reward: 58.9083
Policy Loss: -0.1653, Value Loss: 0.1674, Entropy Loss: -72.7372
Current Intensity: 0.6766, Max Intensity: 8.1564
---


 40%|███▉      | 1199/3000 [10:26<12:13,  2.45it/s]

Episode 1200, Reward: 58.7919
Policy Loss: -0.1249, Value Loss: 0.1699, Entropy Loss: -73.4956
Current Intensity: 1.1450, Max Intensity: 8.1543
---


 40%|████      | 1200/3000 [10:29<32:00,  1.07s/it]

Evaluation at episode 1200: Average Reward = 124.6619


 40%|████      | 1205/3000 [10:31<20:46,  1.44it/s]

Episode 1205, Reward: 62.1367
Policy Loss: -0.1045, Value Loss: 0.2104, Entropy Loss: -73.0804
Current Intensity: 1.1517, Max Intensity: 8.0864
---


 40%|████      | 1210/3000 [10:34<19:44,  1.51it/s]

Episode 1210, Reward: 55.3508
Policy Loss: -0.2157, Value Loss: 0.1980, Entropy Loss: -73.8793
Current Intensity: 1.1465, Max Intensity: 8.1140
---


 40%|████      | 1215/3000 [10:36<18:53,  1.58it/s]

Episode 1215, Reward: 55.8388
Policy Loss: -0.1267, Value Loss: 0.1919, Entropy Loss: -73.6632
Current Intensity: 1.0967, Max Intensity: 6.6734
---


 41%|████      | 1220/3000 [10:38<18:33,  1.60it/s]

Episode 1220, Reward: 52.4112
Policy Loss: -0.2134, Value Loss: 0.1823, Entropy Loss: -73.7886
Current Intensity: 0.9000, Max Intensity: 8.0895
---


 41%|████      | 1224/3000 [10:40<11:20,  2.61it/s]

Episode 1225, Reward: 63.1717
Policy Loss: 0.0278, Value Loss: 0.1798, Entropy Loss: -74.3946
Current Intensity: 0.5989, Max Intensity: 8.1209
---


 41%|████      | 1225/3000 [10:42<31:08,  1.05s/it]

Evaluation at episode 1225: Average Reward = 125.0251


 41%|████      | 1230/3000 [10:45<19:48,  1.49it/s]

Episode 1230, Reward: 57.9717
Policy Loss: -0.1053, Value Loss: 0.1842, Entropy Loss: -73.7291
Current Intensity: 0.9250, Max Intensity: 8.7644
---


 41%|████      | 1235/3000 [10:47<17:52,  1.65it/s]

Episode 1235, Reward: 52.1405
Policy Loss: -0.1980, Value Loss: 0.1918, Entropy Loss: -74.1706
Current Intensity: 1.6639, Max Intensity: 8.7207
---


 41%|████▏     | 1240/3000 [10:49<17:38,  1.66it/s]

Episode 1240, Reward: 56.7462
Policy Loss: -0.3040, Value Loss: 0.1751, Entropy Loss: -73.8313
Current Intensity: 1.4939, Max Intensity: 8.7655
---


 42%|████▏     | 1245/3000 [10:52<17:26,  1.68it/s]

Episode 1245, Reward: 57.6085
Policy Loss: -0.1598, Value Loss: 0.1518, Entropy Loss: -73.5340
Current Intensity: 0.8090, Max Intensity: 8.6466
---


 42%|████▏     | 1249/3000 [10:53<10:07,  2.88it/s]

Episode 1250, Reward: 59.8863
Policy Loss: -0.1329, Value Loss: 0.2424, Entropy Loss: -73.9579
Current Intensity: 3.0130, Max Intensity: 8.7050
---


 42%|████▏     | 1250/3000 [10:56<31:55,  1.09s/it]

Evaluation at episode 1250: Average Reward = 125.0785


 42%|████▏     | 1255/3000 [10:58<18:41,  1.56it/s]

Episode 1255, Reward: 61.6823
Policy Loss: -0.2519, Value Loss: 0.1511, Entropy Loss: -73.6633
Current Intensity: 0.3745, Max Intensity: 8.7323
---


 42%|████▏     | 1260/3000 [11:00<17:37,  1.65it/s]

Episode 1260, Reward: 62.2523
Policy Loss: -0.2412, Value Loss: 0.1532, Entropy Loss: -73.9896
Current Intensity: 1.7513, Max Intensity: 7.1569
---


 42%|████▏     | 1265/3000 [11:03<16:41,  1.73it/s]

Episode 1265, Reward: 55.2633
Policy Loss: -0.1440, Value Loss: 0.1933, Entropy Loss: -73.5032
Current Intensity: 0.7415, Max Intensity: 7.6825
---


 42%|████▏     | 1270/3000 [11:05<17:45,  1.62it/s]

Episode 1270, Reward: 55.9711
Policy Loss: -0.2969, Value Loss: 0.1612, Entropy Loss: -73.0604
Current Intensity: 1.0746, Max Intensity: 8.8014
---


 42%|████▏     | 1274/3000 [11:06<11:42,  2.46it/s]

Episode 1275, Reward: 58.1969
Policy Loss: -0.1814, Value Loss: 0.1992, Entropy Loss: -73.1521
Current Intensity: 0.4049, Max Intensity: 7.6780
---


 42%|████▎     | 1275/3000 [11:09<28:37,  1.00it/s]

Evaluation at episode 1275: Average Reward = 125.5412


 43%|████▎     | 1280/3000 [11:11<20:34,  1.39it/s]

Episode 1280, Reward: 63.6988
Policy Loss: -0.0499, Value Loss: 0.2059, Entropy Loss: -72.8398
Current Intensity: 0.8866, Max Intensity: 8.8178
---


 43%|████▎     | 1285/3000 [11:14<16:27,  1.74it/s]

Episode 1285, Reward: 62.1806
Policy Loss: -0.1687, Value Loss: 0.1704, Entropy Loss: -72.9768
Current Intensity: 1.3877, Max Intensity: 8.7404
---


 43%|████▎     | 1290/3000 [11:16<18:26,  1.55it/s]

Episode 1290, Reward: 64.5239
Policy Loss: -0.3316, Value Loss: 0.2012, Entropy Loss: -73.4686
Current Intensity: 1.1691, Max Intensity: 7.6526
---


 43%|████▎     | 1295/3000 [11:18<16:15,  1.75it/s]

Episode 1295, Reward: 72.2925
Policy Loss: 0.0040, Value Loss: 0.1846, Entropy Loss: -72.1392
Current Intensity: 1.7208, Max Intensity: 8.6833
---


 43%|████▎     | 1299/3000 [11:20<10:18,  2.75it/s]

Episode 1300, Reward: 64.5584
Policy Loss: -0.2137, Value Loss: 0.1989, Entropy Loss: -73.0347
Current Intensity: 1.2028, Max Intensity: 7.5391
---


 43%|████▎     | 1300/3000 [11:22<31:44,  1.12s/it]

Evaluation at episode 1300: Average Reward = 126.3245


 44%|████▎     | 1305/3000 [11:25<18:31,  1.53it/s]

Episode 1305, Reward: 68.3952
Policy Loss: -0.0784, Value Loss: 0.2141, Entropy Loss: -72.5770
Current Intensity: 0.9687, Max Intensity: 8.5158
---


 44%|████▎     | 1310/3000 [11:27<17:39,  1.60it/s]

Episode 1310, Reward: 62.7867
Policy Loss: -0.2901, Value Loss: 0.2337, Entropy Loss: -72.3356
Current Intensity: 0.8155, Max Intensity: 8.7851
---


 44%|████▍     | 1315/3000 [11:29<16:06,  1.74it/s]

Episode 1315, Reward: 58.4808
Policy Loss: 0.0130, Value Loss: 0.1665, Entropy Loss: -72.5000
Current Intensity: 0.9151, Max Intensity: 6.8484
---


 44%|████▍     | 1320/3000 [11:32<17:39,  1.59it/s]

Episode 1320, Reward: 58.1756
Policy Loss: -0.0936, Value Loss: 0.1541, Entropy Loss: -71.7055
Current Intensity: 0.5037, Max Intensity: 8.7014
---


 44%|████▍     | 1324/3000 [11:33<10:36,  2.63it/s]

Episode 1325, Reward: 60.6094
Policy Loss: -0.2737, Value Loss: 0.1583, Entropy Loss: -72.2385
Current Intensity: 1.2202, Max Intensity: 8.7392
---


 44%|████▍     | 1325/3000 [11:36<29:34,  1.06s/it]

Evaluation at episode 1325: Average Reward = 125.8974


 44%|████▍     | 1330/3000 [11:38<18:26,  1.51it/s]

Episode 1330, Reward: 64.3043
Policy Loss: -0.2361, Value Loss: 0.1876, Entropy Loss: -71.9580
Current Intensity: 0.8330, Max Intensity: 7.2623
---


 44%|████▍     | 1335/3000 [11:41<16:17,  1.70it/s]

Episode 1335, Reward: 62.0310
Policy Loss: -0.0565, Value Loss: 0.1475, Entropy Loss: -72.0671
Current Intensity: 0.6882, Max Intensity: 7.2873
---


 45%|████▍     | 1340/3000 [11:43<17:16,  1.60it/s]

Episode 1340, Reward: 68.0093
Policy Loss: -0.1798, Value Loss: 0.2391, Entropy Loss: -72.0773
Current Intensity: 1.2713, Max Intensity: 7.1778
---


 45%|████▍     | 1345/3000 [11:45<16:01,  1.72it/s]

Episode 1345, Reward: 62.8932
Policy Loss: -0.0769, Value Loss: 0.1629, Entropy Loss: -71.7666
Current Intensity: 1.0085, Max Intensity: 7.2525
---


 45%|████▍     | 1349/3000 [11:47<11:31,  2.39it/s]

Episode 1350, Reward: 71.2794
Policy Loss: -0.1567, Value Loss: 0.2093, Entropy Loss: -71.5849
Current Intensity: 1.0329, Max Intensity: 7.1850
---


 45%|████▌     | 1350/3000 [11:49<28:48,  1.05s/it]

Evaluation at episode 1350: Average Reward = 125.7495


 45%|████▌     | 1355/3000 [11:52<19:18,  1.42it/s]

Episode 1355, Reward: 60.6709
Policy Loss: -0.1028, Value Loss: 0.1968, Entropy Loss: -71.3345
Current Intensity: 1.0041, Max Intensity: 7.3871
---


 45%|████▌     | 1360/3000 [11:54<15:12,  1.80it/s]

Episode 1360, Reward: 68.9398
Policy Loss: -0.1401, Value Loss: 0.2110, Entropy Loss: -71.4964
Current Intensity: 0.6296, Max Intensity: 7.3806
---


 46%|████▌     | 1365/3000 [11:57<18:50,  1.45it/s]

Episode 1365, Reward: 69.7556
Policy Loss: -0.1814, Value Loss: 0.3221, Entropy Loss: -70.8631
Current Intensity: 1.6460, Max Intensity: 7.8774
---


 46%|████▌     | 1370/3000 [11:59<15:48,  1.72it/s]

Episode 1370, Reward: 69.1708
Policy Loss: -0.1875, Value Loss: 0.1900, Entropy Loss: -70.7696
Current Intensity: 0.7203, Max Intensity: 6.1583
---


 46%|████▌     | 1374/3000 [12:00<09:41,  2.80it/s]

Episode 1375, Reward: 68.4951
Policy Loss: -0.0442, Value Loss: 0.2219, Entropy Loss: -70.8244
Current Intensity: 1.4688, Max Intensity: 7.3798
---


 46%|████▌     | 1375/3000 [12:03<31:05,  1.15s/it]

Evaluation at episode 1375: Average Reward = 126.1934


 46%|████▌     | 1380/3000 [12:05<18:19,  1.47it/s]

Episode 1380, Reward: 65.0893
Policy Loss: -0.0419, Value Loss: 0.1785, Entropy Loss: -71.2612
Current Intensity: 0.7664, Max Intensity: 7.4510
---


 46%|████▌     | 1385/3000 [12:08<17:28,  1.54it/s]

Episode 1385, Reward: 62.8236
Policy Loss: -0.0036, Value Loss: 0.1441, Entropy Loss: -71.7724
Current Intensity: 1.6866, Max Intensity: 7.8163
---


 46%|████▋     | 1390/3000 [12:10<15:11,  1.77it/s]

Episode 1390, Reward: 68.2667
Policy Loss: -0.2521, Value Loss: 0.2317, Entropy Loss: -71.4135
Current Intensity: 0.9033, Max Intensity: 6.5356
---


 46%|████▋     | 1395/3000 [12:12<15:38,  1.71it/s]

Episode 1395, Reward: 69.5459
Policy Loss: -0.0718, Value Loss: 0.2172, Entropy Loss: -71.7399
Current Intensity: 0.8001, Max Intensity: 7.4353
---


 47%|████▋     | 1399/3000 [12:14<10:05,  2.65it/s]

Episode 1400, Reward: 67.2415
Policy Loss: -0.1590, Value Loss: 0.2806, Entropy Loss: -71.7903
Current Intensity: 2.1760, Max Intensity: 5.4383
---


 47%|████▋     | 1400/3000 [12:16<27:35,  1.03s/it]

Evaluation at episode 1400: Average Reward = 125.0398


 47%|████▋     | 1405/3000 [12:19<18:35,  1.43it/s]

Episode 1405, Reward: 64.9538
Policy Loss: -0.1558, Value Loss: 0.1795, Entropy Loss: -70.9831
Current Intensity: 0.5112, Max Intensity: 6.7717
---


 47%|████▋     | 1410/3000 [12:21<15:42,  1.69it/s]

Episode 1410, Reward: 65.3337
Policy Loss: -0.0734, Value Loss: 0.1222, Entropy Loss: -70.5977
Current Intensity: 1.3683, Max Intensity: 6.7269
---


 47%|████▋     | 1415/3000 [12:24<18:08,  1.46it/s]

Episode 1415, Reward: 68.7466
Policy Loss: -0.2117, Value Loss: 0.1717, Entropy Loss: -70.5353
Current Intensity: 1.4672, Max Intensity: 4.9682
---


 47%|████▋     | 1420/3000 [12:26<15:07,  1.74it/s]

Episode 1420, Reward: 63.3334
Policy Loss: -0.1294, Value Loss: 0.1945, Entropy Loss: -70.1169
Current Intensity: 1.7723, Max Intensity: 6.8038
---


 47%|████▋     | 1424/3000 [12:27<09:57,  2.64it/s]

Episode 1425, Reward: 75.5373
Policy Loss: -0.1194, Value Loss: 0.2209, Entropy Loss: -69.4407
Current Intensity: 0.4872, Max Intensity: 6.7392
---


 48%|████▊     | 1425/3000 [12:30<27:34,  1.05s/it]

Evaluation at episode 1425: Average Reward = 124.3797


 48%|████▊     | 1430/3000 [12:32<17:53,  1.46it/s]

Episode 1430, Reward: 66.5880
Policy Loss: -0.1941, Value Loss: 0.1963, Entropy Loss: -69.8481
Current Intensity: 2.0004, Max Intensity: 6.4946
---


 48%|████▊     | 1435/3000 [12:35<15:58,  1.63it/s]

Episode 1435, Reward: 76.8440
Policy Loss: -0.0998, Value Loss: 0.1609, Entropy Loss: -69.8699
Current Intensity: 0.4289, Max Intensity: 6.4916
---


 48%|████▊     | 1440/3000 [12:37<15:39,  1.66it/s]

Episode 1440, Reward: 74.2891
Policy Loss: -0.2735, Value Loss: 0.1621, Entropy Loss: -70.2922
Current Intensity: 1.7164, Max Intensity: 6.4696
---


 48%|████▊     | 1445/3000 [12:40<16:21,  1.58it/s]

Episode 1445, Reward: 70.5059
Policy Loss: -0.0168, Value Loss: 0.2263, Entropy Loss: -69.8205
Current Intensity: 0.7419, Max Intensity: 5.0181
---


 48%|████▊     | 1449/3000 [12:41<09:57,  2.60it/s]

Episode 1450, Reward: 73.1394
Policy Loss: -0.1736, Value Loss: 0.2290, Entropy Loss: -69.4686
Current Intensity: 2.7017, Max Intensity: 6.4764
---


 48%|████▊     | 1450/3000 [12:44<26:50,  1.04s/it]

Evaluation at episode 1450: Average Reward = 123.7616


 48%|████▊     | 1455/3000 [12:46<16:48,  1.53it/s]

Episode 1455, Reward: 74.1250
Policy Loss: -0.0444, Value Loss: 0.2575, Entropy Loss: -70.1342
Current Intensity: 2.1339, Max Intensity: 4.6854
---


 49%|████▊     | 1460/3000 [12:48<16:03,  1.60it/s]

Episode 1460, Reward: 72.8556
Policy Loss: -0.0938, Value Loss: 0.2057, Entropy Loss: -70.1104
Current Intensity: 0.8189, Max Intensity: 5.4434
---


 49%|████▉     | 1465/3000 [12:51<15:34,  1.64it/s]

Episode 1465, Reward: 65.5614
Policy Loss: -0.2209, Value Loss: 0.2197, Entropy Loss: -71.1472
Current Intensity: 1.5760, Max Intensity: 6.4996
---


 49%|████▉     | 1470/3000 [12:53<17:03,  1.49it/s]

Episode 1470, Reward: 78.1606
Policy Loss: -0.1849, Value Loss: 0.2026, Entropy Loss: -69.7350
Current Intensity: 1.6949, Max Intensity: 6.4518
---


 49%|████▉     | 1474/3000 [12:55<10:22,  2.45it/s]

Episode 1475, Reward: 75.0046
Policy Loss: -0.2770, Value Loss: 0.2030, Entropy Loss: -69.8843
Current Intensity: 1.6878, Max Intensity: 6.5000
---


 49%|████▉     | 1475/3000 [12:57<26:28,  1.04s/it]

Evaluation at episode 1475: Average Reward = 121.8142


 49%|████▉     | 1480/3000 [13:00<16:21,  1.55it/s]

Episode 1480, Reward: 73.0462
Policy Loss: -0.1347, Value Loss: 0.2350, Entropy Loss: -69.6898
Current Intensity: 0.5190, Max Intensity: 6.9855
---


 50%|████▉     | 1485/3000 [13:02<15:06,  1.67it/s]

Episode 1485, Reward: 76.9047
Policy Loss: -0.2421, Value Loss: 0.1973, Entropy Loss: -70.0251
Current Intensity: 2.8285, Max Intensity: 6.9381
---


 50%|████▉     | 1490/3000 [13:05<15:35,  1.61it/s]

Episode 1490, Reward: 70.7911
Policy Loss: -0.1120, Value Loss: 0.2007, Entropy Loss: -69.7364
Current Intensity: 1.8684, Max Intensity: 5.9164
---


 50%|████▉     | 1495/3000 [13:07<15:00,  1.67it/s]

Episode 1495, Reward: 72.8479
Policy Loss: -0.2040, Value Loss: 0.1870, Entropy Loss: -70.3179
Current Intensity: 1.4685, Max Intensity: 6.9967
---


 50%|████▉     | 1499/3000 [13:08<09:58,  2.51it/s]

Episode 1500, Reward: 67.5782
Policy Loss: -0.1952, Value Loss: 0.2002, Entropy Loss: -69.3421
Current Intensity: 0.5950, Max Intensity: 6.8761
---


 50%|█████     | 1500/3000 [13:11<26:37,  1.07s/it]

Evaluation at episode 1500: Average Reward = 125.4320


 50%|█████     | 1505/3000 [13:13<17:15,  1.44it/s]

Episode 1505, Reward: 73.4814
Policy Loss: -0.1539, Value Loss: 0.2431, Entropy Loss: -68.6582
Current Intensity: 1.3828, Max Intensity: 5.5465
---


 50%|█████     | 1510/3000 [13:16<15:08,  1.64it/s]

Episode 1510, Reward: 82.7704
Policy Loss: -0.0515, Value Loss: 0.1628, Entropy Loss: -68.4948
Current Intensity: 1.2085, Max Intensity: 7.2335
---


 50%|█████     | 1515/3000 [13:18<15:14,  1.62it/s]

Episode 1515, Reward: 78.5123
Policy Loss: -0.2209, Value Loss: 0.1760, Entropy Loss: -68.0768
Current Intensity: 0.9643, Max Intensity: 7.2969
---


 51%|█████     | 1520/3000 [13:20<13:42,  1.80it/s]

Episode 1520, Reward: 84.0661
Policy Loss: -0.3323, Value Loss: 0.2583, Entropy Loss: -67.8535
Current Intensity: 0.8995, Max Intensity: 6.1412
---


 51%|█████     | 1524/3000 [13:22<09:18,  2.64it/s]

Episode 1525, Reward: 72.1037
Policy Loss: -0.2821, Value Loss: 0.1580, Entropy Loss: -68.7237
Current Intensity: 1.5806, Max Intensity: 6.1582
---


 51%|█████     | 1525/3000 [13:24<25:49,  1.05s/it]

Evaluation at episode 1525: Average Reward = 126.4087


 51%|█████     | 1530/3000 [13:27<17:08,  1.43it/s]

Episode 1530, Reward: 70.3668
Policy Loss: -0.1256, Value Loss: 0.1891, Entropy Loss: -67.8041
Current Intensity: 1.0443, Max Intensity: 7.7643
---


 51%|█████     | 1535/3000 [13:29<15:15,  1.60it/s]

Episode 1535, Reward: 71.6628
Policy Loss: -0.0270, Value Loss: 0.1965, Entropy Loss: -67.6868
Current Intensity: 1.8819, Max Intensity: 7.6381
---


 51%|█████▏    | 1540/3000 [13:31<14:50,  1.64it/s]

Episode 1540, Reward: 84.6427
Policy Loss: -0.1829, Value Loss: 0.1673, Entropy Loss: -68.0468
Current Intensity: 2.3909, Max Intensity: 7.6497
---


 52%|█████▏    | 1545/3000 [13:34<15:21,  1.58it/s]

Episode 1545, Reward: 84.7273
Policy Loss: -0.0496, Value Loss: 0.1702, Entropy Loss: -67.9413
Current Intensity: 1.3806, Max Intensity: 7.6939
---


 52%|█████▏    | 1549/3000 [13:35<09:26,  2.56it/s]

Episode 1550, Reward: 78.4319
Policy Loss: -0.0956, Value Loss: 0.1739, Entropy Loss: -67.8863
Current Intensity: 2.5971, Max Intensity: 7.7667
---


 52%|█████▏    | 1550/3000 [13:38<25:39,  1.06s/it]

Evaluation at episode 1550: Average Reward = 125.5084


 52%|█████▏    | 1555/3000 [13:40<16:12,  1.49it/s]

Episode 1555, Reward: 86.8830
Policy Loss: -0.1085, Value Loss: 0.2441, Entropy Loss: -67.1893
Current Intensity: 2.0515, Max Intensity: 6.2067
---


 52%|█████▏    | 1560/3000 [13:43<15:26,  1.55it/s]

Episode 1560, Reward: 79.2940
Policy Loss: -0.2480, Value Loss: 0.2014, Entropy Loss: -66.8360
Current Intensity: 1.6046, Max Intensity: 6.1940
---


 52%|█████▏    | 1565/3000 [13:45<14:22,  1.66it/s]

Episode 1565, Reward: 83.0679
Policy Loss: 0.0214, Value Loss: 0.1465, Entropy Loss: -66.7514
Current Intensity: 1.4528, Max Intensity: 6.1942
---


 52%|█████▏    | 1570/3000 [13:48<14:56,  1.59it/s]

Episode 1570, Reward: 87.5941
Policy Loss: -0.0814, Value Loss: 0.1198, Entropy Loss: -67.1476
Current Intensity: 1.5526, Max Intensity: 6.2036
---


 52%|█████▏    | 1574/3000 [13:49<09:25,  2.52it/s]

Episode 1575, Reward: 88.3820
Policy Loss: -0.1662, Value Loss: 0.1579, Entropy Loss: -68.0646
Current Intensity: 1.6917, Max Intensity: 6.2662
---


 52%|█████▎    | 1575/3000 [13:51<23:23,  1.02it/s]

Evaluation at episode 1575: Average Reward = 126.2681


 53%|█████▎    | 1580/3000 [13:54<16:57,  1.40it/s]

Episode 1580, Reward: 83.0536
Policy Loss: -0.1944, Value Loss: 0.2203, Entropy Loss: -67.7772
Current Intensity: 2.9046, Max Intensity: 6.1560
---


 53%|█████▎    | 1585/3000 [13:56<13:41,  1.72it/s]

Episode 1585, Reward: 90.0897
Policy Loss: -0.0831, Value Loss: 0.1826, Entropy Loss: -67.1978
Current Intensity: 1.0655, Max Intensity: 6.8577
---


 53%|█████▎    | 1590/3000 [13:58<14:38,  1.61it/s]

Episode 1590, Reward: 73.2100
Policy Loss: -0.1373, Value Loss: 0.1880, Entropy Loss: -67.1647
Current Intensity: 2.7869, Max Intensity: 6.8909
---


 53%|█████▎    | 1595/3000 [14:01<13:31,  1.73it/s]

Episode 1595, Reward: 75.0446
Policy Loss: -0.1228, Value Loss: 0.2033, Entropy Loss: -66.9660
Current Intensity: 1.4421, Max Intensity: 6.2059
---


 53%|█████▎    | 1599/3000 [14:02<08:40,  2.69it/s]

Episode 1600, Reward: 75.4957
Policy Loss: -0.2494, Value Loss: 0.2171, Entropy Loss: -67.0108
Current Intensity: 1.4833, Max Intensity: 6.9449
---


 53%|█████▎    | 1600/3000 [14:05<25:11,  1.08s/it]

Evaluation at episode 1600: Average Reward = 126.6885


 54%|█████▎    | 1605/3000 [14:07<14:54,  1.56it/s]

Episode 1605, Reward: 89.4737
Policy Loss: -0.1228, Value Loss: 0.1428, Entropy Loss: -65.9412
Current Intensity: 2.7125, Max Intensity: 8.0005
---


 54%|█████▎    | 1610/3000 [14:10<15:51,  1.46it/s]

Episode 1610, Reward: 84.4360
Policy Loss: -0.2133, Value Loss: 0.1932, Entropy Loss: -66.2991
Current Intensity: 2.1036, Max Intensity: 7.9793
---


 54%|█████▍    | 1615/3000 [14:12<13:19,  1.73it/s]

Episode 1615, Reward: 87.8336
Policy Loss: -0.1257, Value Loss: 0.1681, Entropy Loss: -65.3938
Current Intensity: 2.1407, Max Intensity: 8.0474
---


 54%|█████▍    | 1620/3000 [14:14<14:43,  1.56it/s]

Episode 1620, Reward: 81.6162
Policy Loss: -0.1594, Value Loss: 0.1852, Entropy Loss: -65.1555
Current Intensity: 1.4142, Max Intensity: 8.0629
---


 54%|█████▍    | 1624/3000 [14:16<08:42,  2.63it/s]

Episode 1625, Reward: 86.8057
Policy Loss: -0.1296, Value Loss: 0.0929, Entropy Loss: -65.1710
Current Intensity: 1.6993, Max Intensity: 8.0497
---


 54%|█████▍    | 1625/3000 [14:18<23:59,  1.05s/it]

Evaluation at episode 1625: Average Reward = 126.2786


 54%|█████▍    | 1630/3000 [14:21<15:22,  1.49it/s]

Episode 1630, Reward: 89.6683
Policy Loss: -0.1564, Value Loss: 0.1654, Entropy Loss: -65.0939
Current Intensity: 2.2413, Max Intensity: 7.3007
---


 55%|█████▍    | 1635/3000 [14:23<13:12,  1.72it/s]

Episode 1635, Reward: 87.7521
Policy Loss: -0.0109, Value Loss: 0.1991, Entropy Loss: -65.0888
Current Intensity: 1.6950, Max Intensity: 7.3567
---


 55%|█████▍    | 1640/3000 [14:25<14:29,  1.56it/s]

Episode 1640, Reward: 83.1329
Policy Loss: -0.2142, Value Loss: 0.1364, Entropy Loss: -64.6898
Current Intensity: 1.3342, Max Intensity: 7.4460
---


 55%|█████▍    | 1645/3000 [14:28<13:20,  1.69it/s]

Episode 1645, Reward: 87.4442
Policy Loss: -0.1939, Value Loss: 0.1449, Entropy Loss: -64.3676
Current Intensity: 2.1831, Max Intensity: 7.4200
---


 55%|█████▍    | 1649/3000 [14:29<09:36,  2.34it/s]

Episode 1650, Reward: 86.5710
Policy Loss: -0.0963, Value Loss: 0.1883, Entropy Loss: -63.9029
Current Intensity: 3.1942, Max Intensity: 7.4247
---


 55%|█████▌    | 1650/3000 [14:32<23:57,  1.07s/it]

Evaluation at episode 1650: Average Reward = 125.2435


 55%|█████▌    | 1655/3000 [14:34<15:04,  1.49it/s]

Episode 1655, Reward: 87.3109
Policy Loss: -0.1689, Value Loss: 0.1447, Entropy Loss: -63.6628
Current Intensity: 1.9536, Max Intensity: 7.3771
---


 55%|█████▌    | 1660/3000 [14:37<13:15,  1.68it/s]

Episode 1660, Reward: 90.9728
Policy Loss: -0.2937, Value Loss: 0.1438, Entropy Loss: -64.1973
Current Intensity: 1.5871, Max Intensity: 6.6100
---


 56%|█████▌    | 1665/3000 [14:39<13:34,  1.64it/s]

Episode 1665, Reward: 92.4234
Policy Loss: -0.2394, Value Loss: 0.1505, Entropy Loss: -64.5667
Current Intensity: 3.6524, Max Intensity: 6.1234
---


 56%|█████▌    | 1670/3000 [14:41<13:20,  1.66it/s]

Episode 1670, Reward: 89.7568
Policy Loss: -0.1789, Value Loss: 0.1531, Entropy Loss: -65.0231
Current Intensity: 2.8954, Max Intensity: 7.3271
---


 56%|█████▌    | 1674/3000 [14:43<08:22,  2.64it/s]

Episode 1675, Reward: 90.5400
Policy Loss: -0.0564, Value Loss: 0.1261, Entropy Loss: -64.9688
Current Intensity: 2.0655, Max Intensity: 7.3345
---


 56%|█████▌    | 1675/3000 [14:45<23:38,  1.07s/it]

Evaluation at episode 1675: Average Reward = 125.1743


 56%|█████▌    | 1680/3000 [14:48<14:20,  1.53it/s]

Episode 1680, Reward: 87.4866
Policy Loss: -0.1945, Value Loss: 0.1386, Entropy Loss: -64.4799
Current Intensity: 3.4243, Max Intensity: 5.3429
---


 56%|█████▌    | 1685/3000 [14:50<13:43,  1.60it/s]

Episode 1685, Reward: 97.8261
Policy Loss: -0.2766, Value Loss: 0.1753, Entropy Loss: -64.4605
Current Intensity: 3.5728, Max Intensity: 6.4883
---


 56%|█████▋    | 1690/3000 [14:53<12:58,  1.68it/s]

Episode 1690, Reward: 90.5668
Policy Loss: -0.2821, Value Loss: 0.1299, Entropy Loss: -64.3275
Current Intensity: 1.7047, Max Intensity: 6.4762
---


 56%|█████▋    | 1695/3000 [14:55<13:38,  1.59it/s]

Episode 1695, Reward: 84.8578
Policy Loss: -0.0668, Value Loss: 0.1884, Entropy Loss: -63.9243
Current Intensity: 2.0236, Max Intensity: 4.8463
---


 57%|█████▋    | 1699/3000 [14:56<09:00,  2.40it/s]

Episode 1700, Reward: 91.5494
Policy Loss: -0.0868, Value Loss: 0.1464, Entropy Loss: -62.8985
Current Intensity: 1.2729, Max Intensity: 6.4970
---


 57%|█████▋    | 1700/3000 [14:59<22:37,  1.04s/it]

Evaluation at episode 1700: Average Reward = 125.2339


 57%|█████▋    | 1705/3000 [15:02<15:55,  1.36it/s]

Episode 1705, Reward: 95.6211
Policy Loss: -0.1746, Value Loss: 0.1243, Entropy Loss: -64.2600
Current Intensity: 1.7262, Max Intensity: 7.0120
---


 57%|█████▋    | 1710/3000 [15:04<12:50,  1.67it/s]

Episode 1710, Reward: 83.5602
Policy Loss: -0.0905, Value Loss: 0.1651, Entropy Loss: -63.6679
Current Intensity: 2.1486, Max Intensity: 7.0704
---


 57%|█████▋    | 1715/3000 [15:06<13:01,  1.65it/s]

Episode 1715, Reward: 87.2756
Policy Loss: -0.2312, Value Loss: 0.1598, Entropy Loss: -63.8589
Current Intensity: 2.1677, Max Intensity: 5.8109
---


 57%|█████▋    | 1720/3000 [15:09<12:29,  1.71it/s]

Episode 1720, Reward: 90.5468
Policy Loss: -0.1269, Value Loss: 0.1229, Entropy Loss: -63.1082
Current Intensity: 1.0493, Max Intensity: 7.0241
---


 57%|█████▋    | 1724/3000 [15:10<08:25,  2.53it/s]

Episode 1725, Reward: 93.4849
Policy Loss: 0.0704, Value Loss: 0.1951, Entropy Loss: -63.5452
Current Intensity: 2.0833, Max Intensity: 6.1512
---


 57%|█████▊    | 1725/3000 [15:13<22:51,  1.08s/it]

Evaluation at episode 1725: Average Reward = 125.3256


 58%|█████▊    | 1730/3000 [15:15<14:16,  1.48it/s]

Episode 1730, Reward: 92.8240
Policy Loss: -0.0640, Value Loss: 0.2074, Entropy Loss: -63.1560
Current Intensity: 1.6845, Max Intensity: 6.5402
---


 58%|█████▊    | 1735/3000 [15:17<13:01,  1.62it/s]

Episode 1735, Reward: 99.5568
Policy Loss: -0.0319, Value Loss: 0.1520, Entropy Loss: -62.6992
Current Intensity: 3.2874, Max Intensity: 6.4758
---


 58%|█████▊    | 1740/3000 [15:20<12:28,  1.68it/s]

Episode 1740, Reward: 94.2062
Policy Loss: -0.2517, Value Loss: 0.1585, Entropy Loss: -62.6272
Current Intensity: 3.3136, Max Intensity: 6.3885
---


 58%|█████▊    | 1745/3000 [15:22<12:53,  1.62it/s]

Episode 1745, Reward: 92.1177
Policy Loss: -0.3242, Value Loss: 0.1712, Entropy Loss: -62.6195
Current Intensity: 1.6058, Max Intensity: 6.6111
---


 58%|█████▊    | 1749/3000 [15:23<07:45,  2.69it/s]

Episode 1750, Reward: 96.1255
Policy Loss: -0.2044, Value Loss: 0.0963, Entropy Loss: -62.1830
Current Intensity: 1.2710, Max Intensity: 6.5718
---


 58%|█████▊    | 1750/3000 [15:26<22:03,  1.06s/it]

Evaluation at episode 1750: Average Reward = 125.4817


 58%|█████▊    | 1755/3000 [15:29<14:31,  1.43it/s]

Episode 1755, Reward: 70.5441
Policy Loss: -0.2118, Value Loss: 0.1187, Entropy Loss: -62.4738
Current Intensity: 2.1428, Max Intensity: 5.3544
---


 59%|█████▊    | 1760/3000 [15:31<12:58,  1.59it/s]

Episode 1760, Reward: 95.2278
Policy Loss: -0.0841, Value Loss: 0.1036, Entropy Loss: -62.5372
Current Intensity: 1.8879, Max Intensity: 6.6626
---


 59%|█████▉    | 1765/3000 [15:33<12:44,  1.61it/s]

Episode 1765, Reward: 93.7110
Policy Loss: -0.1352, Value Loss: 0.1372, Entropy Loss: -62.1636
Current Intensity: 1.9605, Max Intensity: 6.6495
---


 59%|█████▉    | 1770/3000 [15:36<12:39,  1.62it/s]

Episode 1770, Reward: 90.7656
Policy Loss: -0.1323, Value Loss: 0.1736, Entropy Loss: -61.9279
Current Intensity: 2.4649, Max Intensity: 6.6209
---


 59%|█████▉    | 1774/3000 [15:37<07:52,  2.60it/s]

Episode 1775, Reward: 90.2056
Policy Loss: -0.1248, Value Loss: 0.1324, Entropy Loss: -61.9784
Current Intensity: 1.6308, Max Intensity: 6.6845
---


 59%|█████▉    | 1775/3000 [15:40<23:15,  1.14s/it]

Evaluation at episode 1775: Average Reward = 126.3082


 59%|█████▉    | 1780/3000 [15:43<15:16,  1.33it/s]

Episode 1780, Reward: 96.6238
Policy Loss: -0.1942, Value Loss: 0.1476, Entropy Loss: -61.5212
Current Intensity: 1.8390, Max Intensity: 6.2903
---


 60%|█████▉    | 1785/3000 [15:45<14:47,  1.37it/s]

Episode 1785, Reward: 94.7678
Policy Loss: -0.0481, Value Loss: 0.1325, Entropy Loss: -61.3669
Current Intensity: 2.2141, Max Intensity: 6.8907
---


 60%|█████▉    | 1790/3000 [15:48<12:24,  1.63it/s]

Episode 1790, Reward: 98.9881
Policy Loss: -0.1046, Value Loss: 0.1282, Entropy Loss: -61.3211
Current Intensity: 2.2916, Max Intensity: 7.0121
---


 60%|█████▉    | 1795/3000 [15:50<12:06,  1.66it/s]

Episode 1795, Reward: 97.5150
Policy Loss: -0.2381, Value Loss: 0.1245, Entropy Loss: -61.1111
Current Intensity: 2.6280, Max Intensity: 6.9419
---


 60%|█████▉    | 1799/3000 [15:51<07:48,  2.57it/s]

Episode 1800, Reward: 104.5840
Policy Loss: -0.2440, Value Loss: 0.1037, Entropy Loss: -61.6324
Current Intensity: 3.1811, Max Intensity: 6.5136
---


 60%|██████    | 1800/3000 [15:54<21:15,  1.06s/it]

Evaluation at episode 1800: Average Reward = 126.4654


 60%|██████    | 1805/3000 [15:57<13:37,  1.46it/s]

Episode 1805, Reward: 99.7684
Policy Loss: -0.1194, Value Loss: 0.0649, Entropy Loss: -61.8103
Current Intensity: 2.8453, Max Intensity: 7.9351
---


 60%|██████    | 1810/3000 [15:59<11:11,  1.77it/s]

Episode 1810, Reward: 99.3405
Policy Loss: -0.1193, Value Loss: 0.1406, Entropy Loss: -61.1898
Current Intensity: 3.0924, Max Intensity: 7.8975
---


 60%|██████    | 1815/3000 [16:01<12:10,  1.62it/s]

Episode 1815, Reward: 95.8111
Policy Loss: -0.1595, Value Loss: 0.1481, Entropy Loss: -60.3834
Current Intensity: 3.2300, Max Intensity: 7.9751
---


 61%|██████    | 1820/3000 [16:04<11:41,  1.68it/s]

Episode 1820, Reward: 106.3186
Policy Loss: -0.0760, Value Loss: 0.1438, Entropy Loss: -60.8739
Current Intensity: 2.1100, Max Intensity: 7.9439
---


 61%|██████    | 1824/3000 [16:05<09:47,  2.00it/s]

Episode 1825, Reward: 104.3947
Policy Loss: 0.0194, Value Loss: 0.1122, Entropy Loss: -61.5671
Current Intensity: 2.4295, Max Intensity: 7.9232
---


 61%|██████    | 1825/3000 [16:08<24:44,  1.26s/it]

Evaluation at episode 1825: Average Reward = 125.2640


 61%|██████    | 1830/3000 [16:12<17:43,  1.10it/s]

Episode 1830, Reward: 90.6189
Policy Loss: -0.1389, Value Loss: 0.2162, Entropy Loss: -61.0155
Current Intensity: 2.7158, Max Intensity: 6.5352
---


 61%|██████    | 1835/3000 [16:14<13:15,  1.46it/s]

Episode 1835, Reward: 97.4914
Policy Loss: -0.2433, Value Loss: 0.1599, Entropy Loss: -60.9959
Current Intensity: 2.6172, Max Intensity: 7.5559
---


 61%|██████▏   | 1840/3000 [16:17<14:56,  1.29it/s]

Episode 1840, Reward: 99.4603
Policy Loss: -0.1970, Value Loss: 0.1612, Entropy Loss: -62.3840
Current Intensity: 2.3733, Max Intensity: 7.5765
---


 62%|██████▏   | 1845/3000 [16:20<11:51,  1.62it/s]

Episode 1845, Reward: 101.0932
Policy Loss: -0.2152, Value Loss: 0.1034, Entropy Loss: -60.9468
Current Intensity: 3.6446, Max Intensity: 6.5299
---


 62%|██████▏   | 1849/3000 [16:21<08:08,  2.36it/s]

Episode 1850, Reward: 99.3804
Policy Loss: -0.1309, Value Loss: 0.1331, Entropy Loss: -61.4518
Current Intensity: 3.5133, Max Intensity: 7.5251
---


 62%|██████▏   | 1850/3000 [16:24<20:52,  1.09s/it]

Evaluation at episode 1850: Average Reward = 125.7042


 62%|██████▏   | 1855/3000 [16:27<15:15,  1.25it/s]

Episode 1855, Reward: 104.0691
Policy Loss: -0.2799, Value Loss: 0.1645, Entropy Loss: -60.7890
Current Intensity: 4.4711, Max Intensity: 7.6407
---


 62%|██████▏   | 1860/3000 [16:29<11:08,  1.71it/s]

Episode 1860, Reward: 101.9911
Policy Loss: -0.1095, Value Loss: 0.2126, Entropy Loss: -61.3051
Current Intensity: 1.3470, Max Intensity: 7.5814
---


 62%|██████▏   | 1865/3000 [16:31<12:05,  1.56it/s]

Episode 1865, Reward: 101.6501
Policy Loss: -0.0427, Value Loss: 0.1208, Entropy Loss: -60.6945
Current Intensity: 2.6410, Max Intensity: 7.4928
---


 62%|██████▏   | 1870/3000 [16:34<11:02,  1.71it/s]

Episode 1870, Reward: 104.2814
Policy Loss: -0.1749, Value Loss: 0.1054, Entropy Loss: -60.2866
Current Intensity: 1.6890, Max Intensity: 7.6063
---


 62%|██████▏   | 1874/3000 [16:35<07:11,  2.61it/s]

Episode 1875, Reward: 107.0181
Policy Loss: -0.0821, Value Loss: 0.2139, Entropy Loss: -59.4611
Current Intensity: 2.7425, Max Intensity: 7.5144
---


 62%|██████▎   | 1875/3000 [16:38<20:39,  1.10s/it]

Evaluation at episode 1875: Average Reward = 125.4361


 63%|██████▎   | 1880/3000 [16:40<12:06,  1.54it/s]

Episode 1880, Reward: 102.3207
Policy Loss: -0.0838, Value Loss: 0.0705, Entropy Loss: -58.9449
Current Intensity: 2.4447, Max Intensity: 7.0045
---


 63%|██████▎   | 1885/3000 [16:43<12:13,  1.52it/s]

Episode 1885, Reward: 104.6335
Policy Loss: -0.1189, Value Loss: 0.0670, Entropy Loss: -58.6831
Current Intensity: 3.8203, Max Intensity: 5.5513
---


 63%|██████▎   | 1890/3000 [16:45<10:14,  1.81it/s]

Episode 1890, Reward: 106.9101
Policy Loss: -0.0672, Value Loss: 0.2113, Entropy Loss: -59.8380
Current Intensity: 3.7278, Max Intensity: 5.3737
---


 63%|██████▎   | 1895/3000 [16:47<11:43,  1.57it/s]

Episode 1895, Reward: 98.0151
Policy Loss: -0.1237, Value Loss: 0.1094, Entropy Loss: -58.9815
Current Intensity: 2.0952, Max Intensity: 5.2320
---


 63%|██████▎   | 1899/3000 [16:48<07:01,  2.61it/s]

Episode 1900, Reward: 107.6634
Policy Loss: -0.0719, Value Loss: 0.1431, Entropy Loss: -58.4706
Current Intensity: 2.6623, Max Intensity: 5.6130
---


 63%|██████▎   | 1900/3000 [16:51<19:38,  1.07s/it]

Evaluation at episode 1900: Average Reward = 126.2201


 64%|██████▎   | 1905/3000 [16:54<12:31,  1.46it/s]

Episode 1905, Reward: 103.9284
Policy Loss: -0.1529, Value Loss: 0.0869, Entropy Loss: -57.7091
Current Intensity: 3.2640, Max Intensity: 7.3463
---


 64%|██████▎   | 1910/3000 [16:56<10:28,  1.73it/s]

Episode 1910, Reward: 101.5829
Policy Loss: -0.2343, Value Loss: 0.0636, Entropy Loss: -57.9523
Current Intensity: 2.8117, Max Intensity: 7.9552
---


 64%|██████▍   | 1915/3000 [16:58<11:27,  1.58it/s]

Episode 1915, Reward: 106.7585
Policy Loss: -0.1553, Value Loss: 0.0876, Entropy Loss: -57.9083
Current Intensity: 1.1322, Max Intensity: 7.8380
---


 64%|██████▍   | 1920/3000 [17:01<10:15,  1.76it/s]

Episode 1920, Reward: 104.0465
Policy Loss: -0.2014, Value Loss: 0.0977, Entropy Loss: -57.3686
Current Intensity: 0.9994, Max Intensity: 6.0794
---


 64%|██████▍   | 1924/3000 [17:02<06:28,  2.77it/s]

Episode 1925, Reward: 101.8397
Policy Loss: -0.0937, Value Loss: 0.0774, Entropy Loss: -57.2554
Current Intensity: 2.3372, Max Intensity: 7.9205
---


 64%|██████▍   | 1925/3000 [17:04<19:05,  1.07s/it]

Evaluation at episode 1925: Average Reward = 126.3481


 64%|██████▍   | 1930/3000 [17:07<11:37,  1.53it/s]

Episode 1930, Reward: 107.5361
Policy Loss: -0.0928, Value Loss: 0.0999, Entropy Loss: -56.5548
Current Intensity: 2.9309, Max Intensity: 7.3707
---


 64%|██████▍   | 1935/3000 [17:09<10:57,  1.62it/s]

Episode 1935, Reward: 107.4891
Policy Loss: -0.1441, Value Loss: 0.0558, Entropy Loss: -56.1655
Current Intensity: 4.0197, Max Intensity: 7.5999
---


 65%|██████▍   | 1940/3000 [17:11<10:14,  1.72it/s]

Episode 1940, Reward: 106.1718
Policy Loss: -0.2625, Value Loss: 0.1206, Entropy Loss: -57.2451
Current Intensity: 1.9894, Max Intensity: 7.6450
---


 65%|██████▍   | 1945/3000 [17:14<10:55,  1.61it/s]

Episode 1945, Reward: 108.5845
Policy Loss: -0.3148, Value Loss: 0.0981, Entropy Loss: -57.2848
Current Intensity: 3.2386, Max Intensity: 7.5765
---


 65%|██████▍   | 1949/3000 [17:15<06:19,  2.77it/s]

Episode 1950, Reward: 105.1489
Policy Loss: -0.2171, Value Loss: 0.0458, Entropy Loss: -57.1779
Current Intensity: 2.3715, Max Intensity: 6.3081
---


 65%|██████▌   | 1950/3000 [17:18<17:30,  1.00s/it]

Evaluation at episode 1950: Average Reward = 126.9164


 65%|██████▌   | 1955/3000 [17:20<11:20,  1.54it/s]

Episode 1955, Reward: 111.0098
Policy Loss: -0.1422, Value Loss: 0.1229, Entropy Loss: -56.9155
Current Intensity: 2.9077, Max Intensity: 8.8723
---


 65%|██████▌   | 1960/3000 [17:23<11:40,  1.48it/s]

Episode 1960, Reward: 110.5213
Policy Loss: -0.1862, Value Loss: 0.0941, Entropy Loss: -57.9768
Current Intensity: 3.9457, Max Intensity: 8.8214
---


 66%|██████▌   | 1965/3000 [17:25<09:55,  1.74it/s]

Episode 1965, Reward: 109.1343
Policy Loss: -0.1573, Value Loss: 0.1084, Entropy Loss: -58.2426
Current Intensity: 2.1686, Max Intensity: 8.8570
---


 66%|██████▌   | 1970/3000 [17:28<09:58,  1.72it/s]

Episode 1970, Reward: 110.4783
Policy Loss: -0.2234, Value Loss: 0.0581, Entropy Loss: -56.4640
Current Intensity: 3.0149, Max Intensity: 8.8563
---


 66%|██████▌   | 1974/3000 [17:29<07:25,  2.30it/s]

Episode 1975, Reward: 106.8203
Policy Loss: -0.0829, Value Loss: 0.0728, Entropy Loss: -56.1964
Current Intensity: 2.7177, Max Intensity: 8.8083
---


 66%|██████▌   | 1975/3000 [17:32<20:13,  1.18s/it]

Evaluation at episode 1975: Average Reward = 126.8147


 66%|██████▌   | 1980/3000 [17:35<14:01,  1.21it/s]

Episode 1980, Reward: 109.7002
Policy Loss: -0.1294, Value Loss: 0.0627, Entropy Loss: -56.9736
Current Intensity: 2.0933, Max Intensity: 7.5125
---


 66%|██████▌   | 1985/3000 [17:38<12:23,  1.36it/s]

Episode 1985, Reward: 110.4395
Policy Loss: -0.1094, Value Loss: 0.0793, Entropy Loss: -56.7010
Current Intensity: 2.9087, Max Intensity: 8.8172
---


 66%|██████▋   | 1990/3000 [17:41<13:04,  1.29it/s]

Episode 1990, Reward: 109.9741
Policy Loss: -0.3035, Value Loss: 0.0810, Entropy Loss: -55.9433
Current Intensity: 3.8739, Max Intensity: 8.8759
---


 66%|██████▋   | 1995/3000 [17:45<13:54,  1.20it/s]

Episode 1995, Reward: 112.7312
Policy Loss: -0.2173, Value Loss: 0.0636, Entropy Loss: -56.3890
Current Intensity: 2.4249, Max Intensity: 8.7839
---


 67%|██████▋   | 1999/3000 [17:46<08:18,  2.01it/s]

Episode 2000, Reward: 105.8878
Policy Loss: -0.0976, Value Loss: 0.0643, Entropy Loss: -56.0357
Current Intensity: 2.7323, Max Intensity: 7.1728
---


 67%|██████▋   | 2000/3000 [17:50<23:43,  1.42s/it]

Evaluation at episode 2000: Average Reward = 127.0875


 67%|██████▋   | 2005/3000 [17:52<13:54,  1.19it/s]

Episode 2005, Reward: 112.7671
Policy Loss: -0.1410, Value Loss: 0.0634, Entropy Loss: -55.8276
Current Intensity: 2.5558, Max Intensity: 8.8475
---


 67%|██████▋   | 2010/3000 [17:55<12:49,  1.29it/s]

Episode 2010, Reward: 112.2022
Policy Loss: -0.1452, Value Loss: 0.0735, Entropy Loss: -55.6520
Current Intensity: 2.1391, Max Intensity: 7.4353
---


 67%|██████▋   | 2015/3000 [17:58<10:12,  1.61it/s]

Episode 2015, Reward: 108.0711
Policy Loss: -0.1794, Value Loss: 0.0718, Entropy Loss: -56.1437
Current Intensity: 3.3902, Max Intensity: 8.7995
---


 67%|██████▋   | 2020/3000 [18:00<10:28,  1.56it/s]

Episode 2020, Reward: 104.9840
Policy Loss: -0.2517, Value Loss: 0.0675, Entropy Loss: -55.2586
Current Intensity: 2.2519, Max Intensity: 8.8121
---


 67%|██████▋   | 2024/3000 [18:01<06:07,  2.66it/s]

Episode 2025, Reward: 103.1819
Policy Loss: -0.1640, Value Loss: 0.1292, Entropy Loss: -55.9985
Current Intensity: 1.8997, Max Intensity: 6.9937
---


 68%|██████▊   | 2025/3000 [18:04<17:38,  1.09s/it]

Evaluation at episode 2025: Average Reward = 126.5122


 68%|██████▊   | 2030/3000 [18:06<10:55,  1.48it/s]

Episode 2030, Reward: 114.4024
Policy Loss: -0.0562, Value Loss: 0.0625, Entropy Loss: -55.1227
Current Intensity: 2.6679, Max Intensity: 8.8518
---


 68%|██████▊   | 2035/3000 [18:09<10:02,  1.60it/s]

Episode 2035, Reward: 105.8610
Policy Loss: -0.1490, Value Loss: 0.0440, Entropy Loss: -55.8545
Current Intensity: 1.5286, Max Intensity: 8.8109
---


 68%|██████▊   | 2040/3000 [18:11<10:08,  1.58it/s]

Episode 2040, Reward: 109.6565
Policy Loss: -0.0616, Value Loss: 0.0647, Entropy Loss: -55.6047
Current Intensity: 2.6322, Max Intensity: 7.3516
---


 68%|██████▊   | 2045/3000 [18:14<09:58,  1.59it/s]

Episode 2045, Reward: 109.3370
Policy Loss: -0.0282, Value Loss: 0.0609, Entropy Loss: -55.9693
Current Intensity: 3.1864, Max Intensity: 8.8427
---


 68%|██████▊   | 2049/3000 [18:15<06:12,  2.55it/s]

Episode 2050, Reward: 106.2526
Policy Loss: -0.1652, Value Loss: 0.0712, Entropy Loss: -55.4709
Current Intensity: 3.4055, Max Intensity: 6.9130
---


 68%|██████▊   | 2050/3000 [18:18<16:54,  1.07s/it]

Evaluation at episode 2050: Average Reward = 126.2032


 68%|██████▊   | 2055/3000 [18:20<10:27,  1.51it/s]

Episode 2055, Reward: 111.5107
Policy Loss: -0.1555, Value Loss: 0.0744, Entropy Loss: -56.3381
Current Intensity: 2.3261, Max Intensity: 8.9638
---


 69%|██████▊   | 2060/3000 [18:23<09:27,  1.66it/s]

Episode 2060, Reward: 107.4038
Policy Loss: -0.0987, Value Loss: 0.0870, Entropy Loss: -55.8136
Current Intensity: 3.2007, Max Intensity: 8.8214
---


 69%|██████▉   | 2065/3000 [18:25<09:28,  1.64it/s]

Episode 2065, Reward: 100.0555
Policy Loss: 0.0691, Value Loss: 0.0492, Entropy Loss: -55.5845
Current Intensity: 2.7577, Max Intensity: 7.5425
---


 69%|██████▉   | 2070/3000 [18:27<09:33,  1.62it/s]

Episode 2070, Reward: 105.6500
Policy Loss: 0.0025, Value Loss: 0.0689, Entropy Loss: -54.8913
Current Intensity: 2.0479, Max Intensity: 8.8064
---


 69%|██████▉   | 2074/3000 [18:29<05:53,  2.62it/s]

Episode 2075, Reward: 111.0285
Policy Loss: -0.0791, Value Loss: 0.1066, Entropy Loss: -55.7626
Current Intensity: 2.3156, Max Intensity: 8.7789
---


 69%|██████▉   | 2075/3000 [18:31<16:09,  1.05s/it]

Evaluation at episode 2075: Average Reward = 126.1292


 69%|██████▉   | 2080/3000 [18:34<10:19,  1.49it/s]

Episode 2080, Reward: 111.4393
Policy Loss: -0.1539, Value Loss: 0.0492, Entropy Loss: -55.9083
Current Intensity: 1.7973, Max Intensity: 6.5879
---


 70%|██████▉   | 2085/3000 [18:36<09:17,  1.64it/s]

Episode 2085, Reward: 114.1492
Policy Loss: -0.0832, Value Loss: 0.0786, Entropy Loss: -56.1579
Current Intensity: 4.0560, Max Intensity: 7.4290
---


 70%|██████▉   | 2090/3000 [18:38<09:10,  1.65it/s]

Episode 2090, Reward: 105.5965
Policy Loss: -0.1128, Value Loss: 0.0477, Entropy Loss: -54.7977
Current Intensity: 2.2264, Max Intensity: 6.3973
---


 70%|██████▉   | 2095/3000 [18:41<08:59,  1.68it/s]

Episode 2095, Reward: 107.7055
Policy Loss: -0.2591, Value Loss: 0.0517, Entropy Loss: -55.7743
Current Intensity: 2.1531, Max Intensity: 7.3973
---


 70%|██████▉   | 2099/3000 [18:42<05:52,  2.56it/s]

Episode 2100, Reward: 111.3403
Policy Loss: -0.2329, Value Loss: 0.0450, Entropy Loss: -54.7617
Current Intensity: 1.8481, Max Intensity: 7.4073
---


 70%|███████   | 2100/3000 [18:45<16:28,  1.10s/it]

Evaluation at episode 2100: Average Reward = 125.8121


 70%|███████   | 2105/3000 [18:47<09:37,  1.55it/s]

Episode 2105, Reward: 108.1646
Policy Loss: -0.2311, Value Loss: 0.0495, Entropy Loss: -53.6428
Current Intensity: 3.3315, Max Intensity: 7.0013
---


 70%|███████   | 2110/3000 [18:50<09:21,  1.59it/s]

Episode 2110, Reward: 110.3828
Policy Loss: -0.1099, Value Loss: 0.0552, Entropy Loss: -53.8274
Current Intensity: 4.5332, Max Intensity: 5.7146
---


 70%|███████   | 2115/3000 [18:52<08:24,  1.75it/s]

Episode 2115, Reward: 113.6441
Policy Loss: -0.2183, Value Loss: 0.0466, Entropy Loss: -53.8798
Current Intensity: 2.4605, Max Intensity: 6.6734
---


 71%|███████   | 2120/3000 [18:54<09:11,  1.60it/s]

Episode 2120, Reward: 109.2178
Policy Loss: -0.1990, Value Loss: 0.1003, Entropy Loss: -54.4847
Current Intensity: 4.0761, Max Intensity: 6.6293
---


 71%|███████   | 2124/3000 [18:56<05:52,  2.48it/s]

Episode 2125, Reward: 113.6144
Policy Loss: -0.2540, Value Loss: 0.0623, Entropy Loss: -53.5601
Current Intensity: 2.2786, Max Intensity: 6.5881
---


 71%|███████   | 2125/3000 [18:58<15:42,  1.08s/it]

Evaluation at episode 2125: Average Reward = 126.4990


 71%|███████   | 2130/3000 [19:01<09:45,  1.49it/s]

Episode 2130, Reward: 113.0024
Policy Loss: -0.2487, Value Loss: 0.0658, Entropy Loss: -54.4332
Current Intensity: 2.5324, Max Intensity: 8.1055
---


 71%|███████   | 2135/3000 [19:03<09:11,  1.57it/s]

Episode 2135, Reward: 108.9419
Policy Loss: -0.1716, Value Loss: 0.0561, Entropy Loss: -54.4191
Current Intensity: 5.2058, Max Intensity: 8.0429
---


 71%|███████▏  | 2140/3000 [19:06<09:01,  1.59it/s]

Episode 2140, Reward: 112.0815
Policy Loss: -0.0851, Value Loss: 0.0517, Entropy Loss: -54.9042
Current Intensity: 3.8898, Max Intensity: 8.0196
---


 72%|███████▏  | 2145/3000 [19:08<08:07,  1.75it/s]

Episode 2145, Reward: 115.6347
Policy Loss: -0.1405, Value Loss: 0.0474, Entropy Loss: -53.6987
Current Intensity: 3.2597, Max Intensity: 8.0260
---


 72%|███████▏  | 2149/3000 [19:09<05:37,  2.52it/s]

Episode 2150, Reward: 115.3437
Policy Loss: -0.0752, Value Loss: 0.0513, Entropy Loss: -53.9421
Current Intensity: 3.7853, Max Intensity: 7.4795
---


 72%|███████▏  | 2150/3000 [19:12<14:55,  1.05s/it]

Evaluation at episode 2150: Average Reward = 126.7195


 72%|███████▏  | 2155/3000 [19:14<09:44,  1.45it/s]

Episode 2155, Reward: 110.0273
Policy Loss: -0.2006, Value Loss: 0.1111, Entropy Loss: -55.6200
Current Intensity: 3.3119, Max Intensity: 8.0253
---


 72%|███████▏  | 2160/3000 [19:17<08:24,  1.67it/s]

Episode 2160, Reward: 111.9191
Policy Loss: -0.0871, Value Loss: 0.0595, Entropy Loss: -53.1711
Current Intensity: 2.6319, Max Intensity: 8.0586
---


 72%|███████▏  | 2165/3000 [19:19<08:18,  1.67it/s]

Episode 2165, Reward: 113.5756
Policy Loss: -0.1533, Value Loss: 0.0553, Entropy Loss: -54.6211
Current Intensity: 3.9366, Max Intensity: 8.0667
---


 72%|███████▏  | 2170/3000 [19:21<08:16,  1.67it/s]

Episode 2170, Reward: 112.3254
Policy Loss: -0.1219, Value Loss: 0.0455, Entropy Loss: -53.7396
Current Intensity: 1.8370, Max Intensity: 8.0296
---


 72%|███████▏  | 2174/3000 [19:23<05:08,  2.68it/s]

Episode 2175, Reward: 116.2911
Policy Loss: -0.1489, Value Loss: 0.0474, Entropy Loss: -53.8953
Current Intensity: 3.7393, Max Intensity: 8.0520
---


 72%|███████▎  | 2175/3000 [19:25<14:56,  1.09s/it]

Evaluation at episode 2175: Average Reward = 126.5435


 73%|███████▎  | 2180/3000 [19:28<08:49,  1.55it/s]

Episode 2180, Reward: 113.4641
Policy Loss: -0.1886, Value Loss: 0.0617, Entropy Loss: -54.1507
Current Intensity: 3.4343, Max Intensity: 8.0621
---


 73%|███████▎  | 2185/3000 [19:30<09:08,  1.49it/s]

Episode 2185, Reward: 113.2998
Policy Loss: -0.1540, Value Loss: 0.0893, Entropy Loss: -54.4512
Current Intensity: 3.0196, Max Intensity: 8.0924
---


 73%|███████▎  | 2190/3000 [19:32<07:48,  1.73it/s]

Episode 2190, Reward: 114.4488
Policy Loss: -0.1566, Value Loss: 0.0490, Entropy Loss: -54.5465
Current Intensity: 3.9479, Max Intensity: 8.0910
---


 73%|███████▎  | 2195/3000 [19:35<08:34,  1.56it/s]

Episode 2195, Reward: 111.0944
Policy Loss: -0.0470, Value Loss: 0.0306, Entropy Loss: -54.1505
Current Intensity: 3.3535, Max Intensity: 8.1095
---


 73%|███████▎  | 2199/3000 [19:36<05:28,  2.44it/s]

Episode 2200, Reward: 117.6917
Policy Loss: -0.1608, Value Loss: 0.0701, Entropy Loss: -54.2631
Current Intensity: 3.5801, Max Intensity: 8.0273
---


 73%|███████▎  | 2200/3000 [19:39<14:13,  1.07s/it]

Evaluation at episode 2200: Average Reward = 126.3712


 74%|███████▎  | 2205/3000 [19:42<09:43,  1.36it/s]

Episode 2205, Reward: 115.5011
Policy Loss: -0.1832, Value Loss: 0.0421, Entropy Loss: -54.7007
Current Intensity: 2.9316, Max Intensity: 6.6133
---


 74%|███████▎  | 2210/3000 [19:44<07:38,  1.72it/s]

Episode 2210, Reward: 116.2132
Policy Loss: -0.1745, Value Loss: 0.0392, Entropy Loss: -54.4104
Current Intensity: 3.5308, Max Intensity: 6.3858
---


 74%|███████▍  | 2215/3000 [19:46<08:53,  1.47it/s]

Episode 2215, Reward: 115.4714
Policy Loss: -0.0462, Value Loss: 0.0415, Entropy Loss: -53.9548
Current Intensity: 4.4986, Max Intensity: 6.8861
---


 74%|███████▍  | 2220/3000 [19:49<07:36,  1.71it/s]

Episode 2220, Reward: 111.7291
Policy Loss: -0.0820, Value Loss: 0.0502, Entropy Loss: -53.8562
Current Intensity: 3.5723, Max Intensity: 6.9447
---


 74%|███████▍  | 2224/3000 [19:50<04:46,  2.71it/s]

Episode 2225, Reward: 118.4467
Policy Loss: -0.1635, Value Loss: 0.0155, Entropy Loss: -53.7112
Current Intensity: 3.7034, Max Intensity: 6.8697
---


 74%|███████▍  | 2225/3000 [19:53<14:08,  1.10s/it]

Evaluation at episode 2225: Average Reward = 126.5885


 74%|███████▍  | 2230/3000 [19:55<08:47,  1.46it/s]

Episode 2230, Reward: 109.6304
Policy Loss: -0.0499, Value Loss: 0.0379, Entropy Loss: -54.7090
Current Intensity: 2.4490, Max Intensity: 6.3856
---


 74%|███████▍  | 2235/3000 [19:57<07:42,  1.65it/s]

Episode 2235, Reward: 105.1625
Policy Loss: -0.1378, Value Loss: 0.0629, Entropy Loss: -54.2996
Current Intensity: 1.8014, Max Intensity: 6.5605
---


 75%|███████▍  | 2240/3000 [20:00<07:19,  1.73it/s]

Episode 2240, Reward: 115.1899
Policy Loss: -0.1166, Value Loss: 0.0321, Entropy Loss: -54.1084
Current Intensity: 2.9885, Max Intensity: 5.8111
---


 75%|███████▍  | 2245/3000 [20:02<07:43,  1.63it/s]

Episode 2245, Reward: 109.6584
Policy Loss: -0.0205, Value Loss: 0.0673, Entropy Loss: -54.0144
Current Intensity: 1.7452, Max Intensity: 5.9312
---


 75%|███████▍  | 2249/3000 [20:03<04:36,  2.72it/s]

Episode 2250, Reward: 115.8310
Policy Loss: -0.0591, Value Loss: 0.0340, Entropy Loss: -54.3528
Current Intensity: 4.7181, Max Intensity: 5.9634
---


 75%|███████▌  | 2250/3000 [20:06<13:12,  1.06s/it]

Evaluation at episode 2250: Average Reward = 126.9692


 75%|███████▌  | 2255/3000 [20:08<07:57,  1.56it/s]

Episode 2255, Reward: 112.4463
Policy Loss: -0.1308, Value Loss: 0.0375, Entropy Loss: -53.5083
Current Intensity: 3.9340, Max Intensity: 8.0856
---


 75%|███████▌  | 2260/3000 [20:11<07:27,  1.65it/s]

Episode 2260, Reward: 115.1117
Policy Loss: -0.1083, Value Loss: 0.0345, Entropy Loss: -53.1956
Current Intensity: 3.7120, Max Intensity: 8.1055
---


 76%|███████▌  | 2265/3000 [20:13<07:29,  1.63it/s]

Episode 2265, Reward: 116.6817
Policy Loss: -0.1737, Value Loss: 0.0668, Entropy Loss: -53.9000
Current Intensity: 2.4714, Max Intensity: 8.0823
---


 76%|███████▌  | 2270/3000 [20:16<07:37,  1.59it/s]

Episode 2270, Reward: 117.0410
Policy Loss: -0.0847, Value Loss: 0.0246, Entropy Loss: -53.5946
Current Intensity: 5.5377, Max Intensity: 6.5756
---


 76%|███████▌  | 2274/3000 [20:17<05:06,  2.37it/s]

Episode 2275, Reward: 116.6004
Policy Loss: -0.2078, Value Loss: 0.0487, Entropy Loss: -53.6003
Current Intensity: 3.6008, Max Intensity: 8.0184
---


 76%|███████▌  | 2275/3000 [20:20<12:44,  1.05s/it]

Evaluation at episode 2275: Average Reward = 126.9571


 76%|███████▌  | 2280/3000 [20:23<09:51,  1.22it/s]

Episode 2280, Reward: 115.9362
Policy Loss: -0.2853, Value Loss: 0.0456, Entropy Loss: -54.1563
Current Intensity: 5.1637, Max Intensity: 7.0198
---


 76%|███████▌  | 2285/3000 [20:25<07:30,  1.59it/s]

Episode 2285, Reward: 114.2530
Policy Loss: -0.1723, Value Loss: 0.0298, Entropy Loss: -53.4406
Current Intensity: 3.7864, Max Intensity: 8.2928
---


 76%|███████▋  | 2290/3000 [20:28<08:38,  1.37it/s]

Episode 2290, Reward: 113.0840
Policy Loss: -0.0851, Value Loss: 0.0293, Entropy Loss: -53.3651
Current Intensity: 2.1737, Max Intensity: 8.2530
---


 76%|███████▋  | 2295/3000 [20:32<10:48,  1.09it/s]

Episode 2295, Reward: 114.3943
Policy Loss: -0.1367, Value Loss: 0.0463, Entropy Loss: -53.3844
Current Intensity: 2.4943, Max Intensity: 8.2622
---


 77%|███████▋  | 2299/3000 [20:33<05:40,  2.06it/s]

Episode 2300, Reward: 113.4192
Policy Loss: -0.1989, Value Loss: 0.0534, Entropy Loss: -53.1817
Current Intensity: 3.3798, Max Intensity: 8.2888
---


 77%|███████▋  | 2300/3000 [20:36<14:49,  1.27s/it]

Evaluation at episode 2300: Average Reward = 127.1433


 77%|███████▋  | 2305/3000 [20:39<08:46,  1.32it/s]

Episode 2305, Reward: 112.2309
Policy Loss: -0.1595, Value Loss: 0.0479, Entropy Loss: -53.9778
Current Intensity: 3.0023, Max Intensity: 8.1734
---


 77%|███████▋  | 2310/3000 [20:41<07:21,  1.56it/s]

Episode 2310, Reward: 110.3180
Policy Loss: -0.1069, Value Loss: 0.0533, Entropy Loss: -53.9737
Current Intensity: 2.7554, Max Intensity: 8.2000
---


 77%|███████▋  | 2315/3000 [20:44<06:55,  1.65it/s]

Episode 2315, Reward: 112.8697
Policy Loss: -0.2416, Value Loss: 0.0426, Entropy Loss: -54.6966
Current Intensity: 4.0462, Max Intensity: 8.1399
---


 77%|███████▋  | 2320/3000 [20:46<06:48,  1.66it/s]

Episode 2320, Reward: 114.0297
Policy Loss: -0.0908, Value Loss: 0.0509, Entropy Loss: -54.8731
Current Intensity: 1.6243, Max Intensity: 8.1598
---


 77%|███████▋  | 2324/3000 [20:47<04:21,  2.59it/s]

Episode 2325, Reward: 114.6424
Policy Loss: -0.1352, Value Loss: 0.0412, Entropy Loss: -54.4895
Current Intensity: 1.9574, Max Intensity: 8.1490
---


 78%|███████▊  | 2325/3000 [20:50<11:55,  1.06s/it]

Evaluation at episode 2325: Average Reward = 126.0622


 78%|███████▊  | 2330/3000 [20:53<07:53,  1.42it/s]

Episode 2330, Reward: 108.0290
Policy Loss: -0.1652, Value Loss: 0.0629, Entropy Loss: -55.5135
Current Intensity: 5.3566, Max Intensity: 7.9120
---


 78%|███████▊  | 2335/3000 [20:55<06:48,  1.63it/s]

Episode 2335, Reward: 110.3671
Policy Loss: -0.2554, Value Loss: 0.0557, Entropy Loss: -54.1198
Current Intensity: 1.9645, Max Intensity: 7.7831
---


 78%|███████▊  | 2340/3000 [20:57<06:53,  1.60it/s]

Episode 2340, Reward: 112.0413
Policy Loss: -0.2481, Value Loss: 0.0742, Entropy Loss: -54.4455
Current Intensity: 2.6655, Max Intensity: 7.8533
---


 78%|███████▊  | 2345/3000 [21:00<06:30,  1.68it/s]

Episode 2345, Reward: 112.1601
Policy Loss: -0.0884, Value Loss: 0.0504, Entropy Loss: -55.3110
Current Intensity: 4.2776, Max Intensity: 7.8177
---


 78%|███████▊  | 2349/3000 [21:01<04:04,  2.66it/s]

Episode 2350, Reward: 107.7292
Policy Loss: -0.3460, Value Loss: 0.0377, Entropy Loss: -54.5427
Current Intensity: 3.4088, Max Intensity: 6.5854
---


 78%|███████▊  | 2350/3000 [21:04<11:33,  1.07s/it]

Evaluation at episode 2350: Average Reward = 125.5003


 78%|███████▊  | 2355/3000 [21:06<07:48,  1.38it/s]

Episode 2355, Reward: 112.9599
Policy Loss: -0.0930, Value Loss: 0.0357, Entropy Loss: -54.2202
Current Intensity: 4.6071, Max Intensity: 7.7321
---


 79%|███████▊  | 2360/3000 [21:09<06:24,  1.66it/s]

Episode 2360, Reward: 112.1318
Policy Loss: -0.1770, Value Loss: 0.0518, Entropy Loss: -54.4247
Current Intensity: 5.7170, Max Intensity: 6.0676
---


 79%|███████▉  | 2365/3000 [21:11<06:18,  1.68it/s]

Episode 2365, Reward: 114.9489
Policy Loss: -0.0525, Value Loss: 0.0331, Entropy Loss: -53.4438
Current Intensity: 3.0992, Max Intensity: 7.8400
---


 79%|███████▉  | 2370/3000 [21:13<06:14,  1.68it/s]

Episode 2370, Reward: 112.9740
Policy Loss: -0.0131, Value Loss: 0.0521, Entropy Loss: -53.5203
Current Intensity: 3.1614, Max Intensity: 7.8655
---


 79%|███████▉  | 2374/3000 [21:15<04:04,  2.56it/s]

Episode 2375, Reward: 110.1455
Policy Loss: -0.0857, Value Loss: 0.0478, Entropy Loss: -53.6676
Current Intensity: 2.9711, Max Intensity: 7.8653
---


 79%|███████▉  | 2375/3000 [21:17<11:18,  1.08s/it]

Evaluation at episode 2375: Average Reward = 126.7779


 79%|███████▉  | 2380/3000 [21:20<06:58,  1.48it/s]

Episode 2380, Reward: 114.6854
Policy Loss: -0.1972, Value Loss: 0.0338, Entropy Loss: -53.2665
Current Intensity: 2.8218, Max Intensity: 7.6894
---


 80%|███████▉  | 2385/3000 [21:22<06:46,  1.51it/s]

Episode 2385, Reward: 117.1192
Policy Loss: -0.2353, Value Loss: 0.0616, Entropy Loss: -54.3398
Current Intensity: 3.9918, Max Intensity: 7.7263
---


 80%|███████▉  | 2390/3000 [21:25<05:47,  1.76it/s]

Episode 2390, Reward: 116.9977
Policy Loss: -0.2073, Value Loss: 0.0275, Entropy Loss: -53.0554
Current Intensity: 3.7365, Max Intensity: 7.7554
---


 80%|███████▉  | 2395/3000 [21:27<06:13,  1.62it/s]

Episode 2395, Reward: 117.6646
Policy Loss: -0.1666, Value Loss: 0.0452, Entropy Loss: -53.4805
Current Intensity: 3.6466, Max Intensity: 7.6746
---


 80%|███████▉  | 2399/3000 [21:28<03:56,  2.54it/s]

Episode 2400, Reward: 120.0489
Policy Loss: -0.2032, Value Loss: 0.0329, Entropy Loss: -52.5972
Current Intensity: 3.1524, Max Intensity: 7.6828
---


 80%|████████  | 2400/3000 [21:31<10:42,  1.07s/it]

Evaluation at episode 2400: Average Reward = 126.0797


 80%|████████  | 2405/3000 [21:34<06:53,  1.44it/s]

Episode 2405, Reward: 116.3068
Policy Loss: -0.1539, Value Loss: 0.0330, Entropy Loss: -53.2723
Current Intensity: 3.8966, Max Intensity: 7.8066
---


 80%|████████  | 2410/3000 [21:36<06:20,  1.55it/s]

Episode 2410, Reward: 117.9559
Policy Loss: 0.0289, Value Loss: 0.0253, Entropy Loss: -53.3204
Current Intensity: 4.4704, Max Intensity: 6.1611
---


 80%|████████  | 2415/3000 [21:38<05:57,  1.64it/s]

Episode 2415, Reward: 117.5006
Policy Loss: -0.3161, Value Loss: 0.0214, Entropy Loss: -53.4335
Current Intensity: 3.0723, Max Intensity: 6.5763
---


 81%|████████  | 2420/3000 [21:41<05:52,  1.65it/s]

Episode 2420, Reward: 118.4368
Policy Loss: -0.2343, Value Loss: 0.0250, Entropy Loss: -53.6656
Current Intensity: 3.9131, Max Intensity: 6.0311
---


 81%|████████  | 2424/3000 [21:42<03:42,  2.59it/s]

Episode 2425, Reward: 112.1205
Policy Loss: -0.3042, Value Loss: 0.0376, Entropy Loss: -53.5810
Current Intensity: 4.4823, Max Intensity: 6.5032
---


 81%|████████  | 2425/3000 [21:45<09:56,  1.04s/it]

Evaluation at episode 2425: Average Reward = 127.1140


 81%|████████  | 2430/3000 [21:47<06:46,  1.40it/s]

Episode 2430, Reward: 117.3982
Policy Loss: -0.1068, Value Loss: 0.0241, Entropy Loss: -52.7597
Current Intensity: 3.1561, Max Intensity: 7.2674
---


 81%|████████  | 2435/3000 [21:49<05:43,  1.65it/s]

Episode 2435, Reward: 118.7891
Policy Loss: -0.0611, Value Loss: 0.0326, Entropy Loss: -52.5462
Current Intensity: 2.3727, Max Intensity: 7.8234
---


 81%|████████▏ | 2440/3000 [21:52<05:53,  1.58it/s]

Episode 2440, Reward: 117.3462
Policy Loss: -0.1821, Value Loss: 0.0336, Entropy Loss: -52.9969
Current Intensity: 4.0305, Max Intensity: 6.3751
---


 82%|████████▏ | 2445/3000 [21:54<05:51,  1.58it/s]

Episode 2445, Reward: 117.9891
Policy Loss: -0.0902, Value Loss: 0.0339, Entropy Loss: -53.4068
Current Intensity: 4.6211, Max Intensity: 6.4458
---


 82%|████████▏ | 2449/3000 [21:56<03:28,  2.64it/s]

Episode 2450, Reward: 118.5270
Policy Loss: -0.1156, Value Loss: 0.0370, Entropy Loss: -53.3648
Current Intensity: 3.4800, Max Intensity: 6.2858
---


 82%|████████▏ | 2450/3000 [21:59<12:05,  1.32s/it]

Evaluation at episode 2450: Average Reward = 127.2477


 82%|████████▏ | 2455/3000 [22:01<06:22,  1.43it/s]

Episode 2455, Reward: 118.8762
Policy Loss: -0.0268, Value Loss: 0.0374, Entropy Loss: -53.5530
Current Intensity: 4.3381, Max Intensity: 6.7271
---


 82%|████████▏ | 2460/3000 [22:04<06:26,  1.40it/s]

Episode 2460, Reward: 119.2273
Policy Loss: -0.0717, Value Loss: 0.0199, Entropy Loss: -52.5891
Current Intensity: 3.4020, Max Intensity: 8.3634
---


 82%|████████▏ | 2465/3000 [22:07<05:16,  1.69it/s]

Episode 2465, Reward: 119.9951
Policy Loss: -0.1004, Value Loss: 0.0345, Entropy Loss: -51.9258
Current Intensity: 4.7732, Max Intensity: 8.3662
---


 82%|████████▏ | 2470/3000 [22:09<05:22,  1.64it/s]

Episode 2470, Reward: 122.0239
Policy Loss: -0.1332, Value Loss: 0.0174, Entropy Loss: -51.3681
Current Intensity: 3.5870, Max Intensity: 8.4207
---


 82%|████████▏ | 2474/3000 [22:10<03:05,  2.84it/s]

Episode 2475, Reward: 120.2469
Policy Loss: -0.0755, Value Loss: 0.0235, Entropy Loss: -52.1046
Current Intensity: 3.7861, Max Intensity: 8.4214
---


 82%|████████▎ | 2475/3000 [22:13<09:32,  1.09s/it]

Evaluation at episode 2475: Average Reward = 127.4767


 83%|████████▎ | 2480/3000 [22:15<05:34,  1.55it/s]

Episode 2480, Reward: 121.3847
Policy Loss: -0.2207, Value Loss: 0.0148, Entropy Loss: -51.8120
Current Intensity: 3.3926, Max Intensity: 8.5147
---


 83%|████████▎ | 2485/3000 [22:18<05:26,  1.58it/s]

Episode 2485, Reward: 115.7468
Policy Loss: -0.1521, Value Loss: 0.0229, Entropy Loss: -51.9735
Current Intensity: 3.6774, Max Intensity: 8.5480
---


 83%|████████▎ | 2490/3000 [22:20<04:55,  1.72it/s]

Episode 2490, Reward: 121.3315
Policy Loss: -0.0904, Value Loss: 0.0166, Entropy Loss: -51.0000
Current Intensity: 4.7480, Max Intensity: 8.5226
---


 83%|████████▎ | 2495/3000 [22:23<04:58,  1.69it/s]

Episode 2495, Reward: 120.7959
Policy Loss: -0.1833, Value Loss: 0.0128, Entropy Loss: -50.5937
Current Intensity: 5.8296, Max Intensity: 8.4887
---


 83%|████████▎ | 2499/3000 [22:24<03:34,  2.34it/s]

Episode 2500, Reward: 120.8295
Policy Loss: -0.1736, Value Loss: 0.0273, Entropy Loss: -50.9136
Current Intensity: 3.1036, Max Intensity: 7.3831
---


 83%|████████▎ | 2500/3000 [22:27<09:05,  1.09s/it]

Evaluation at episode 2500: Average Reward = 126.4531


 84%|████████▎ | 2505/3000 [22:30<06:25,  1.28it/s]

Episode 2505, Reward: 113.7617
Policy Loss: -0.1636, Value Loss: 0.0373, Entropy Loss: -51.9090
Current Intensity: 3.4671, Max Intensity: 6.6794
---


 84%|████████▎ | 2510/3000 [22:32<04:52,  1.67it/s]

Episode 2510, Reward: 112.6211
Policy Loss: -0.0829, Value Loss: 0.0474, Entropy Loss: -51.5973
Current Intensity: 3.0167, Max Intensity: 6.6761
---


 84%|████████▍ | 2515/3000 [22:34<05:21,  1.51it/s]

Episode 2515, Reward: 113.8122
Policy Loss: -0.0535, Value Loss: 0.0279, Entropy Loss: -51.1316
Current Intensity: 4.3023, Max Intensity: 6.7032
---


 84%|████████▍ | 2520/3000 [22:37<04:46,  1.67it/s]

Episode 2520, Reward: 112.0304
Policy Loss: -0.1098, Value Loss: 0.0259, Entropy Loss: -51.3029
Current Intensity: 2.6737, Max Intensity: 5.1707
---


 84%|████████▍ | 2524/3000 [22:38<03:04,  2.57it/s]

Episode 2525, Reward: 119.7102
Policy Loss: -0.0087, Value Loss: 0.0350, Entropy Loss: -50.7540
Current Intensity: 5.4975, Max Intensity: 7.0768
---


 84%|████████▍ | 2525/3000 [22:41<08:30,  1.07s/it]

Evaluation at episode 2525: Average Reward = 126.6956


 84%|████████▍ | 2530/3000 [22:43<05:05,  1.54it/s]

Episode 2530, Reward: 111.9543
Policy Loss: -0.1231, Value Loss: 0.0714, Entropy Loss: -51.1537
Current Intensity: 3.6298, Max Intensity: 5.6678
---


 84%|████████▍ | 2535/3000 [22:46<04:52,  1.59it/s]

Episode 2535, Reward: 106.4666
Policy Loss: -0.1387, Value Loss: 0.0555, Entropy Loss: -51.2067
Current Intensity: 3.6643, Max Intensity: 5.9709
---


 85%|████████▍ | 2540/3000 [22:48<04:27,  1.72it/s]

Episode 2540, Reward: 119.8864
Policy Loss: -0.1775, Value Loss: 0.0637, Entropy Loss: -51.5471
Current Intensity: 3.4162, Max Intensity: 6.5208
---


 85%|████████▍ | 2545/3000 [22:50<04:56,  1.53it/s]

Episode 2545, Reward: 120.7913
Policy Loss: -0.0571, Value Loss: 0.0313, Entropy Loss: -50.5233
Current Intensity: 4.3895, Max Intensity: 6.7757
---


 85%|████████▍ | 2549/3000 [22:52<02:46,  2.71it/s]

Episode 2550, Reward: 119.3177
Policy Loss: -0.2688, Value Loss: 0.0302, Entropy Loss: -50.9913
Current Intensity: 2.9214, Max Intensity: 6.0054
---


 85%|████████▌ | 2550/3000 [22:54<08:05,  1.08s/it]

Evaluation at episode 2550: Average Reward = 126.4524


 85%|████████▌ | 2555/3000 [22:57<04:57,  1.50it/s]

Episode 2555, Reward: 117.4467
Policy Loss: -0.0474, Value Loss: 0.0261, Entropy Loss: -51.4797
Current Intensity: 3.8798, Max Intensity: 7.7672
---


 85%|████████▌ | 2560/3000 [22:59<04:32,  1.61it/s]

Episode 2560, Reward: 119.9912
Policy Loss: -0.3117, Value Loss: 0.0321, Entropy Loss: -51.1759
Current Intensity: 3.1356, Max Intensity: 7.8254
---


 86%|████████▌ | 2565/3000 [23:01<04:05,  1.77it/s]

Episode 2565, Reward: 119.7442
Policy Loss: -0.3024, Value Loss: 0.0258, Entropy Loss: -51.3152
Current Intensity: 4.2811, Max Intensity: 7.8717
---


 86%|████████▌ | 2570/3000 [23:04<04:32,  1.58it/s]

Episode 2570, Reward: 117.9441
Policy Loss: -0.1612, Value Loss: 0.0281, Entropy Loss: -51.9700
Current Intensity: 3.1795, Max Intensity: 7.8662
---


 86%|████████▌ | 2574/3000 [23:05<02:57,  2.40it/s]

Episode 2575, Reward: 120.5791
Policy Loss: -0.2050, Value Loss: 0.0216, Entropy Loss: -51.1583
Current Intensity: 4.3064, Max Intensity: 7.8391
---


 86%|████████▌ | 2575/3000 [23:08<07:34,  1.07s/it]

Evaluation at episode 2575: Average Reward = 126.2242


 86%|████████▌ | 2580/3000 [23:10<05:09,  1.36it/s]

Episode 2580, Reward: 114.7909
Policy Loss: -0.3199, Value Loss: 0.0534, Entropy Loss: -50.7806
Current Intensity: 2.0533, Max Intensity: 7.0974
---


 86%|████████▌ | 2585/3000 [23:13<04:05,  1.69it/s]

Episode 2585, Reward: 111.0025
Policy Loss: -0.2416, Value Loss: 0.0202, Entropy Loss: -50.0672
Current Intensity: 3.7822, Max Intensity: 5.8720
---


 86%|████████▋ | 2590/3000 [23:15<04:09,  1.64it/s]

Episode 2590, Reward: 113.6294
Policy Loss: -0.1604, Value Loss: 0.0325, Entropy Loss: -50.6470
Current Intensity: 4.0255, Max Intensity: 6.0372
---


 86%|████████▋ | 2595/3000 [23:17<03:59,  1.69it/s]

Episode 2595, Reward: 113.8959
Policy Loss: -0.0619, Value Loss: 0.0491, Entropy Loss: -49.1894
Current Intensity: 3.8681, Max Intensity: 7.1142
---


 87%|████████▋ | 2599/3000 [23:19<02:44,  2.43it/s]

Episode 2600, Reward: 117.0988
Policy Loss: -0.1422, Value Loss: 0.0454, Entropy Loss: -49.8801
Current Intensity: 4.3769, Max Intensity: 6.3897
---


 87%|████████▋ | 2600/3000 [23:22<07:15,  1.09s/it]

Evaluation at episode 2600: Average Reward = 126.8017


 87%|████████▋ | 2605/3000 [23:24<04:37,  1.42it/s]

Episode 2605, Reward: 113.4982
Policy Loss: -0.2937, Value Loss: 0.0375, Entropy Loss: -49.9760
Current Intensity: 2.6391, Max Intensity: 5.7704
---


 87%|████████▋ | 2610/3000 [23:27<04:07,  1.58it/s]

Episode 2610, Reward: 118.4548
Policy Loss: -0.1353, Value Loss: 0.0380, Entropy Loss: -50.0064
Current Intensity: 4.0901, Max Intensity: 6.2184
---


 87%|████████▋ | 2615/3000 [23:29<03:56,  1.63it/s]

Episode 2615, Reward: 119.7748
Policy Loss: -0.1754, Value Loss: 0.0309, Entropy Loss: -49.8251
Current Intensity: 4.6880, Max Intensity: 7.4008
---


 87%|████████▋ | 2620/3000 [23:32<03:58,  1.59it/s]

Episode 2620, Reward: 120.5058
Policy Loss: -0.1751, Value Loss: 0.0191, Entropy Loss: -49.7367
Current Intensity: 4.7740, Max Intensity: 7.3843
---


 87%|████████▋ | 2624/3000 [23:33<02:20,  2.67it/s]

Episode 2625, Reward: 119.6967
Policy Loss: -0.1712, Value Loss: 0.0194, Entropy Loss: -49.7416
Current Intensity: 2.1818, Max Intensity: 6.5817
---


 88%|████████▊ | 2625/3000 [23:36<06:57,  1.11s/it]

Evaluation at episode 2625: Average Reward = 127.2464


 88%|████████▊ | 2630/3000 [23:38<04:17,  1.44it/s]

Episode 2630, Reward: 121.5358
Policy Loss: -0.1648, Value Loss: 0.0230, Entropy Loss: -50.0851
Current Intensity: 2.9148, Max Intensity: 8.6587
---


 88%|████████▊ | 2635/3000 [23:41<03:51,  1.58it/s]

Episode 2635, Reward: 122.3447
Policy Loss: -0.1798, Value Loss: 0.0168, Entropy Loss: -49.3539
Current Intensity: 4.1854, Max Intensity: 8.6622
---


 88%|████████▊ | 2640/3000 [23:43<03:36,  1.66it/s]

Episode 2640, Reward: 117.4877
Policy Loss: -0.1464, Value Loss: 0.0231, Entropy Loss: -50.2531
Current Intensity: 3.2396, Max Intensity: 8.6550
---


 88%|████████▊ | 2645/3000 [23:45<03:41,  1.61it/s]

Episode 2645, Reward: 121.9377
Policy Loss: -0.1618, Value Loss: 0.0163, Entropy Loss: -49.4153
Current Intensity: 2.2754, Max Intensity: 8.7142
---


 88%|████████▊ | 2649/3000 [23:47<02:13,  2.62it/s]

Episode 2650, Reward: 120.8760
Policy Loss: -0.1102, Value Loss: 0.0121, Entropy Loss: -49.3227
Current Intensity: 4.4886, Max Intensity: 8.7017
---


 88%|████████▊ | 2650/3000 [23:49<06:14,  1.07s/it]

Evaluation at episode 2650: Average Reward = 127.2742


 88%|████████▊ | 2655/3000 [23:52<03:52,  1.49it/s]

Episode 2655, Reward: 121.9622
Policy Loss: -0.2477, Value Loss: 0.0167, Entropy Loss: -48.9153
Current Intensity: 3.3296, Max Intensity: 8.6357
---


 89%|████████▊ | 2660/3000 [23:54<03:25,  1.65it/s]

Episode 2660, Reward: 120.4039
Policy Loss: -0.2628, Value Loss: 0.0163, Entropy Loss: -48.6454
Current Intensity: 5.5457, Max Intensity: 8.7184
---


 89%|████████▉ | 2665/3000 [23:56<03:17,  1.70it/s]

Episode 2665, Reward: 122.1981
Policy Loss: -0.1244, Value Loss: 0.0124, Entropy Loss: -48.8704
Current Intensity: 3.7242, Max Intensity: 8.7329
---


 89%|████████▉ | 2670/3000 [23:59<03:15,  1.69it/s]

Episode 2670, Reward: 122.9266
Policy Loss: -0.0785, Value Loss: 0.0160, Entropy Loss: -48.8716
Current Intensity: 5.0403, Max Intensity: 8.7822
---


 89%|████████▉ | 2674/3000 [24:00<02:08,  2.53it/s]

Episode 2675, Reward: 121.0039
Policy Loss: -0.0749, Value Loss: 0.0084, Entropy Loss: -48.3137
Current Intensity: 3.4878, Max Intensity: 8.7400
---


 89%|████████▉ | 2675/3000 [24:03<05:50,  1.08s/it]

Evaluation at episode 2675: Average Reward = 127.2168


 89%|████████▉ | 2680/3000 [24:05<03:37,  1.47it/s]

Episode 2680, Reward: 122.2749
Policy Loss: -0.0493, Value Loss: 0.0321, Entropy Loss: -49.0632
Current Intensity: 4.3540, Max Intensity: 8.9306
---


 90%|████████▉ | 2685/3000 [24:08<03:03,  1.71it/s]

Episode 2685, Reward: 120.6445
Policy Loss: -0.1771, Value Loss: 0.0129, Entropy Loss: -48.7607
Current Intensity: 4.0263, Max Intensity: 8.9014
---


 90%|████████▉ | 2690/3000 [24:10<03:16,  1.58it/s]

Episode 2690, Reward: 117.8461
Policy Loss: -0.1872, Value Loss: 0.0133, Entropy Loss: -49.3844
Current Intensity: 2.4076, Max Intensity: 8.9482
---


 90%|████████▉ | 2695/3000 [24:12<02:54,  1.75it/s]

Episode 2695, Reward: 119.9808
Policy Loss: -0.1944, Value Loss: 0.0283, Entropy Loss: -48.4417
Current Intensity: 4.1080, Max Intensity: 8.9271
---


 90%|████████▉ | 2699/3000 [24:14<01:58,  2.55it/s]

Episode 2700, Reward: 121.7910
Policy Loss: 0.0349, Value Loss: 0.0130, Entropy Loss: -48.1891
Current Intensity: 5.3827, Max Intensity: 8.1095
---


 90%|█████████ | 2700/3000 [24:17<05:31,  1.10s/it]

Evaluation at episode 2700: Average Reward = 127.2138


 90%|█████████ | 2705/3000 [24:19<03:25,  1.43it/s]

Episode 2705, Reward: 117.4643
Policy Loss: -0.1343, Value Loss: 0.0151, Entropy Loss: -48.5965
Current Intensity: 3.1090, Max Intensity: 7.9331
---


 90%|█████████ | 2710/3000 [24:21<02:57,  1.63it/s]

Episode 2710, Reward: 117.2070
Policy Loss: -0.0497, Value Loss: 0.0204, Entropy Loss: -47.8728
Current Intensity: 4.0783, Max Intensity: 6.7756
---


 90%|█████████ | 2715/3000 [24:24<02:58,  1.59it/s]

Episode 2715, Reward: 116.0994
Policy Loss: -0.1143, Value Loss: 0.0155, Entropy Loss: -48.6705
Current Intensity: 3.9463, Max Intensity: 7.0837
---


 91%|█████████ | 2720/3000 [24:26<02:51,  1.64it/s]

Episode 2720, Reward: 119.1011
Policy Loss: -0.2049, Value Loss: 0.0152, Entropy Loss: -48.3070
Current Intensity: 5.8037, Max Intensity: 7.8140
---


 91%|█████████ | 2724/3000 [24:27<01:39,  2.77it/s]

Episode 2725, Reward: 119.3144
Policy Loss: -0.0687, Value Loss: 0.0166, Entropy Loss: -48.1242
Current Intensity: 4.5634, Max Intensity: 7.7098
---


 91%|█████████ | 2725/3000 [24:30<04:52,  1.06s/it]

Evaluation at episode 2725: Average Reward = 127.2156


 91%|█████████ | 2730/3000 [24:32<02:56,  1.53it/s]

Episode 2730, Reward: 120.8554
Policy Loss: -0.2165, Value Loss: 0.0303, Entropy Loss: -48.6227
Current Intensity: 5.9225, Max Intensity: 8.3297
---


 91%|█████████ | 2735/3000 [24:35<02:51,  1.55it/s]

Episode 2735, Reward: 120.3559
Policy Loss: -0.2267, Value Loss: 0.0416, Entropy Loss: -48.9711
Current Intensity: 5.4641, Max Intensity: 6.6556
---


 91%|█████████▏| 2740/3000 [24:37<02:35,  1.67it/s]

Episode 2740, Reward: 120.1052
Policy Loss: -0.2296, Value Loss: 0.0160, Entropy Loss: -49.1165
Current Intensity: 5.6751, Max Intensity: 7.0918
---


 92%|█████████▏| 2745/3000 [24:40<02:46,  1.53it/s]

Episode 2745, Reward: 116.7727
Policy Loss: -0.1517, Value Loss: 0.0209, Entropy Loss: -48.6780
Current Intensity: 2.5863, Max Intensity: 7.2409
---


 92%|█████████▏| 2749/3000 [24:41<01:42,  2.46it/s]

Episode 2750, Reward: 120.0165
Policy Loss: -0.0610, Value Loss: 0.0088, Entropy Loss: -49.0442
Current Intensity: 2.9571, Max Intensity: 6.7383
---


 92%|█████████▏| 2750/3000 [24:44<04:13,  1.01s/it]

Evaluation at episode 2750: Average Reward = 127.1063


 92%|█████████▏| 2755/3000 [24:46<02:56,  1.39it/s]

Episode 2755, Reward: 120.0867
Policy Loss: -0.1351, Value Loss: 0.0201, Entropy Loss: -48.9889
Current Intensity: 3.3390, Max Intensity: 7.6254
---


 92%|█████████▏| 2760/3000 [24:48<02:17,  1.74it/s]

Episode 2760, Reward: 121.6358
Policy Loss: -0.1494, Value Loss: 0.0289, Entropy Loss: -49.2724
Current Intensity: 3.9109, Max Intensity: 8.1907
---


 92%|█████████▏| 2765/3000 [24:51<02:40,  1.47it/s]

Episode 2765, Reward: 119.2003
Policy Loss: -0.1350, Value Loss: 0.0107, Entropy Loss: -48.6211
Current Intensity: 3.9336, Max Intensity: 8.2815
---


 92%|█████████▏| 2770/3000 [24:53<02:12,  1.74it/s]

Episode 2770, Reward: 120.5930
Policy Loss: -0.1748, Value Loss: 0.0177, Entropy Loss: -48.3963
Current Intensity: 4.3591, Max Intensity: 8.3140
---


 92%|█████████▏| 2774/3000 [24:55<01:23,  2.72it/s]

Episode 2775, Reward: 119.8808
Policy Loss: -0.0336, Value Loss: 0.0146, Entropy Loss: -49.2283
Current Intensity: 3.8563, Max Intensity: 8.2505
---


 92%|█████████▎| 2775/3000 [24:57<04:12,  1.12s/it]

Evaluation at episode 2775: Average Reward = 126.4333


 93%|█████████▎| 2780/3000 [25:00<02:24,  1.52it/s]

Episode 2780, Reward: 117.5227
Policy Loss: -0.2782, Value Loss: 0.0313, Entropy Loss: -47.4767
Current Intensity: 3.7014, Max Intensity: 6.5751
---


 93%|█████████▎| 2785/3000 [25:02<02:18,  1.56it/s]

Episode 2785, Reward: 113.9197
Policy Loss: -0.0881, Value Loss: 0.0380, Entropy Loss: -47.5412
Current Intensity: 3.9699, Max Intensity: 6.5732
---


 93%|█████████▎| 2790/3000 [25:05<02:04,  1.69it/s]

Episode 2790, Reward: 113.0492
Policy Loss: -0.0923, Value Loss: 0.0456, Entropy Loss: -47.7804
Current Intensity: 4.0652, Max Intensity: 6.5169
---


 93%|█████████▎| 2795/3000 [25:07<02:09,  1.59it/s]

Episode 2795, Reward: 116.4446
Policy Loss: -0.1747, Value Loss: 0.0371, Entropy Loss: -47.5189
Current Intensity: 3.4879, Max Intensity: 6.4713
---


 93%|█████████▎| 2799/3000 [25:08<01:16,  2.62it/s]

Episode 2800, Reward: 118.7084
Policy Loss: -0.0755, Value Loss: 0.0204, Entropy Loss: -47.4763
Current Intensity: 2.1857, Max Intensity: 6.6393
---


 93%|█████████▎| 2800/3000 [25:11<03:40,  1.10s/it]

Evaluation at episode 2800: Average Reward = 126.1056


 94%|█████████▎| 2805/3000 [25:13<02:08,  1.52it/s]

Episode 2805, Reward: 119.5093
Policy Loss: -0.0707, Value Loss: 0.0210, Entropy Loss: -47.7688
Current Intensity: 4.0189, Max Intensity: 6.7512
---


 94%|█████████▎| 2810/3000 [25:16<02:01,  1.57it/s]

Episode 2810, Reward: 111.1092
Policy Loss: -0.0385, Value Loss: 0.0392, Entropy Loss: -47.9995
Current Intensity: 2.8244, Max Intensity: 5.8581
---


 94%|█████████▍| 2815/3000 [25:18<01:53,  1.63it/s]

Episode 2815, Reward: 117.5267
Policy Loss: -0.0504, Value Loss: 0.0263, Entropy Loss: -47.0760
Current Intensity: 4.2168, Max Intensity: 6.7819
---


 94%|█████████▍| 2820/3000 [25:21<01:44,  1.72it/s]

Episode 2820, Reward: 116.6921
Policy Loss: -0.2066, Value Loss: 0.0318, Entropy Loss: -47.4185
Current Intensity: 3.0849, Max Intensity: 6.8235
---


 94%|█████████▍| 2824/3000 [25:22<01:12,  2.42it/s]

Episode 2825, Reward: 115.3569
Policy Loss: -0.2482, Value Loss: 0.0175, Entropy Loss: -46.8952
Current Intensity: 5.0407, Max Intensity: 5.8539
---


 94%|█████████▍| 2825/3000 [25:25<03:03,  1.05s/it]

Evaluation at episode 2825: Average Reward = 125.9892


 94%|█████████▍| 2830/3000 [25:27<02:11,  1.30it/s]

Episode 2830, Reward: 116.7512
Policy Loss: -0.1581, Value Loss: 0.0210, Entropy Loss: -46.5988
Current Intensity: 4.6591, Max Intensity: 7.7389
---


 94%|█████████▍| 2835/3000 [25:30<01:38,  1.68it/s]

Episode 2835, Reward: 116.4240
Policy Loss: -0.0061, Value Loss: 0.0323, Entropy Loss: -47.4056
Current Intensity: 4.7098, Max Intensity: 7.6861
---


 95%|█████████▍| 2840/3000 [25:32<01:43,  1.55it/s]

Episode 2840, Reward: 120.6024
Policy Loss: -0.0548, Value Loss: 0.0137, Entropy Loss: -46.3393
Current Intensity: 3.2033, Max Intensity: 7.7244
---


 95%|█████████▍| 2845/3000 [25:35<01:29,  1.73it/s]

Episode 2845, Reward: 116.7353
Policy Loss: -0.2326, Value Loss: 0.0190, Entropy Loss: -46.8406
Current Intensity: 4.6074, Max Intensity: 7.7224
---


 95%|█████████▍| 2849/3000 [25:36<00:57,  2.60it/s]

Episode 2850, Reward: 118.6853
Policy Loss: -0.2087, Value Loss: 0.0133, Entropy Loss: -46.9868
Current Intensity: 4.5658, Max Intensity: 6.5980
---


 95%|█████████▌| 2850/3000 [25:39<02:45,  1.10s/it]

Evaluation at episode 2850: Average Reward = 126.1379


 95%|█████████▌| 2855/3000 [25:41<01:36,  1.50it/s]

Episode 2855, Reward: 120.9312
Policy Loss: -0.1386, Value Loss: 0.0172, Entropy Loss: -46.5729
Current Intensity: 3.9557, Max Intensity: 7.7721
---


 95%|█████████▌| 2860/3000 [25:43<01:30,  1.55it/s]

Episode 2860, Reward: 119.2094
Policy Loss: -0.1704, Value Loss: 0.0160, Entropy Loss: -45.8593
Current Intensity: 4.7942, Max Intensity: 7.6833
---


 96%|█████████▌| 2865/3000 [25:46<01:17,  1.74it/s]

Episode 2865, Reward: 117.7979
Policy Loss: -0.1142, Value Loss: 0.0103, Entropy Loss: -46.4043
Current Intensity: 4.0294, Max Intensity: 7.7600
---


 96%|█████████▌| 2870/3000 [25:48<01:24,  1.55it/s]

Episode 2870, Reward: 120.7269
Policy Loss: -0.1083, Value Loss: 0.0155, Entropy Loss: -46.1010
Current Intensity: 3.2661, Max Intensity: 7.7474
---


 96%|█████████▌| 2874/3000 [25:49<00:48,  2.62it/s]

Episode 2875, Reward: 121.8537
Policy Loss: -0.1785, Value Loss: 0.0168, Entropy Loss: -46.3290
Current Intensity: 4.5217, Max Intensity: 7.7386
---


 96%|█████████▌| 2875/3000 [25:52<02:12,  1.06s/it]

Evaluation at episode 2875: Average Reward = 126.7496


 96%|█████████▌| 2880/3000 [25:54<01:19,  1.51it/s]

Episode 2880, Reward: 117.6114
Policy Loss: -0.2275, Value Loss: 0.0155, Entropy Loss: -46.2959
Current Intensity: 5.1446, Max Intensity: 7.6641
---


 96%|█████████▌| 2885/3000 [25:57<01:08,  1.67it/s]

Episode 2885, Reward: 120.4285
Policy Loss: -0.0751, Value Loss: 0.0325, Entropy Loss: -45.4175
Current Intensity: 5.0927, Max Intensity: 6.4668
---


 96%|█████████▋| 2890/3000 [25:59<01:08,  1.60it/s]

Episode 2890, Reward: 119.6164
Policy Loss: -0.2294, Value Loss: 0.0204, Entropy Loss: -45.5864
Current Intensity: 4.4719, Max Intensity: 6.0431
---


 96%|█████████▋| 2895/3000 [26:02<01:02,  1.68it/s]

Episode 2895, Reward: 123.3579
Policy Loss: -0.1815, Value Loss: 0.0141, Entropy Loss: -45.5772
Current Intensity: 4.6907, Max Intensity: 6.7762
---


 97%|█████████▋| 2899/3000 [26:03<00:38,  2.60it/s]

Episode 2900, Reward: 124.0815
Policy Loss: -0.1084, Value Loss: 0.0057, Entropy Loss: -44.8525
Current Intensity: 4.2816, Max Intensity: 6.7693
---


 97%|█████████▋| 2900/3000 [26:06<01:48,  1.08s/it]

Evaluation at episode 2900: Average Reward = 126.9269


 97%|█████████▋| 2905/3000 [26:08<01:06,  1.44it/s]

Episode 2905, Reward: 123.9466
Policy Loss: -0.1759, Value Loss: 0.0087, Entropy Loss: -45.6544
Current Intensity: 4.8319, Max Intensity: 7.4065
---


 97%|█████████▋| 2910/3000 [26:11<00:54,  1.64it/s]

Episode 2910, Reward: 122.0787
Policy Loss: -0.0816, Value Loss: 0.0078, Entropy Loss: -45.1162
Current Intensity: 3.3527, Max Intensity: 6.7873
---


 97%|█████████▋| 2915/3000 [26:13<00:53,  1.59it/s]

Episode 2915, Reward: 123.3955
Policy Loss: -0.2032, Value Loss: 0.0067, Entropy Loss: -45.9826
Current Intensity: 4.4061, Max Intensity: 6.8272
---


 97%|█████████▋| 2920/3000 [26:15<00:46,  1.71it/s]

Episode 2920, Reward: 124.7505
Policy Loss: 0.0018, Value Loss: 0.0226, Entropy Loss: -45.7558
Current Intensity: 6.4851, Max Intensity: 7.4585
---


 97%|█████████▋| 2924/3000 [26:17<00:28,  2.70it/s]

Episode 2925, Reward: 123.2332
Policy Loss: -0.1140, Value Loss: 0.0087, Entropy Loss: -46.0276
Current Intensity: 4.5803, Max Intensity: 7.3798
---


 98%|█████████▊| 2925/3000 [26:19<01:23,  1.12s/it]

Evaluation at episode 2925: Average Reward = 127.3566


 98%|█████████▊| 2930/3000 [26:22<00:47,  1.47it/s]

Episode 2930, Reward: 120.6927
Policy Loss: -0.1326, Value Loss: 0.0091, Entropy Loss: -45.3301
Current Intensity: 2.7486, Max Intensity: 7.8246
---


 98%|█████████▊| 2935/3000 [26:24<00:40,  1.60it/s]

Episode 2935, Reward: 123.4262
Policy Loss: -0.1769, Value Loss: 0.0120, Entropy Loss: -45.0097
Current Intensity: 4.0078, Max Intensity: 7.8688
---


 98%|█████████▊| 2940/3000 [26:27<00:35,  1.67it/s]

Episode 2940, Reward: 117.1555
Policy Loss: -0.1303, Value Loss: 0.0162, Entropy Loss: -44.9193
Current Intensity: 3.9974, Max Intensity: 6.0284
---


 98%|█████████▊| 2945/3000 [26:29<00:33,  1.66it/s]

Episode 2945, Reward: 120.9659
Policy Loss: -0.1713, Value Loss: 0.0096, Entropy Loss: -44.8630
Current Intensity: 3.1139, Max Intensity: 6.2198
---


 98%|█████████▊| 2949/3000 [26:30<00:20,  2.54it/s]

Episode 2950, Reward: 123.2813
Policy Loss: -0.1958, Value Loss: 0.0090, Entropy Loss: -44.1020
Current Intensity: 3.2639, Max Intensity: 7.1317
---


 98%|█████████▊| 2950/3000 [26:33<00:53,  1.07s/it]

Evaluation at episode 2950: Average Reward = 126.4118


 98%|█████████▊| 2955/3000 [26:35<00:31,  1.42it/s]

Episode 2955, Reward: 117.7881
Policy Loss: -0.1286, Value Loss: 0.0117, Entropy Loss: -44.2941
Current Intensity: 4.5488, Max Intensity: 6.7224
---


 99%|█████████▊| 2960/3000 [26:38<00:25,  1.57it/s]

Episode 2960, Reward: 122.1213
Policy Loss: -0.2050, Value Loss: 0.0120, Entropy Loss: -44.6678
Current Intensity: 3.5998, Max Intensity: 7.4060
---


 99%|█████████▉| 2965/3000 [26:40<00:20,  1.69it/s]

Episode 2965, Reward: 123.3712
Policy Loss: -0.2122, Value Loss: 0.0060, Entropy Loss: -44.6441
Current Intensity: 6.0435, Max Intensity: 7.3562
---


 99%|█████████▉| 2970/3000 [26:43<00:18,  1.61it/s]

Episode 2970, Reward: 123.1718
Policy Loss: -0.2294, Value Loss: 0.0113, Entropy Loss: -44.7792
Current Intensity: 4.4959, Max Intensity: 7.4727
---


 99%|█████████▉| 2974/3000 [26:44<00:09,  2.63it/s]

Episode 2975, Reward: 122.1382
Policy Loss: -0.1100, Value Loss: 0.0093, Entropy Loss: -44.8429
Current Intensity: 4.4523, Max Intensity: 6.1929
---


 99%|█████████▉| 2975/3000 [26:47<00:26,  1.07s/it]

Evaluation at episode 2975: Average Reward = 126.6345


 99%|█████████▉| 2980/3000 [26:49<00:13,  1.47it/s]

Episode 2980, Reward: 116.8744
Policy Loss: -0.2061, Value Loss: 0.0144, Entropy Loss: -44.0182
Current Intensity: 3.2284, Max Intensity: 6.1241
---


100%|█████████▉| 2985/3000 [26:51<00:09,  1.60it/s]

Episode 2985, Reward: 119.6913
Policy Loss: 0.0520, Value Loss: 0.0248, Entropy Loss: -44.1059
Current Intensity: 5.3803, Max Intensity: 6.2509
---


100%|█████████▉| 2990/3000 [26:54<00:06,  1.63it/s]

Episode 2990, Reward: 121.2925
Policy Loss: -0.1490, Value Loss: 0.0085, Entropy Loss: -44.7083
Current Intensity: 3.0271, Max Intensity: 6.8866
---


100%|█████████▉| 2995/3000 [26:56<00:02,  1.68it/s]

Episode 2995, Reward: 121.4797
Policy Loss: -0.0313, Value Loss: 0.0131, Entropy Loss: -44.3633
Current Intensity: 4.8580, Max Intensity: 7.2692
---


100%|█████████▉| 2999/3000 [26:57<00:00,  2.64it/s]

Episode 3000, Reward: 121.4914
Policy Loss: -0.0515, Value Loss: 0.0262, Entropy Loss: -43.3849
Current Intensity: 4.0705, Max Intensity: 6.7969
---


100%|██████████| 3000/3000 [27:00<00:00,  1.85it/s]

Evaluation at episode 3000: Average Reward = 127.0702





Training completed. Best intensity achieved: 9.2332
