# Cases 1, 2, 3 and 4

### Using the observation vector as input and a discrete velocity action as output
#### This notebook lets you train PPO with various hyper-parameter settings and later test the accuracy of a trained agent

In [None]:
%matplotlib notebook

import ppo_twonet_discrete as ppo
import gym 
import matplotlib.pyplot as plt
import numpy as np
from urbanworld.urbanworld import UrbanWorldEnv

### Build the urban-world environment; the settings below are written to env.env.
env = gym.make('MyUrbanWorld-v1')
env.env.version = 'easy'

### Plotting of the 3D urban environment is switched off here
### (presumably set this to True to show the plot -- TODO confirm)
env.env.plot = False

### Modify the height vector to change the cases from open-sky conditions to urban area conditions.
### Each row is built separately: [[80]*7]*7 would repeat ONE row object seven times,
### so writing to a single cell would silently change that column in every row.
env.env.HEIGHT = [[80] * 7 for _ in range(7)]

### Observation/Position vector: 'pos' is active, 'obs' is the alternative
env.env.mode = 'pos' ##'obs' ## 
a_seed = 0
env.seed(a_seed)

### Hyper-parameter dictionary filled in by the training cells below;
### each of those cells overwrites every key before training.
set_params = {}

In [None]:
### urban v1 version -> reference run (plotted as 'default' in the comparison cell)
set_params.update({
    'clip': 0.2,
    'max_grad': 0.6,
    'ppo_epoch': 30,
    'mem_size': 1000,
    'batch_size': 32,
    'gamma': 0.9,
    'no_eps': 2000,
    'eps_len': 1000,
    'env': env,
    'lr': 1e-3,
})
### Fresh agent for this configuration; train_model returns the three training logs.
urban_v1 = ppo.Agent(env)
rewards_v1, Lclip_v1, Lvalue_v1 = urban_v1.train_model(set_params)

In [None]:
### urban v5 version -> Changing memory size (mem_size = 600)
### Compared with the v1 cell, max_grad, ppo_epoch, no_eps and lr also differ.
set_params.update({
    'clip': 0.2,
    'max_grad': 0.5,
    'ppo_epoch': 50,
    'mem_size': 600,
    'batch_size': 32,
    'gamma': 0.9,
    'no_eps': 1500,
    'eps_len': 1000,
    'env': env,
    'lr': 1e-4,
})
### Fresh agent for this configuration; train_model returns the three training logs.
urban_v5 = ppo.Agent(env)
rewards_v5, Lclip_v5, Lvalue_v5 = urban_v5.train_model(set_params)

In [None]:
### urban v3 version -> Changing learning rate (lr = 1e-2)
### Compared with the v1 cell, max_grad and no_eps also differ.
set_params.update({
    'clip': 0.2,
    'max_grad': 0.5,
    'ppo_epoch': 30,
    'mem_size': 1000,
    'batch_size': 32,
    'gamma': 0.9,
    'no_eps': 1500,
    'eps_len': 1000,
    'env': env,
    'lr': 1e-2,
})
### Fresh agent for this configuration; train_model returns the three training logs.
urban_v3 = ppo.Agent(env)
rewards_v3, Lclip_v3, Lvalue_v3 = urban_v3.train_model(set_params)

In [None]:
### urban v4 version -> Changing batch size (batch_size = 64)
### Compared with the v1 cell, max_grad, ppo_epoch, no_eps and lr also differ.
set_params.update({
    'clip': 0.2,
    'max_grad': 0.5,
    'ppo_epoch': 10,
    'mem_size': 1000,
    'batch_size': 64,
    'gamma': 0.9,
    'no_eps': 1500,
    'eps_len': 1000,
    'env': env,
    'lr': 1e-4,
})
### Fresh agent for this configuration; train_model returns the three training logs.
urban_v4 = ppo.Agent(env)
rewards_v4, Lclip_v4, Lvalue_v4 = urban_v4.train_model(set_params)

In [None]:
### urban v2 version -> Changing episode length (eps_len = 1500)
### Compared with the v1 cell, max_grad, ppo_epoch, no_eps and lr also differ.
set_params.update({
    'clip': 0.2,
    'max_grad': 0.5,
    'ppo_epoch': 10,
    'mem_size': 1000,
    'batch_size': 32,
    'gamma': 0.9,
    'no_eps': 1500,
    'eps_len': 1500,
    'env': env,
    'lr': 1e-4,
})
### Fresh agent for this configuration; train_model returns the three training logs.
urban_v2 = ppo.Agent(env)
rewards_v2, Lclip_v2, Lvalue_v2 = urban_v2.train_model(set_params)

In [None]:
### Compare the training curves of the five runs on one figure.
### Each entry: (reward log, trained agent, matplotlib colour, legend label).
### The legend labels follow the training cells: v2 is the episode-length run and
### v5 is the memory-size run (these two labels were previously swapped).
training_runs = [
    (rewards_v1, urban_v1, 'b', 'default'),
    (rewards_v2, urban_v2, 'r', 'eps_len'),
    (rewards_v3, urban_v3, 'g', 'lr'),
    (rewards_v4, urban_v4, 'k', 'batch_size'),
    (rewards_v5, urban_v5, 'm', 'mem_size'),
]

### Start a new figure so that re-running this cell does not draw into whatever
### figure is still active under the interactive notebook backend.
plt.figure()
for run_rewards, run_agent, colour, run_label in training_runs:
    ### Row 0 is used as the x values and row 1 as the rewards
    ### (assumes each log entry is an (episode, reward) pair -- TODO confirm).
    reward_log = np.transpose(run_rewards)
    episodes = reward_log[0]
    ### Normalise by the run's own episode length so runs with different eps_len are comparable.
    avg_reward = reward_log[1] / run_agent.eps_len
    ### Markers and a connecting line in the same colour; only the line carries the legend entry.
    plt.plot(episodes, avg_reward, colour + '.')
    plt.plot(episodes, avg_reward, colour, label=run_label)

plt.xlabel('Episode')
plt.ylabel('Average Reward')
plt.legend()
plt.show()

In [None]:
### This is for testing the model!
### Runs test_model on one of the trained agents and plots the recorded positions.
eps_len = 400
agent = urban_v3
record_pos, record_action, rewards = agent.test_model(eps_len)
print('Rewards attained: ', rewards)

### After the transpose each row holds one position component over time
### (assumes record_pos is a sequence of position vectors -- TODO confirm).
pos_components = np.transpose(record_pos)
### Angle of (component 0, component 1) converted from radians to degrees.
get_angle = np.arctan2(pos_components[1], pos_components[0]) * 180 / np.pi
### Take the x-axis from the samples actually recorded instead of eps_len, so a
### recording shorter than eps_len does not raise a shape mismatch when plotting.
steps = np.arange(len(get_angle))

plt.figure()
### Top panel: angle in degrees.
plt.subplot(211)
plt.plot(steps, get_angle, 'b')
plt.plot(steps, get_angle, 'b.')
plt.ylabel('atan2(pos[1], pos[0]) [deg]')
### Bottom panel: third position component.
plt.subplot(212)
plt.plot(steps, pos_components[2], 'b')
plt.plot(steps, pos_components[2], 'b.')
plt.xlabel('Step')
plt.ylabel('pos[2]')
plt.show()