# Project Main
* In this notebook it is possible to train and evaluate the agents. There is also a brief illustration of how the frames are preprocessed.
* There are some pre-trained models available for Space-Invaders and Phoenix.
* The agent can be trained on Space Invaders, Assault and Phoenix.

## Installing Libraries

In [None]:
!pip install gym
!pip install gym[atari]
!pip install autorom[accept-rom-license]
!pip install torch
!pip install wandb -qU

## Importing Libraries

In [1]:
### EXTERNAL LIBRARIES
import gym 
import matplotlib.pyplot as plt
import torch
from PIL import Image
import wandb

### INTERNAL FILES
from src.variables import *
from src.model import *
from src.utils import *
from torchvision import transforms
import torch

  from .autonotebook import tqdm as notebook_tqdm
A.L.E: Arcade Learning Environment (version 0.8.0+919230b)
[Powered by Stella]


#### Enabling Weights and Biases to track the experiments
* [Weights and Biases Project](https://wandb.ai/difra00/RL_Curiosity_agent?workspace=user-difra00)

In [None]:
# Log in to Weights & Biases only when experiment tracking is switched on.
# `wb` is not assigned in any visible cell of this notebook — presumably it comes
# from the star import of src.variables; confirm there.
if wb:
    wandb.login()

### Select the training option:
* load: Load a pre-trained model;
* train: Train a new one;
* save: Select whether to save the model after each new episode.
* ext/intr: Select the type of training, whether using only extrinsic rewards, only intrinsic, or both.

In [None]:
# VARIABLES SELECTION #############################################################################
load = True     # if False, the model keeps its random weight initialization
train = True    # if False, testing mode is enabled (no training is run in this cell)
save = False    # if True, the model is saved after each episode

ext = True   # Extrinsic reward enabled
intr = True  # Intrinsic reward enabled (Curiosity reward)
###################################################################################################

# The checkpoint suffix encodes the enabled reward signals,
# e.g. '_extrinsic_intrinsic.pt' when both flags are True.
add = '.pt'
if intr:
    add = '_intrinsic' + add
if ext:
    add = '_extrinsic' + add


# For SpaceInvaders the only available pre-trained model uses extrinsic and intrinsic rewards.
# It was only trained for 100 episodes.

# `game` must already be defined when this cell runs. The set-up cell further down
# assigns it ids of the form "ALE/<Name>-v5"; the slice [4:-3] strips the "ALE/"
# prefix and the "-v5" suffix, leaving e.g. "Assault".
model_load = './models/' + game[4: -3] + add

# NOTE(review): the save path is empty; presumably it has to be set before
# enabling `save` — confirm against Policy.
model_save = ''

# One training run is launched per seed.
seed_list = [30, 60, 90]

if train:
    # Iterate over the seeds themselves so the number of runs always matches seed_list.
    for run_number, seed in enumerate(seed_list, start=1):
        agent = Policy(model_name_load=model_load, model_name_save=model_save,
                       ext=ext, intr=intr, seed=seed, load=load, save=save)

        agent.trainer(n_training_episodes=training_episodes,
                      exp_name='exp_name{}'.format(run_number))
else:
    # Build the evaluation agent with the same reward flags that were used to pick
    # the checkpoint name above (they were previously hard-coded to True here).
    agent = Policy(model_name_load=model_load, model_name_save=model_save,
                   ext=ext, intr=intr, seed=seed_list[0], load=load, save=save)



In [None]:
# Call the agent's own load step first (presumably this restores the checkpoint
# chosen in the selection cell — confirm in Policy.load).
agent.load()

# Play a single rendered evaluation episode.
# Mean scores noted by the author: Intrinsic: 945, Extrinsic: 1386.
eval_settings = dict(n_eval_episodes=1, render=True)
evaluate_agent(agent, **eval_settings)

## Set-up the game
* How the rendering works


In [2]:
# Gym ids of the three supported Atari games.
phoenix, space_invaders, assault = (
    "ALE/Phoenix-v5",
    "ALE/SpaceInvaders-v5",
    "ALE/Assault-v5",
)

# Pick the game to play; `game` and `env_id` both refer to the same id.
env_id = assault
game = env_id

# Create the environment in 'human' render mode.
env = gym.make(env_id, render_mode='human')


## Show some steps of the chosen game

In [3]:
# Open a Weights & Biases run for the random-agent rollout when tracking is enabled.
if wb:
    run_settings = {
        'project': project_name,
        'name': 'random_agent_assault',
    }
    wandb.init(**run_settings)

## Run this cell to play some episodes of the game.

In [4]:
# Play three episodes with uniformly sampled random actions and record the
# undiscounted return of each episode.
scores_deque = deque(maxlen=100)  # holds the returns of at most the last 100 episodes
rewards_list = []                 # defined by the original cell; it is not filled here
for episode in range(3):
    # reset() returns (observation, info); neither value is needed by a random agent.
    env.reset()
    episode_return = 0
    done = False
    while not done:
        action = env.action_space.sample()
        _, reward, terminated, truncated, _ = env.step(action)

        env.render()
        episode_return += reward
        # The episode is over when the game ends OR when the environment truncates
        # it; the truncation flag was previously ignored, so the loop could keep
        # stepping an environment that had already signalled the end of the episode.
        done = terminated or truncated
    scores_deque.append(episode_return)


  logger.warn(


: 

: 

## Data preprocessing: Result of the image transformation.
* This image transformation was selected so as to reduce the ambiguities stemming from the details in the games.
* Since the considered video games have similar rules, settings, and objectives, we want them to have similar frame representations in the latent space.

In [None]:
# Load one stored frame per game from the ./images folder.
frame_files = (
    './images/phoenix.npy',
    './images/space_invaders.npy',
    './images/assault.npy',
)
phoenix_game, space_invaders_game, assault_game = map(np.load, frame_files)

In [None]:
import time
def plot_nparray_with_torch_transform(np_array):
    '''Display a frame after it has gone through the preprocessing transform.

    The array is wrapped in a PIL image and passed through `transform`, which is
    defined outside this cell (per the author's original note, a
    torchvision.transforms pipeline). The result is converted back to a numpy
    array, divided by 255 and shown with a gray colormap.

    INPUT:  np_array: a numpy array accepted by PIL's Image.fromarray.
    OUTPUT: none; the figure is displayed as a side effect.
    '''
    pil_frame = Image.fromarray(np_array)
    transformed = transform(pil_frame)
    scaled = np.array(transformed) / 255
    plt.imshow(scaled, cmap='gray')
    plt.show()

# Show the transformed frame of each game in turn, pausing one second between
# consecutive plots (no pause before the first or after the last).
sample_frames = (phoenix_game, space_invaders_game, assault_game)
for position, frame in enumerate(sample_frames):
    if position > 0:
        time.sleep(1)
    plot_nparray_with_torch_transform(frame)
