In [None]:
# Print the runtime's OS, CPU, memory, disk and GPU details for reference.
# The output is only displayed; none of it is captured into Python variables.
!cat /etc/issue.net # OS
!cat /proc/cpuinfo  # CPU
!cat /proc/meminfo  # RAM
!df -h              # Disk
!nvidia-smi         # GPU

In [None]:
from google.colab import auth
auth.authenticate_user()

from google.colab import drive
drive.mount('/content/gdrive')

!cd "gdrive/My Drive/Colab Notebooks"; ls;

In [None]:
!cp "gdrive/My Drive/Colab Notebooks/DQN-hw/requirements.txt" .
!pip install -r requirements.txt

In [None]:
import os
import sys
sys.path.append(os.path.join('content', 'gdrive', 'My Drive', 'Colab Notebooks', 'DQN-hw'))

!cp -r "gdrive/My Drive/Colab Notebooks/DQN-hw/PyGame-Learning-Environment" .
!cd "PyGame-Learning-Environment"; pip install -e .;
sys.path.append('PyGame-Learning-Environment')
!cp -r "gdrive/My Drive/Colab Notebooks/DQN-hw/utils" .
from utils.env import Environment
!cp -r "gdrive/My Drive/Colab Notebooks/DQN-hw/agent" .
from agent.agent import Agent

In [None]:
import time

# Tell SDL to use its "dummy" video driver. Presumably this lets pygame run
# without a real display on the Colab VM and has to be set before the
# environment opens its window — TODO confirm against utils.env.
os.environ["SDL_VIDEODRIVER"] = "dummy"

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
import numpy as np
import cv2
from google.colab.patches import cv2_imshow
from google.colab import output

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device:', device)

# Module-level list named for per-episode durations; nothing in this cell
# appends to it.
episode_durations = list()


def convert_to_tensor(state, action, next_state, reward, done, num_actions=2):
    """Convert one environment transition into torch tensors.

    Args:
        state: numpy array holding the current observation. It is divided by
            255.0 (assumes 8-bit pixel intensities — TODO confirm with the
            environment).
        action: integer index of the chosen action; used to index a vector of
            length ``num_actions``.
        next_state: numpy array holding the following observation; scaled the
            same way as ``state``.
        reward: scalar reward for the step.
        done: flag telling whether the episode ended on this step.
        num_actions: length of the one-hot action vector. Defaults to 2, the
            value that used to be hard-coded.

    Returns:
        Tuple ``(state, action, next_state, reward, done)`` where ``state`` and
        ``next_state`` are float32 tensors, ``action`` is a float32 one-hot
        tensor of shape ``(1, num_actions)``, ``reward`` is a float32 tensor of
        shape ``(1, 1)`` and ``done`` is a tensor of shape ``(1, 1)``.

    Raises:
        IndexError: if ``action`` is outside the range of ``num_actions``.
    """
    state = torch.from_numpy(state).float() / 255.0

    # One-hot encode the action with a leading batch dimension of 1.
    action_onehot = np.zeros((1, num_actions))
    action_onehot[0, action] = 1
    action = torch.from_numpy(action_onehot).float()

    next_state = torch.from_numpy(next_state).float() / 255.0
    reward = torch.tensor([[reward]]).float()
    done = torch.tensor([[done]])

    return state, action, next_state, reward, done


def _show_frame(env):
    """Display the environment's current screen in the notebook output.

    The screen returned by ``env.get_screen()`` is rotated by 90 degrees,
    converted from RGB to BGR channel order and flipped along its first axis
    before being shown with ``cv2_imshow``. The previous cell output is cleared
    first, and the call pauses for 0.1 s so frames stay visible.
    """
    frame = env.get_screen()
    frame = np.rot90(frame, k=1)
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    frame = frame[::-1]

    output.clear()
    cv2_imshow(frame)
    time.sleep(0.1)


def train(hParam, env, agent, num_episodes=int(1e6), render=True):
    """Run the training loop: play episodes, store transitions, update the agent.

    Args:
        hParam: dict of hyper-parameters. Only ``hParam["TARGET_UPDATE"]`` is
            read here: the episode interval between target-network updates.
        env: environment object. Uses ``reset()``, ``start()``, ``game_over()``,
            ``step(action)`` (returning ``next_state, reward, done``),
            ``get_screen()`` and the ``total_reward`` attribute.
        agent: agent object. Uses ``getAction(state)``, ``memory.push(...)``,
            ``updateQnet()``, ``updateTargetNet()`` and ``save()``.
        num_episodes: number of episodes to play. Defaults to ``int(1e6)``,
            the value that used to be hard-coded.
        render: when True (the default, matching the previous behaviour) every
            step is drawn in the notebook output, which also sleeps 0.1 s per
            step. Pass False to train without displaying frames.
    """
    best = 0

    for episode in range(num_episodes):
        env.reset()
        state = env.start()
        # Reset per episode so the log below never reads an undefined name or
        # a value left over from an earlier episode when no step was taken.
        loss = None

        while not env.game_over():
            action = agent.getAction(state)
            next_state, reward, done = env.step(action)

            if render:
                _show_frame(env)

            state_, action_, next_state_, reward_, done_ = convert_to_tensor(state, action, next_state, reward, done)

            agent.memory.push(state_, action_, next_state_, reward_, done_)
            loss = agent.updateQnet()

            state = next_state

        # The first 101 episodes (0..100) only collect experience and update
        # the Q-network: no target-network sync, logging or checkpointing.
        if episode > 100:
            if episode % hParam["TARGET_UPDATE"] == 0:
                agent.updateTargetNet()

            # Every 10th episode: report progress and checkpoint on a new best.
            if episode % 10 == 1:
                # updateQnet() may yield no loss value; log NaN instead of
                # crashing inside the float format spec.
                shown_loss = float('nan') if loss is None else loss
                print('Episode: {}, Reward: {:.3f}, Loss: {:.3f}'.format(episode, env.total_reward, shown_loss))
                if env.total_reward > best:
                    agent.save()
                    best = env.total_reward

In [None]:
# Hyper-parameters handed to both the agent and the training loop.
# TARGET_UPDATE is the episode interval at which train() asks the agent to
# update its target network; BATCH_SIZE and GAMMA are consumed by Agent.
hParam = {"BATCH_SIZE": 32, "GAMMA": 0.99, "TARGET_UPDATE": 5}

# Build the environment and the agent, then start training.
env = Environment(device, display=True)
chulsoo = Agent(env.action_set, hParam)
train(hParam, env, chulsoo)