<a href="https://colab.research.google.com/github/dfrnks/py-memory-game/blob/main/learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab only: mount Google Drive at /content/drive. The checkpoint and
# output paths used later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Import

In [None]:
# Start from a clean state: drop any earlier clone and the old "runs" directory.
%cd /content/
%rm -rf py_memory_game
%rm -rf runs

# Clone into "py_memory_game" (underscores) so the later
# `from py_memory_game... import ...` statements can refer to it by that name.
!git clone https://github.com/dfrnks/py-memory-game.git py_memory_game

# Switch to the branch this notebook uses and list its contents.
%cd py_memory_game
!git checkout linear-network-training
!ls

/content
Cloning into 'py_memory_game'...
remote: Enumerating objects: 463, done.[K
remote: Counting objects: 100% (463/463), done.[K
remote: Compressing objects: 100% (339/339), done.[K
remote: Total 463 (delta 251), reused 303 (delta 119), pack-reused 0[K
Receiving objects: 100% (463/463), 9.43 MiB | 16.56 MiB/s, done.
Resolving deltas: 100% (251/251), done.
/content/py_memory_game
Branch 'linear-network-training' set up to track remote branch 'linear-network-training' from 'origin'.
Switched to a new branch 'linear-network-training'
learning.py  LICENSE  main.py  play.py	README.md  requirements.txt  src


In [None]:
import copy

import torch

from torch import nn

In [None]:
import datetime

from pathlib import Path


from py_memory_game.src import MemoryGameEnv
from py_memory_game.src import MemoryAgent

from py_memory_game import learning

In [None]:
class GetLSTMOutput(nn.Module):
    """Adapter that forwards only the first item of a two-item input.

    ``nn.LSTM`` returns ``(output, (h_n, c_n))``. Placed right after it inside
    an ``nn.Sequential``, this module passes just ``output`` on to the next
    layer and discards the recurrent state.
    """

    def forward(self, x):
        # Unpacking (rather than indexing) keeps the requirement that ``x``
        # holds exactly two items.
        sequence_output, _recurrent_state = x
        return sequence_output


class MemoryNet(nn.Module):
    """Two structurally identical networks: a trainable ``online`` copy and a
    frozen ``target`` copy.

    Each copy is a bidirectional LSTM (16 input features, 128 hidden units per
    direction) followed by ``Linear(256, 16) -> ReLU -> Linear(16, output_dim)``.
    ``target`` starts as a deep copy of ``online`` and has gradients disabled
    on all of its parameters.

    Args:
        input_dim: ``(height, width)`` pair; both values must be 16.
        output_dim: Number of features produced by the final linear layer.
        device: Device the ``online`` network is moved to before it is copied
            (for example ``"cuda"`` or ``"cpu"``).

    Raises:
        ValueError: If ``input_dim`` is not ``(16, 16)``.
    """

    def __init__(self, input_dim, output_dim, device):
        super().__init__()

        self.device = device

        h, w = input_dim

        if h != 16:
            raise ValueError(f"Expecting input height: 16, got: {h}")
        if w != 16:
            raise ValueError(f"Expecting input width: 16, got: {w}")

        # Unidirectional alternatives left for reference: nn.RNN(16, 128, 16)
        # or nn.GRU(16, 128), each paired with nn.Linear(128, 16).
        self.online = nn.Sequential(
            # Bidirectional, so every step yields 2 * 128 = 256 features.
            nn.LSTM(16, 128, bidirectional=True),
            # nn.LSTM returns (output, state); keep only the output tensor.
            GetLSTMOutput(),
            nn.Linear(256, 16),
            nn.ReLU(),
            nn.Linear(16, output_dim),
        )

        self.online = self.online.to(device=self.device)

        # Copy after the move so the target network lives on the same device.
        self.target = copy.deepcopy(self.online)

        # Q_target parameters are frozen.
        for p in self.target.parameters():
            p.requires_grad = False

    def forward(self, input, model):
        """Run ``input`` through the selected network.

        A singleton dimension is inserted at index 1 before the network is
        applied and squeezed out of the result afterwards.

        Args:
            input: Tensor to evaluate.
            model: ``"online"`` or ``"target"``.

        Returns:
            The output tensor of the selected network.

        Raises:
            ValueError: If ``model`` is neither ``"online"`` nor ``"target"``.
                Previously this case silently returned ``None``.
        """
        if model == "online":
            network = self.online
        elif model == "target":
            network = self.target
        else:
            raise ValueError(
                f"Expecting model to be 'online' or 'target', got: {model!r}"
            )

        return network(input.unsqueeze(1)).squeeze(1)

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Last expression of the cell: shows whether CUDA is available.
torch.cuda.is_available()

True

In [None]:
# A fixed 4x4 layout in which every value 100-107 appears exactly twice, and an
# all-zero board of the same size. Only the commented-out constructor call
# below uses them.
game_board_completed = [
    102, 107, 106, 107,
    104, 104, 100, 103,
    100, 105, 102, 101,
    105, 103, 106, 101,
]
game_board = [0] * 16

# Fixed-board variant:
# env = MemoryGameEnv((4, 4), game_board_completed, game_board)
env = MemoryGameEnv((4, 4))

save_dir = Path("/content/drive/MyDrive/Estudos/PUCRS/Ciencia de dados e Inteligencia Artificial/TCC/Memory Game/checkpoints")

# The number of actions drives both the (square) state shape and the output size.
n_actions = env.action_space.n
state_shape = (n_actions, n_actions)

net = MemoryNet(state_shape, n_actions, device)

agent = MemoryAgent(
    state_dim=state_shape,
    action_dim=n_actions,
    save_dir=save_dir,
    net=net,
    # Optimisation / replay settings
    lr=0.00025,
    max_memory_size=100000,
    batch_size=32,
    gamma=0.9,
    # Exploration schedule
    exploration_rate=1,
    exploration_rate_decay=0.99999975,
    exploration_rate_min=0.1,
    # Step-based intervals (semantics defined by MemoryAgent)
    save_every=5e5,
    burnin=1e4,
    learn_every=3,
    sync_every=1e4,
)

# Training

In [None]:
# Train the agent in env for 1000 episodes. The last argument is the run label
# (assumed to name the log/checkpoint sub-directory; confirm in learning.py).
learning.training(agent, env, 1000, 'training-board-random-LSTM-bidirecional')

Load network


Training 994:  99%|█████████▉| 994/1000 [1:42:22<00:28,  4.71s/it, Episode 980 - Step 5967745 - Epsilon 0.1 - Mean Reward -3180.05 - Mean Length 1472.83 - Mean Loss 0.671 - Mean Q Value -18.442 - Time Delta 130.351 - Time 2022-04-20T22:17:44]

In [None]:
# Evaluate the same agent/env pair for 1000 episodes under its own run label
# (presumably without learning updates; confirm in learning.py).
learning.eval(agent, env, 1000, 'eval-board-random-LSTM-bidirecional')

Load network


Eval 999: 100%|██████████| 1000/1000 [13:02<00:00,  1.28it/s, Episode 980 - Step 7828815 - Epsilon 0.1 - Mean Reward -1365.87 - Mean Length 916.79 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 14.32 - Time 2022-04-18T20:18:24]


In [None]:
#learning.rand(agent, env, 1000, 'random-board-fixed')
#learning.rand(agent, env, 1000, 'random-board-random')

Random playing 999: 100%|██████████| 1000/1000 [00:39<00:00, 25.17it/s, Episode 980 - Step 4293310 - Epsilon 0.1 - Mean Reward -369.3 - Mean Length 232.38 - Mean Loss 0.0 - Mean Q Value 0.0 - Time Delta 0.814 - Time 2022-04-10T19:52:55]


# Playing

In [None]:
from py_memory_game.play import play_with_network
from py_memory_game.play import play_random

In [None]:
#agent.save_dir = Path("/content/drive/MyDrive/Estudos/PUCRS/Ciencia de dados e Inteligencia Artificial/TCC/Memory Game/")

In [None]:
# Rebuild the environment, network and agent for the play-through. This time
# save_dir points at the "playing" folder and the agent keeps its default
# hyper-parameters.
env = MemoryGameEnv((4, 4))

save_dir = Path("/content/drive/MyDrive/Estudos/PUCRS/Ciencia de dados e Inteligencia Artificial/TCC/Memory Game/playing")

n_actions = env.action_space.n
state_shape = (n_actions, n_actions)

net = MemoryNet(state_shape, n_actions, device)

agent = MemoryAgent(
    state_dim=state_shape,
    action_dim=n_actions,
    save_dir=save_dir,
    net=net,
)

# Alternative checkpoint location:
# agent.load('/content/drive/MyDrive/Estudos/PUCRS/Ciencia de dados e Inteligencia Artificial/TCC/Memory Game/checkpoints/memory_net.chkpt')
# NOTE(review): this loads the "training-board-fixed-..." run, while the
# training cell in this notebook uses the "training-board-random-..." label.
# Confirm this is the intended checkpoint.
checkpoint_path = '/content/drive/MyDrive/Estudos/PUCRS/Ciencia de dados e Inteligencia Artificial/TCC/Memory Game/checkpoints/training-board-fixed-LSTM-bidirecional/memory_net.chkpt'
agent.load(checkpoint_path)

Load network


In [None]:
# Let the checkpoint-loaded agent play in env. The third argument is
# presumably the number of games (confirm in play.py).
play_with_network(agent, env, 1000)

--- 0-0 item. 0.9646124839782715 seconds. 2091 jogadas, 20 Pontos ---
--- 0-1 item. 1.0687048435211182 seconds. 2247 jogadas, 43 Pontos ---
--- 0-2 item. 0.8894119262695312 seconds. 1945 jogadas, 29 Pontos ---
--- 0-3 item. 1.1127829551696777 seconds. 2425 jogadas, 30 Pontos ---
--- 0-4 item. 1.0137052536010742 seconds. 2154 jogadas, 35 Pontos ---
--- 0-5 item. 0.5093204975128174 seconds. 1076 jogadas, 25 Pontos ---
--- 0-6 item. 0.7195026874542236 seconds. 1556 jogadas, 39 Pontos ---
--- 0-7 item. 1.1972894668579102 seconds. 2606 jogadas, 34 Pontos ---
--- 0-8 item. 1.1089575290679932 seconds. 2428 jogadas, 20 Pontos ---
--- 0-9 item. 0.6007809638977051 seconds. 1289 jogadas, 30 Pontos ---
--- 0-10 item. 0.7473409175872803 seconds. 1654 jogadas, 20 Pontos ---
--- 0-11 item. 0.8982949256896973 seconds. 1934 jogadas, 29 Pontos ---
--- 0-12 item. 0.9252970218658447 seconds. 2016 jogadas, 20 Pontos ---
--- 0-13 item. 0.600670337677002 seconds. 1305 jogadas, 68 Pontos ---
--- 0-14 item. 0.

In [None]:
# Random-play baseline for comparison with the network run.
# NOTE(review): the arguments are assumed to be board height, board width,
# number of games, a run index, and the output directory; confirm in play.py.
play_random(4,4, 1000, 1, save_dir)

--- 0-1 item. 0.010358572006225586 seconds. 307 jogadas, 20 Pontos ---
--- 1-1 item. 0.008061647415161133 seconds. 330 jogadas, 29 Pontos ---
--- 2-1 item. 0.003333568572998047 seconds. 144 jogadas, 30 Pontos ---
--- 3-1 item. 0.0034165382385253906 seconds. 130 jogadas, 40 Pontos ---
--- 4-1 item. 0.00345611572265625 seconds. 173 jogadas, 36 Pontos ---
--- 5-1 item. 0.004786014556884766 seconds. 211 jogadas, 38 Pontos ---
--- 6-1 item. 0.004469633102416992 seconds. 174 jogadas, 40 Pontos ---
--- 7-1 item. 0.0040302276611328125 seconds. 190 jogadas, 38 Pontos ---
--- 8-1 item. 0.005353212356567383 seconds. 279 jogadas, 20 Pontos ---
--- 9-1 item. 0.005390167236328125 seconds. 263 jogadas, 25 Pontos ---
--- 10-1 item. 0.005870342254638672 seconds. 220 jogadas, 30 Pontos ---
--- 11-1 item. 0.00594639778137207 seconds. 258 jogadas, 47 Pontos ---
--- 12-1 item. 0.0038013458251953125 seconds. 178 jogadas, 39 Pontos ---
--- 13-1 item. 0.003977298736572266 seconds. 197 jogadas, 49 Pontos ---
-