# Standard imports

In [None]:
import os

import sys
import subprocess
import pkg_resources

import argparse
import numpy as np
import torch
import torch.nn as nn
from copy import deepcopy
from datetime import datetime
from pytz import timezone
from collections import OrderedDict
import more_itertools as mit
import pandas as pd
import random
import re # regular expression

from enum import Enum 

from traitlets.traitlets import HasTraits, Int, Unicode, default

from typing import List, Set, Dict, Tuple, Any

import importlib

# Import Holoviz Panel

In [None]:
if 'panel' not in [pkg.key for pkg in pkg_resources.working_set]:
    !pip install panel
else:
    print('panel already installed')

from bokeh.core.validation import check

import panel as pn
from panel.interact import interact, fixed
from panel import widgets
import param

from panel.layout.gridstack import GridStack

pn.extension() # required for panels to be displayed in Jupyter notebook

# Set up Holoviz panels for Jupyter notebook

In [None]:
pn.extension(raw_css=["""
div.orange_border_table + table * {
  border: 1px solid orange;
}
"""])

css_log_widget_box = '''
.bk.log-widget-box {
  color: #ffffff;
  background: #000000;
  border: 1px black solid;
}
'''

pn.extension(raw_css=[css_log_widget_box])

pn.extension('tabulator')

# Import rlcard

In [None]:
if 'rlcard' not in [pkg.key for pkg in pkg_resources.working_set]:
    !pip install -vvv rlcard
else:
    print('rlcard already installed')

In [None]:
import rlcard
from rlcard.agents import RandomAgent
from rlcard.utils import get_device, set_seed, tournament, reorganize, Logger, plot_curve
from rlcard.agents import DQNAgent
from rlcard.models.gin_rummy_rule_models import GinRummyNoviceRuleAgent

from rlcard.agents.dqn_agent import Memory
from rlcard.agents.dqn_agent import Estimator

from rlcard.games.gin_rummy.player import GinRummyPlayer
from rlcard.games.gin_rummy.game import GinRummyGame
from rlcard.games.gin_rummy.utils.action_event import ActionEvent, DiscardAction
from rlcard.games.gin_rummy.utils.action_event import KnockAction, GinAction, DeclareDeadHandAction

from rlcard.games.gin_rummy.utils.scorers import GinRummyScorer
from rlcard.games.gin_rummy.utils.settings import Setting, Settings, DealerForRound

from rlcard.games.gin_rummy.utils.action_event import draw_card_action_id, pick_up_discard_action_id

import rlcard.games.gin_rummy.utils.utils as utils
import rlcard.games.gin_rummy.utils.melding as melding

from rlcard.games.gin_rummy.utils.thinker import Thinker

from rlcard.games.base import Card

from rlcard.envs.gin_rummy import GinRummyEnv

from rlcard.agents import NFSPAgent

import rlcard as rlcard # NOTE: don't know why this is needed; don't know why it has to be placed last

# My Game Reviewer

In [None]:
# We will need the following function.

def sortByRankBySuit(card: Card):
    """Return an integer sort key for a card.

    The key orders cards by rank first (A 2 3 ... J Q K) and, within a
    rank, by suit (C D H S): the rank id is weighted by 4 and the suit id
    is subtracted from 3.
    """
    from rlcard.games.gin_rummy.utils.utils import get_rank_id, get_suit_id
    rank_component = 4 * get_rank_id(card)
    suit_component = 3 - get_suit_id(card)
    return rank_component + suit_component

def card_from_card_index(text: str) -> Card:
    """Build a Card from a two-character index string.

    Args:
        text: Exactly two characters; the first is passed to Card as the
            suit and the second as the rank.

    Returns:
        The Card constructed from those two characters.

    Raises:
        ValueError: If text is not exactly two characters long.
    """
    if len(text) != 2:
        # ValueError is an Exception subclass, so callers that catch
        # Exception keep working while the failure is now specific.
        raise ValueError(f"len(text) is {len(text)}: should be 2.")
    suit, rank = text
    return Card(rank=rank, suit=suit)

In [None]:
# We will create a pandas dataframe with the individual steps of playing a game.
# We will build a view that shows one row of the dataframe.
# Each row of the dataframe corresponds to a move by a player.
# The learning agent is player_id 0.
# The opponent is player_id 1.

# The following is hard-coded since we are interested in the view only.
# The data is generated without any training taking place.

# Set up environment. You can execute this cell just once.

def create_env(mlp_layers=None):
    """Create a gin-rummy environment with a DQN agent and a rule-based opponent.

    No training takes place here; the agents are only attached to the
    environment so that games can be played to produce data.

    Args:
        mlp_layers: Layer sizes handed to DQNAgent. Defaults to
            [128, 128, 128] when omitted.

    Returns:
        The environment with its agents set: index 0 is the DQN agent and
        index 1 is the GinRummyNoviceRuleAgent opponent.
    """
    if mlp_layers is None:
        mlp_layers = [128, 128, 128]
    env = rlcard.make('gin-rummy')
    dqn_agent = DQNAgent(
        num_actions=env.num_actions,
        state_shape=env.state_shape[0],
        mlp_layers=mlp_layers,
    )
    opponent = GinRummyNoviceRuleAgent()
    # Seat order matters: player_id 0 is the DQN agent, player_id 1 the opponent.
    env.set_agents([dqn_agent, opponent])
    return env

env = create_env()

In [None]:
# Play a game.
# You can re-execute this cell to produce new data.

def create_row(action, game, env_state):
    """Record one move of the game as a dict (one row of the dataframe).

    Args:
        action: The action id chosen for the current player.
        game: The game object; it is queried for the current player id,
            the legal actions, the player's state and the player's hand.
        env_state: The environment state the action was chosen from; it is
            stored in the row unchanged.

    Returns:
        A dict with the keys player_id, legal_action_ids, action_id,
        top_card, held_cards, dead_cards, env_state, unknown_cards and
        opponent_known_cards.
    """
    player_id = game.get_player_id()
    legal_actions = game.judge.get_legal_actions()
    game_state = game.get_state(player_id=player_id)

    def cards_for(key):
        # The game state lists cards as two-character index strings.
        return [card_from_card_index(x) for x in game_state[key]]

    return {
        'player_id': player_id,
        'legal_action_ids': [x.action_id for x in legal_actions],
        'action_id': action,
        'top_card': cards_for('top_discard'),
        # Snapshot the hand: the list belongs to the game object, so storing
        # the reference would let later in-place changes alter this row.
        'held_cards': list(game.round.players[player_id].hand),
        'dead_cards': cards_for('dead_cards'),
        'env_state': env_state,
        'unknown_cards': cards_for('unknown_cards'),
        'opponent_known_cards': cards_for('opponent_known_cards'),
    }

def create_rows(env):
    """Play one game with the environment's agents and record every move.

    For each step, the acting agent picks an action with eval_step from the
    environment state, the move is recorded with create_row, and the action
    is then applied to the environment.

    Args:
        env: The environment, with its agents already set.

    Returns:
        A list of row dicts, one per move, in the order played.
    """
    recorded = []
    _, acting_player = env.reset()
    while not env.is_over():
        acting_agent = env.agents[acting_player]
        observed = env.get_state(player_id=acting_player)
        chosen_action, _ = acting_agent.eval_step(observed)
        # Record the move before stepping so the row reflects the position
        # the action was chosen in.
        recorded.append(create_row(action=chosen_action, game=env.game, env_state=observed))
        _, acting_player = env.step(action=chosen_action)
    print('done')
    return recorded

rows = create_rows(env=env)

In [None]:
# Create two pandas dataframes: dataframe and filtered_df.
# The filtered_df is the more interesting one since it corresponds to the dqn_agent actions.

dataframe = pd.DataFrame(rows)
filtered_df = dataframe.query("player_id == 0")
dataframe.head(7) # There are many ways to view a dataframe.

In [None]:
class MyGamePane(pn.pane.Markdown):
    """Markdown pane describing one recorded move (one dataframe row)."""

    def __init__(self, dataframe, row_id, agents):
        super().__init__()
        self.object = self.make_game_text(dataframe, row_id, agents)

    def make_game_text(self, dataframe, row_id, agents) -> str:
        """Render the row at position row_id as markdown text.

        Args:
            dataframe: The dataframe of recorded moves.
            row_id: Zero-based position of the row to show.
            agents: Agents indexed by player_id; the row's agent is asked
                again for its eval_step choice on the stored env_state.

        Returns:
            The markdown text, or a short placeholder when the dataframe
            has no rows.
        """
        max_rows = len(dataframe)
        if not max_rows:
            return "### Gin Rummy (no rows)"

        def ordered(cards):
            # Descending order of the rank/suit sort key.
            return sorted(cards, key=sortByRankBySuit, reverse=True)

        def joined(cards):
            return " ".join(map(str, cards))

        row = dataframe.iloc[row_id]
        current_row = row_id + 1
        player_id = row['player_id']
        top_card = row['top_card']
        if top_card:
            top_card_name = top_card[0]
        else:
            top_card_name = '*'
        legal_action_ids = row['legal_action_ids']
        action_id = row['action_id']

        held_cards = ordered(row['held_cards'])
        held_card_names = joined(held_cards)
        actionEvent = ActionEvent.decode_action(action_id)

        opponent_known_cards = ordered(row['opponent_known_cards'])
        opponent_known_cards_names = joined(opponent_known_cards)

        dead_cards = ordered(row['dead_cards'])
        dead_cards_names = joined(dead_cards)

        unknown_cards = ordered(row['unknown_cards'])
        unknown_cards_names = joined(unknown_cards)

        best_action, _ = agents[player_id].eval_step(state=row['env_state'])

        # Count every card the row accounts for; the top discard adds one.
        piles = (held_cards, dead_cards, unknown_cards, opponent_known_cards)
        deck_count = sum(len(pile) for pile in piles)
        if top_card:
            deck_count += 1
        if action_id in (0, 1):
            deck_count += 1 # kludge

        # NOTE: the leading whitespace inside this literal is part of the
        # returned text; keep it exactly as is.
        return f"""
            ### Gin Rummy ({current_row} of {max_rows})

            player_id: {player_id}

            opponent_known_cards: [{opponent_known_cards_names}]

            top_card: {top_card_name} dead_cards: [{dead_cards_names}]

            held_cards: [{held_card_names}]

            legal_actions: {legal_action_ids}

            action: {actionEvent} <{action_id}>

            unknown_cards: [{unknown_cards_names}]

            deck_count: {deck_count} Note: should be 52.

            best_action: {best_action} {ActionEvent.decode_action(best_action)}
        """

# Choose your row_id and your dataframe

In [None]:
# Set which dataframe to use.
# Clamp row_id here to avoid going out of bounds.
is_use_filtered_df = True # You can change this to False if you want
my_row_id = 5 # You can change this if you want
df = filtered_df if is_use_filtered_df else dataframe
# Valid positions are 0 .. len(df) - 1, so the upper bound is len(df) - 1
# (0 when df is empty; the panes handle an empty dataframe themselves).
row_id = min(my_row_id, max(len(df) - 1, 0))

# Build some panels

In [None]:
# Here we use Holoviz panel to view part of the pandas dataframe.
# I coded GamePane using the Holoviz panel library.
# You can look at the simple code in the github repo.
# Note: GamePane is within the file ReviewPlayWindow.py

# Note: the view must be the last statement of the jupyter notebook cell.

# Note: This is designed for dark mode only. I am not going to use light mode.

MyGamePane(dataframe=df, row_id=row_id, agents=env.agents)

In [None]:
class MyInfoTablePane(pn.pane.Markdown):
    """Markdown pane tabulating the agent's per-action values for one row."""

    def __init__(self, dataframe, row_id, agents):
        super().__init__()
        self.object = self.make_info_table(dataframe, row_id, agents)

    def make_info_table(self, dataframe, row_id:int, agents) -> str:
        """Render the values reported by the row's agent as a markdown table.

        Args:
            dataframe: The dataframe of recorded moves.
            row_id: Zero-based position of the row to show.
            agents: Agents indexed by player_id.

        Returns:
            The markdown table with actions sorted by value, highest first,
            or an empty string when the dataframe has no rows or the agent's
            eval_step returns no info.
        """
        if len(dataframe) == 0:
            return ""
        move = dataframe.iloc[row_id]
        player_id, env_state = move['player_id'], move['env_state']
        _, info = agents[player_id].eval_step(state=env_state)
        if not info:
            return ""
        ranked = sorted(info['values'].items(), key=lambda pair: pair[1], reverse=True)
        # The empty div is the hook for the 'orange_border_table' CSS rule.
        pieces = [
            "### DQNAgent\n",
            "<div class='orange_border_table'></div>",
            "| Action | Description | Q_value |",
            "\n",
            "| :--- | :--- | :---- |",
            "\n",
        ]
        for info_action, q_value in ranked:
            pieces += [
                f'|{info_action} | {ActionEvent.decode_action(info_action)} | {q_value} |',
                "\n",
            ]
        return " ".join(pieces)

In [None]:
# Here we use Holoviz panel to view another part of the pandas dataframe.
# I coded InfoTablePane using the Holoviz panel library.
# You can look at the simple code in the github repo.
# Note: InfoTablePane is within the file ReviewPlayWindow.py

# Note: This view is empty for player_id 1 since the opponent is not a DQNAgent.
# Look at the raw dataframe displayed above to see what rows correspond to player_id 0.
# The best examples are when player_id 0 is discarding.

# Note: the view must be the last statement of the jupyter notebook cell.

MyInfoTablePane(dataframe=df, row_id=row_id, agents=env.agents)

In [None]:
# Now create a view to display these components together.
# When you are satisfied, you can put the class into your python module library.
# Note: The title_pane uses markdown. It can be multi-lined (use triple quote).

class MyGameReviewer(pn.Column):
    """Column that stacks a title, the game pane and the info-table pane."""

    def __init__(self, dataframe, row_id, agents):
        super().__init__()
        shared = dict(dataframe=dataframe, row_id=row_id, agents=agents)
        children = (
            pn.pane.Markdown('## Game Reviewer'),
            MyGamePane(**shared),
            MyInfoTablePane(**shared),
        )
        # Append top to bottom: title, move summary, value table.
        for child in children:
            self.append(child)
        self.background = 'green'
        self.width = 1200
In [None]:
# Show the new view.
# Change the row_id value to get different steps in the games of the match.
# Rows with action_id > 3 correspond to the player discarding.

# Note: the view must be the last statement of the jupyter notebook cell.

# Note: This is designed for dark mode only. I am not going to use light mode.

MyGameReviewer(dataframe=df, row_id=row_id, agents=env.agents)