In [1]:
import sys
import numpy as np

# Put the local Kniffel checkout on the import path so the `src.*` imports
# below can be resolved.
# NOTE(review): this is a machine-specific absolute path; the notebook will not
# run on another machine unless this line is adjusted.
sys.path.insert(1, "C:/Users/elias/OneDrive/Dokumente/Git/Kniffel/")

from src.env.open_ai_env import KniffelEnv
from src.env.env_helper import EnumAction




In [2]:
# Reward configuration handed to KniffelEnv when a scripted game is replayed.
# Values written as `-0` are plain 0 in Python; the sign carries no meaning.
env_config = {
    # Flat rewards for game-level events.
    # NOTE(review): in the recorded run, finishing the fives right after
    # "Bonus reached" yielded 125.0 (25.0 + 100) — presumably `reward_bonus`
    # is added once when the top-section bonus is reached; confirm in KniffelEnv.
    "reward_roll_dice": 0,
    "reward_game_over": -0,
    "reward_finish": 25,
    "reward_bonus": 100,
    "reward_mode": "custom",  # custom or kniffel
    "state_mode": "continuous",  # binary or continuous
    # Per-category reward table. Every category uses the same six keys:
    # `reward_five_dices` ... `reward_one_dice` and `reward_slash`.
    # NOTE(review): presumably the key is chosen by how many dice count towards
    # the category and `reward_slash` applies when it is filled with zero
    # points — confirm against KniffelEnv.
    "reward_kniffel": {
        # --- top section (ones .. sixes) ---
        # For three to five matching dice the reward equals count * face value.
        "reward_ones": {
            "reward_five_dices": 5,
            "reward_four_dices": 4.0,
            "reward_three_dices": 2.0,
            "reward_two_dices": -0,
            "reward_one_dice": -1,
            "reward_slash": -2,
        },
        "reward_twos": {
            "reward_five_dices": 10.0,
            "reward_four_dices": 8.0,
            "reward_three_dices": 6.0,
            "reward_two_dices": -0,
            "reward_one_dice": -3,
            "reward_slash": -4,
        },
        "reward_threes": {
            "reward_five_dices": 15.0,
            "reward_four_dices": 12.0,
            "reward_three_dices": 9.0,
            "reward_two_dices": -0,
            "reward_one_dice": -4.5,
            "reward_slash": -6,
        },
        "reward_fours": {
            "reward_five_dices": 20.0,
            "reward_four_dices": 16.0,
            "reward_three_dices": 12.0,
            "reward_two_dices": -0,
            "reward_one_dice": -6,
            "reward_slash": -8,
        },
        "reward_fives": {
            "reward_five_dices": 25.0,
            "reward_four_dices": 20.0,
            "reward_three_dices": 15.0,
            "reward_two_dices": -0,
            "reward_one_dice": -7.5,
            "reward_slash": -10,
        },
        "reward_sixes": {
            "reward_five_dices": 30.0,
            "reward_four_dices": 24.0,
            "reward_three_dices": 18.0,
            "reward_two_dices": -0,
            "reward_one_dice": -9,
            "reward_slash": -12,
        },
        # --- bottom section ---
        "reward_three_times": {
            "reward_five_dices": 30.0,
            "reward_four_dices": 24.0,
            "reward_three_dices": 18.0,
            "reward_two_dices": 9.0,
            "reward_one_dice": 0.9,
            "reward_slash": -0,
        },
        "reward_four_times": {
            "reward_five_dices": 50.0,
            "reward_four_dices": 40.0,
            "reward_three_dices": 15.0,
            "reward_two_dices": 5,
            "reward_one_dice": 0.7,
            "reward_slash": -12,
        },
        # NOTE(review): `None` entries presumably mark dice counts that cannot
        # occur for a combination category — confirm how KniffelEnv treats None.
        "reward_full_house": {
            "reward_five_dices": 50.0,
            "reward_four_dices": None,
            "reward_three_dices": None,
            "reward_two_dices": None,
            "reward_one_dice": None,
            "reward_slash": -0,
        },
        "reward_small_street": {
            "reward_five_dices": 25.0,
            "reward_four_dices": 25.0,
            "reward_three_dices": None,
            "reward_two_dices": None,
            "reward_one_dice": None,
            "reward_slash": -0,
        },
        "reward_large_street": {
            "reward_five_dices": 60.0,
            "reward_four_dices": None,
            "reward_three_dices": None,
            "reward_two_dices": None,
            "reward_one_dice": None,
            "reward_slash": -0,
        },
        "reward_kniffel": {
            "reward_five_dices": 100.0,
            "reward_four_dices": None,
            "reward_three_dices": None,
            "reward_two_dices": None,
            "reward_one_dice": None,
            "reward_slash": -25,
        },
        "reward_chance": {
            "reward_five_dices": 30.0,
            "reward_four_dices": 24.0,
            "reward_three_dices": 18.0,
            "reward_two_dices": 9.0,
            "reward_one_dice": 0.9,
            "reward_slash": -0,
        },
    },
}


In [3]:
def send_step(env: KniffelEnv, dice, action: EnumAction, logging=False):
    """Optionally mock the dice, perform one environment step and return its reward.

    Args:
        env: Environment to step; it must provide ``mock`` and ``step``.
        dice: Dice values handed to ``env.mock`` before stepping. ``None`` or an
            empty sequence leaves the environment's dice untouched.
        action: Action handed to ``env.step``.
        logging: When true, print the reward together with the points reported
            by ``env.kniffel_helper.kniffel.get_points()``.

    Returns:
        The reward, i.e. the second element of the 4-tuple returned by
        ``env.step``.
    """
    # Accept None in addition to empty sequences. len() is kept (instead of a
    # bare truthiness test) so array-like dice arguments keep working.
    if dice is not None and len(dice) > 0:
        env.mock(dice)

    # Only the reward is used; the remaining step results are discarded.
    _, reward, _, _ = env.step(action)

    if logging:
        print(
            f"     Reward ({action.name}): {reward}, Kniffel points: {env.kniffel_helper.kniffel.get_points()}"
        )

    return reward

In [4]:
def send_steps(name: str, actions: list, env_config: dict, logging=False):
    """Replay a scripted game on a fresh KniffelEnv and print a summary.

    Args:
        name: Headline printed before the game is replayed.
        actions: Sequence of dicts, each with a ``"dices"`` entry (dice to mock)
            and an ``"action"`` entry (action to perform).
        env_config: Environment configuration; ``"reward_mode"`` and
            ``"state_mode"`` are read from it and the whole dict is passed on
            to ``KniffelEnv``.
        logging: Forwarded to ``send_step`` to print every single step. The
            environment itself is always created with ``logging=False``.

    Returns:
        None. The final state and the summed reward are printed.
    """
    env = KniffelEnv(
        env_config,
        reward_mode=env_config["reward_mode"],
        state_mode=env_config["state_mode"],
        env_observation_space=42,
        env_action_space=57,
        logging=False,
    )

    print(name)
    env.reset()

    # Accumulate the reward over all scripted turns.
    total_reward = 0
    for turn in actions:
        total_reward += send_step(
            env, turn["dices"], turn["action"], logging=logging
        )

    print(f"     {env.kniffel_helper.kniffel.get_state()}")

    print()
    print("     Final Reward: ", total_reward)
    print()
    print()

In [5]:
# Scripted perfect game: one (dice, action) pair per category.
perfect_game = [
    {"dices": dices, "action": action}
    for dices, action in (
        # top
        ([1, 1, 1, 1, 1], EnumAction.FINISH_ONES),
        ([2, 2, 2, 2, 2], EnumAction.FINISH_TWOS),
        ([3, 3, 3, 3, 3], EnumAction.FINISH_THREES),
        ([4, 4, 4, 4, 4], EnumAction.FINISH_FOURS),
        ([5, 5, 5, 5, 5], EnumAction.FINISH_FIVES),
        ([6, 6, 6, 6, 6], EnumAction.FINISH_SIXES),
        # bottom
        ([6, 6, 6, 6, 6], EnumAction.FINISH_THREE_TIMES),
        ([6, 6, 6, 6, 6], EnumAction.FINISH_FOUR_TIMES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_FULL_HOUSE),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_SMALL_STREET),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_LARGE_STREET),
        ([6, 6, 6, 6, 6], EnumAction.FINISH_KNIFFEL),
        ([6, 6, 6, 6, 6], EnumAction.FINISH_CHANCE),
    )
]

# Scripted game worth roughly 150 points: one (dice, action) pair per category.
game_150_points = [
    {"dices": dices, "action": action}
    for dices, action in (
        # top
        ([1, 2, 2, 2, 2], EnumAction.FINISH_ONES),
        ([2, 3, 4, 3, 3], EnumAction.FINISH_TWOS),
        ([3, 3, 4, 4, 4], EnumAction.FINISH_THREES),
        ([4, 4, 4, 4, 5], EnumAction.FINISH_FOURS),
        ([5, 5, 3, 3, 3], EnumAction.FINISH_FIVES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_SIXES),
        # bottom
        ([6, 6, 6, 4, 1], EnumAction.FINISH_THREE_TIMES),
        ([6, 6, 6, 1, 2], EnumAction.FINISH_FOUR_TIMES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_FULL_HOUSE),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_SMALL_STREET),
        ([1, 2, 3, 4, 2], EnumAction.FINISH_LARGE_STREET),
        ([6, 6, 6, 6, 5], EnumAction.FINISH_KNIFFEL),
        ([6, 6, 6, 1, 1], EnumAction.FINISH_CHANCE),
    )
]

# Scripted game labelled "~250 points": one (dice, action) pair per category.
# NOTE(review): by standard Kniffel scoring these rolls appear to total about
# 207 (four-of-a-kind and Kniffel score zero) — confirm the label.
game_250_points = [
    {"dices": dices, "action": action}
    for dices, action in (
        # top
        ([2, 2, 2, 2, 2], EnumAction.FINISH_ONES),
        ([2, 2, 2, 3, 3], EnumAction.FINISH_TWOS),
        ([3, 3, 4, 4, 4], EnumAction.FINISH_THREES),
        ([4, 4, 4, 4, 5], EnumAction.FINISH_FOURS),
        ([5, 5, 5, 3, 3], EnumAction.FINISH_FIVES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_SIXES),
        # bottom
        ([6, 6, 6, 6, 6], EnumAction.FINISH_THREE_TIMES),
        ([6, 6, 6, 1, 2], EnumAction.FINISH_FOUR_TIMES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_FULL_HOUSE),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_SMALL_STREET),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_LARGE_STREET),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_KNIFFEL),
        ([6, 6, 6, 1, 2], EnumAction.FINISH_CHANCE),
    )
]

# Scripted game labelled "~300 points": one (dice, action) pair per category.
# Identical to the "~250 points" script except that the Kniffel turn is a
# real five of a kind.
# NOTE(review): by standard Kniffel scoring these rolls appear to total about
# 257 — confirm the label.
game_300_points = [
    {"dices": dices, "action": action}
    for dices, action in (
        # top
        ([2, 2, 2, 2, 2], EnumAction.FINISH_ONES),
        ([2, 2, 2, 3, 3], EnumAction.FINISH_TWOS),
        ([3, 3, 4, 4, 4], EnumAction.FINISH_THREES),
        ([4, 4, 4, 4, 5], EnumAction.FINISH_FOURS),
        ([5, 5, 5, 3, 3], EnumAction.FINISH_FIVES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_SIXES),
        # bottom
        ([6, 6, 6, 6, 6], EnumAction.FINISH_THREE_TIMES),
        ([6, 6, 6, 1, 2], EnumAction.FINISH_FOUR_TIMES),
        ([6, 6, 6, 5, 5], EnumAction.FINISH_FULL_HOUSE),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_SMALL_STREET),
        ([1, 2, 3, 4, 5], EnumAction.FINISH_LARGE_STREET),
        ([6, 6, 6, 6, 6], EnumAction.FINISH_KNIFFEL),
        ([6, 6, 6, 1, 2], EnumAction.FINISH_CHANCE),
    )
]


In [6]:
# Replay the perfect game with per-step logging enabled.
send_steps("Perfect game:", perfect_game, env_config, logging=True)

Perfect game:
points Con:  1.0
     Reward (FINISH_ONES): 5.0, Kniffel points: 5
points Con:  1.0
points Con:  1.0
points Con:  1.0
     Reward (FINISH_TWOS): 10.0, Kniffel points: 15
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
     Reward (FINISH_THREES): 15.0, Kniffel points: 30
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
     Reward (FINISH_FOURS): 20.0, Kniffel points: 50
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
Bonus reached for custom reward mode.
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
     Reward (FINISH_FIVES): 125.0, Kniffel points: 110
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
points Con:  1.0
     Reward (FINISH_SIXES): 30.0, Kniffel points: 140
points Con:  1.0
p

In [7]:
#send_steps("Normal game (150 points):", game_150_points, env_config, True)

In [8]:
# Replay the "250 points" script with per-step logging enabled.
send_steps("Normal game (250 points):", game_250_points, env_config, logging=True)

Normal game (250 points):
     Reward (FINISH_ONES): 0.0, Kniffel points: 0
points Con:  0.6
     Reward (FINISH_TWOS): 6.0, Kniffel points: 6
points Con:  0.6
points Con:  0.6
points Con:  0.4
     Reward (FINISH_THREES): 0.0, Kniffel points: 12
points Con:  0.6
points Con:  0.4
points Con:  0.6
points Con:  0.4
points Con:  0.8
     Reward (FINISH_FOURS): 16.0, Kniffel points: 28
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
     Reward (FINISH_FIVES): 15.0, Kniffel points: 43
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
     Reward (FINISH_SIXES): 18.0, Kniffel points: 61
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  1.0
     Reward (FINISH_THREE_TIMES): 30.0, Kniffel po

In [9]:
# Replay the "300 points" script with per-step logging enabled.
send_steps("Normal game (300 points):", game_300_points, env_config, logging=True)


Normal game (300 points):
     Reward (FINISH_ONES): 0.0, Kniffel points: 0
points Con:  0.6
     Reward (FINISH_TWOS): 6.0, Kniffel points: 6
points Con:  0.6
points Con:  0.6
points Con:  0.4
     Reward (FINISH_THREES): 0.0, Kniffel points: 12
points Con:  0.6
points Con:  0.4
points Con:  0.6
points Con:  0.4
points Con:  0.8
     Reward (FINISH_FOURS): 16.0, Kniffel points: 28
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
     Reward (FINISH_FIVES): 15.0, Kniffel points: 43
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
     Reward (FINISH_SIXES): 18.0, Kniffel points: 61
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  0.6
points Con:  0.4
points Con:  0.8
points Con:  0.6
points Con:  0.6
points Con:  1.0
     Reward (FINISH_THREE_TIMES): 30.0, Kniffel po