<a href="https://colab.research.google.com/github/VarunKandasamy/yahtzeeRL/blob/main/yahtzeeRLChallenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from enum import IntEnum
from typing import Optional

import gymnasium as gym
import matplotlib as plt
import numpy as np
from gymnasium import spaces

class Actions(IntEnum):
    """Named indices for the score-recording actions of the Yahtzee action space.

    Actions 0-30 select a subset of dice to reroll and have no names here;
    actions 31-43 record the current dice in one scorecard category.  The
    member values equal the integers passed to ``step`` and the scorecard
    slot of a member is ``value - 31``.
    """

    ACES = 31
    TWOS = 32
    THREES = 33
    FOURS = 34
    FIVES = 35
    SIXES = 36
    THREE_OF_A_KIND = 37
    FOUR_OF_A_KIND = 38
    FULL_HOUSE = 39
    SMALL_STRAIGHT = 40
    LARGE_STRAIGHT = 41
    CHANCE = 42
    YAHTZEE = 43

# When sampling, use a mask so that only actions valid in the current state are drawn.
# The observation space contains the current dice values, the number of rerolls left, and the scorecard (with 1s marking filled slots).

class YahtzeeEnv(gym.Env):
    """Single-player Yahtzee exposed through the Gymnasium ``Env`` interface.

    Actions (``Discrete(44)``):

    * ``0-30``  -- reroll a non-empty subset of the five dice.  Requires at
      least one reroll left in the current turn.
    * ``31-43`` -- record the current dice in scorecard slot ``action - 31``
      and start the next turn (all dice are rolled again and the reroll
      counter is restored).

    Scorecard slots: 0-5 Aces..Sixes, 6 three of a kind, 7 four of a kind,
    8 full house, 9 small straight, 10 large straight, 11 chance, 12 Yahtzee.

    Observation (``Dict``): ``dice`` (five values in 1..6), ``scorecard``
    (13 flags, 1 = slot filled), ``rerolls`` (rerolls left this turn),
    ``yahtzeeZero`` (1 if the Yahtzee slot was filled with a zero) and
    ``upperSectionScore`` (sum of the upper section, without the bonus).

    The reward of a step is the number of points it added to the score.
    The episode terminates once all 13 scorecard slots are filled.

    Simplification of the official rules: a further Yahtzee rolled after the
    Yahtzee slot was filled with 50 is claimed by playing the Yahtzee action
    again; it is worth 100 points and does not consume another slot.
    """

    _NUM_DICE = 5
    _NUM_CATEGORIES = 13
    _NUM_REROLL_ACTIONS = 31
    _YAHTZEE_PICK = 12
    _UPPER_BONUS_THRESHOLD = 63
    _UPPER_BONUS = 35

    def __init__(self, rerolls: int = 2):
        """Create the environment.

        Args:
            rerolls: Number of rerolls available in every turn, after the
                automatic first roll.

        Raises:
            ValueError: If ``rerolls`` is negative.
        """
        super().__init__()
        if rerolls < 0:
            raise ValueError("rerolls must be non-negative")

        # Remembered so that reset() and every new turn restore the
        # configured value instead of a hard-coded 2.
        self._max_rerolls = rerolls

        # The dice and scorecard (dice are all zero until reset() rolls them)
        self.dice = np.zeros(self._NUM_DICE, dtype=np.int8)
        self.scorecard = np.zeros(self._NUM_CATEGORIES, dtype=np.bool_)
        self.yahtzeeZero = False  # True if the Yahtzee slot was filled with a zero
        self.upperSectionScore = 0

        self.rerolls = rerolls
        self.score = 0

        self.observation_space = gym.spaces.Dict(
            {
                "dice": gym.spaces.Box(
                    low=1, high=6, shape=(self._NUM_DICE,), dtype=np.int8
                ),
                "scorecard": gym.spaces.MultiBinary(self._NUM_CATEGORIES),
                # The first roll is automatic, so 0..rerolls rerolls remain.
                "rerolls": gym.spaces.Discrete(rerolls + 1),
                "yahtzeeZero": gym.spaces.Discrete(2),  # 0 = False, 1 = True
                # Five of every face: 5 * (1+2+3+4+5+6) = 105 is the maximum.
                "upperSectionScore": gym.spaces.Discrete(106),
            }
        )

        # 31 reroll subsets plus 13 scorecard categories.
        self.action_space = gym.spaces.Discrete(
            self._NUM_REROLL_ACTIONS + self._NUM_CATEGORIES
        )
        # Maps an action to its 0/1 vector: a five-element reroll mask for
        # actions 0-30, a 13-element one-hot scorecard slot for 31-43.
        self._action_to_direction = self._build_action_table()

    @classmethod
    def _build_action_table(cls):
        """Return the dict mapping every action to its 0/1 vector."""
        table = {}
        # Reroll masks read as 5-bit numbers with die 0 as the most
        # significant bit.  The order follows the original hand-written
        # table: patterns 1..15, then 17..31, and finally 16 ("only die 0")
        # as action 30.
        patterns = [*range(1, 16), *range(17, 32), 16]
        for action, pattern in enumerate(patterns):
            table[action] = np.array(
                [(pattern >> bit) & 1 for bit in range(cls._NUM_DICE - 1, -1, -1)]
            )
        # One-hot vectors whose set index equals the scorecard slot.
        for pick in range(cls._NUM_CATEGORIES):
            one_hot = np.zeros(cls._NUM_CATEGORIES, dtype=np.int64)
            one_hot[pick] = 1
            table[cls._NUM_REROLL_ACTIONS + pick] = one_hot
        return table

    def _roll(self, count: int):
        """Roll ``count`` dice with the environment's seeded generator."""
        return self.np_random.integers(1, 7, size=count, dtype=np.int8)

    def _get_obs(self):
        """Return the current observation as copies matching ``observation_space``."""
        return {
            "dice": self.dice.astype(np.int8),  # astype returns a copy
            "scorecard": self.scorecard.astype(np.int8),
            "rerolls": int(self.rerolls),
            "yahtzeeZero": int(self.yahtzeeZero),
            "upperSectionScore": int(self.upperSectionScore),
        }

    def _get_info(self):
        """Return auxiliary information: the total score so far."""
        return {"score": self.score}

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new game and perform the automatic first roll.

        Args:
            seed: Seed for the environment's random generator.
            options: Unused.

        Returns:
            The tuple ``(observation, info)``.
        """
        super().reset(seed=seed)  # seeds self.np_random
        self.score = 0
        self.upperSectionScore = 0
        self.yahtzeeZero = False
        self.scorecard = np.zeros(self._NUM_CATEGORIES, dtype=np.bool_)
        self.rerolls = self._max_rerolls
        self.dice = self._roll(self._NUM_DICE)

        return self._get_obs(), self._get_info()

    @staticmethod
    def _lower_section_points(dice, pick: int) -> int:
        """Return the points ``dice`` are worth in lower-section slot 6-11."""
        total = int(np.sum(dice))
        counts = np.bincount(dice, minlength=7)[1:]  # counts[f - 1] = dice showing f
        faces = {int(face) for face in np.flatnonzero(counts) + 1}

        if pick == 6:  # three of a kind
            return total if counts.max() >= 3 else 0
        if pick == 7:  # four of a kind
            return total if counts.max() >= 4 else 0
        if pick == 8:  # full house: exactly a pair and a triple
            return 25 if sorted(counts[counts > 0].tolist()) == [2, 3] else 0
        if pick == 9:  # small straight: any four consecutive faces
            runs = ({1, 2, 3, 4}, {2, 3, 4, 5}, {3, 4, 5, 6})
            return 30 if any(run <= faces for run in runs) else 0
        if pick == 10:  # large straight: five consecutive faces
            return 40 if faces in ({1, 2, 3, 4, 5}, {2, 3, 4, 5, 6}) else 0
        if pick == 11:  # chance
            return total
        raise ValueError(f"Not a lower-section category: {pick}")

    def _score_yahtzee(self) -> int:
        """Record the dice in the Yahtzee slot and return the points earned."""
        if self.yahtzeeZero:
            raise ValueError(
                "Tried to plot a value in yahzee but yahtzee already has zero"
            )
        is_yahtzee = bool(np.all(self.dice == self.dice[0]))
        if not self.scorecard[self._YAHTZEE_PICK]:
            self.scorecard[self._YAHTZEE_PICK] = True
            if is_yahtzee:
                return 50
            self.yahtzeeZero = True
            return 0
        # The slot already holds a 50: only another Yahtzee may be claimed.
        if is_yahtzee:
            return 100
        raise ValueError("Tried to plot score in taken box on scoresheet")

    def _score_pick(self, pick: int) -> int:
        """Fill scorecard slot ``pick`` with the current dice; return the points."""
        if pick == self._YAHTZEE_PICK:
            return self._score_yahtzee()
        if self.scorecard[pick]:
            raise ValueError("Tried to plot score in taken box on scoresheet")
        self.scorecard[pick] = True

        if pick < 6:
            face = pick + 1
            points = int(np.count_nonzero(self.dice == face)) * face
            before = self.upperSectionScore
            self.upperSectionScore = before + points
            # The bonus is paid once, on the step that crosses the threshold.
            crossed = before < self._UPPER_BONUS_THRESHOLD <= before + points
            return points + (self._UPPER_BONUS if crossed else 0)
        return self._lower_section_points(self.dice, pick)

    def step(self, action):
        """Apply ``action`` and return the Gymnasium five-tuple.

        Args:
            action: Integer in ``0..43``; see the class docstring.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``reward`` is the number of points gained by this step.

        Raises:
            ValueError: If the action is unknown, a reroll is requested with
                no rerolls left, or the chosen scorecard slot is taken.
        """
        action = int(action)
        if action not in self._action_to_direction:
            raise ValueError(f"Unknown action: {action}")

        reward = 0
        if action < self._NUM_REROLL_ACTIONS:
            if self.rerolls <= 0:
                raise ValueError(
                    "Error in Step: Tried to take a reroll action while out of rerolls"
                )
            # Only the dice selected by the action's mask are rolled again.
            mask = self._action_to_direction[action].astype(bool)
            self.dice[mask] = self._roll(int(mask.sum()))
            self.rerolls -= 1
        else:
            # _score_pick validates before it mutates, so an invalid pick
            # leaves the turn state untouched.
            reward = self._score_pick(action - self._NUM_REROLL_ACTIONS)
            self.score += reward
            # Next turn: restore the rerolls and do the automatic first roll.
            self.rerolls = self._max_rerolls
            self.dice = self._roll(self._NUM_DICE)

        # The game is over once every scorecard slot has been filled.
        terminated = bool(self.scorecard.all())
        truncated = False

        return self._get_obs(), reward, terminated, truncated, self._get_info()