<a href="https://colab.research.google.com/github/abtheo/BlackjackRL/blob/master/Blackjack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Blackjack

---


In [None]:
import random
import numpy as np
import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
#import keras.backend as K
from keras import regularizers
from keras.callbacks import *

## Cards & Deck

In [None]:
#Card / Deck module
class Deck:
    """A collection of playing cards supporting filtering and random draws.

    Args:
        cardList: optional iterable of card objects (anything exposing
            ``value`` and ``suit`` attributes). When omitted, a full 52-card
            deck is built from ``Card``. The input is copied, so drawing from
            the deck never mutates the caller's list.
    """

    def __init__(self, cardList=None):
        self.all_suits = ['hearts', 'diamonds', 'spades', 'clubs']
        self.all_values = ['ace', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'jack', 'queen', 'king']

        if cardList is not None:
            # Copy: drawCard/drawHand remove cards in place.
            self.cards = list(cardList)
        else:
            self.cards = [Card(value, suit) for value in self.all_values for suit in self.all_suits]

    def __repr__(self):
        return ', '.join(c.value + "-" + c.suit for c in self.cards)

    def getAll(self, properties):
        """Return every card whose value OR suit matches any of ``properties``.

        ``properties`` may be a single string or a list of strings. Each card
        appears at most once, in deck order.
        """
        if isinstance(properties, str):
            properties = [properties]
        return [c for c in self.cards if c.value in properties or c.suit in properties]

    def getSelect(self, values, suits):
        """Return every card whose value is in ``values`` AND suit is in ``suits``.

        Either argument may be a single string or a list of strings. Each card
        appears at most once, even if the inputs contain repeated entries.
        """
        if isinstance(values, str):
            values = [values]
        if isinstance(suits, str):
            suits = [suits]
        return [c for c in self.cards if c.value in values and c.suit in suits]

    def drawHand(self, hand_size=2):
        """Draw ``hand_size`` random cards (removing them from the deck) into a new Hand.

        Raises:
            IndexError: if the deck runs out of cards part-way through.
        """
        hand = Hand()
        for _ in range(hand_size):
            hand.addCards(self.drawCard())
        return hand

    def drawCard(self):
        """Remove and return one random card from the deck.

        Raises:
            IndexError: if the deck is empty.
        """
        if not self.cards:
            raise IndexError("Cannot draw a card from an empty deck")
        card = random.choice(self.cards)
        self.cards.remove(card)
        return card
     
       
class Card:
    """A single playing card with its blackjack point value.

    Args:
        value: one of 'ace', '2'..'10', 'jack', 'queen', 'king'.
        suit: one of 'hearts', 'diamonds', 'spades', 'clubs'. When omitted, a
            suit is picked at random.

    Raises:
        ValueError: if ``value`` or ``suit`` is not a recognised name.
    """

    def __init__(self, value, suit=None):
        self.all_suits = ['hearts', 'diamonds', 'spades', 'clubs']
        self.all_values = ['ace', '2', '3', '4', '5', '6', '7', '8', '9', '10', 'jack', 'queen', 'king']
        self.points = 0

        if suit is None:
            suit = random.choice(self.all_suits)

        # Validate value and suit separately so the message names the real culprit.
        if value not in self.all_values:
            raise ValueError('Invalid value {0} given for a playing card'.format(value))
        if suit not in self.all_suits:
            raise ValueError('Invalid suit {0} given for a playing card'.format(suit))

        self.value = value
        self.suit = suit
        self.points = self.valueToPoints(value)

    def __repr__(self):
        return "{0}-{1}".format(self.value, self.suit)

    def __int__(self):
        return self.points

    def valueToPoints(self, value):
        """Map a card value name to blackjack points (None for an unknown name).

        Face cards score 10 and number cards score their face value. An ace is
        always encoded as 11 here; this method never returns 1 for it.
        """
        if value not in self.all_values:
            return None

        if value in ('jack', 'queen', 'king'):
            return 10
        if value == 'ace':
            return 11
        return int(value)

class Hand:
  """The cards held by one participant, with blackjack scoring.

  Args:
    cards: optional iterable of card objects (each exposing ``value`` and
      ``points``). It is copied so later additions do not mutate the caller's
      list. ``None`` means an empty hand.
  """

  def __init__(self, cards=None):
    self.cards = list(cards) if cards is not None else None

  def __getitem__(self, key):
    """Return the card(s) at ``key``; raises if the hand has no cards yet."""
    if self.cards is None:
      raise Exception("No cards in hand, cannot get index")
    return self.cards[key]

  def __str__(self):
    return str(self.cards)

  def getPoints(self, cards=None):
    """Return the blackjack total of ``cards`` (defaults to this hand).

    Aces count as 11, and each ace is demoted to 1 (minus 10 points) for as
    long as the total exceeds 21. An empty hand scores 0.
    """
    if cards is None:
      cards = self.cards
    if not cards:
      return 0

    total = sum(card.points for card in cards)

    # Demote one ace at a time: a hand may hold several aces, and only as many
    # as needed should drop from 11 to 1.
    soft_aces = sum(1 for card in cards if card.value == "ace")
    while total > 21 and soft_aces > 0:
      total -= 10
      soft_aces -= 1
    return total

  def addCards(self, cards):
    """Add a single Card or an iterable of cards to the hand."""
    #Allows single card to be passed
    if isinstance(cards, Card):
      cards = [cards]
    if self.cards is None:
      # Copy so the hand never shares (and later mutates) the caller's list.
      self.cards = list(cards)
    else:
      #Concatenate to hand
      self.cards += list(cards)


# Agents

In [None]:
class RandomAgent:
  """Baseline player that chooses its action at random."""

  def __init__(self, hand=None):
    # Stake placed at the start of every round.
    self.starting_bet = 10
    # Action ids as interpreted by Game.step: 0 = hit, 1 = stick, 2 = double down.
    self.action_space = [0, 1, 2]
    self.hand = hand

  def setHand(self, hand):
    """Replace the agent's current hand."""
    self.hand = hand

  def determineAction(self):
    """Return one action id drawn at random from ``action_space``."""
    choices = self.action_space
    return random.choice(choices)

#Hit on 16, Stand on 17
class Dealer():
  """House player following the fixed rule: hit on 16 or less, stand on 17+.

  Attributes:
    hand: the dealer's hand (None until one is assigned).
    end_state: 0 while the dealer must keep drawing, 1 once the dealer
      sticks, -1 once the dealer busts.
  """

  def __init__(self, hand=None):
    self.hand = hand
    self.end_state = 0

  def getShowingCard(self):
    """Return the dealer's face-up card (the first card of the hand)."""
    return self.hand.cards[0]

  def setHand(self, hand):
    """Assign a new hand and restart the dealer's round state."""
    self.hand = hand
    # A new hand means a new round; a stale stick/bust state would make
    # playRound return immediately without playing.
    self.end_state = 0

  def play_step(self, verbose=0):
    """Evaluate the current hand and set ``end_state`` accordingly.

    Raises:
      Exception: if the dealer has no hand.
    """
    # Check the dealer's own hand, not whatever global `hand` the notebook
    # happens to have defined.
    if self.hand is None:
      raise Exception("Dealer has no hand, cannot play.")

    points = self.hand.getPoints()
    if verbose > 0: print("Dealer points:", points)
    #Bust! Dealer loses
    if points > 21:
      if verbose > 0: print("Dealer Bust!")
      self.end_state = -1
    #Stand on 17 +
    elif points >= 17:
      if verbose > 0: print("Dealer sticks.")
      self.end_state = 1
    #Hit on 16 -
    else:
      if verbose > 0: print("Dealer hits!")
      self.end_state = 0

  def playRound(self, deck, verbose=0):
    """Draw from ``deck`` until the dealer sticks or busts.

    Returns:
      1 if the dealer sticks, -1 if the dealer busts.

    Raises:
      Exception: if the dealer has no hand.
    """
    if self.hand is None:
      raise Exception("Dealer has no hand, cannot play.")

    if verbose > 0: print("Dealer has: ", self.hand.cards)
    while self.end_state == 0:
      self.play_step(verbose=verbose)

      if self.end_state == 0:
        new_card = deck.drawCard()
        self.hand.addCards(new_card)
        if verbose > 0: print("Dealer draws ", new_card)

    return self.end_state


In [None]:
# Smoke test: deal a two-card hand from a fresh deck, hand it to a new dealer,
# and let the dealer play the round out. The cell's value is the dealer's
# final state (1 = stuck, -1 = bust).
deck = Deck()
hand = deck.drawHand(2)
dealer = Dealer()
dealer.setHand(hand)
dealer.playRound(deck)

1

# Game Environment

In [None]:
class Game:
  """Single-player blackjack environment with a gym-like reset/step API.

  Actions: 0 = hit, 1 = stick, 2 = double down (double the bet, take exactly
  one card, then settle).

  ``step`` returns ``(observation, reward, done)`` where ``observation`` is
  ``[player_points, dealer_showing_card_points]`` and ``done`` is 0 while the
  round continues, 1 when the player wins and -1 when the player loses.

  Args:
    player: the agent to seat at the table; it must expose ``setHand``,
      ``hand`` and ``starting_bet``. Defaults to a new ``RandomAgent``.
  """

  HIT, STICK, DOUBLE_DOWN = 0, 1, 2

  def __init__(self, player=None):
    # The default agent is created here, per Game. A `RandomAgent()` default in
    # the signature would be built once at definition time and shared.
    self.deck = Deck()
    self.dealer = Dealer()
    self.player = player if player is not None else RandomAgent()
    self.bet = 0
    # Result of the dealer's round: None until played, then 1 (stuck) or -1 (bust).
    self.dealer_outcome = None

  def reset(self):
    """Start a new round: fresh deck, new hands, bet reset to the player's stake."""
    self.deck = Deck()
    # Keep the seated player (it may be a trained agent) and only deal it a new
    # hand. The player is dealt before the dealer.
    self.player.setHand(self.deck.drawHand(2))
    self.dealer = Dealer(self.deck.drawHand(2))

    self.bet = self.player.starting_bet
    # Forget the previous round's dealer result.
    self.dealer_outcome = None

  def playDealerGame(self, verbose=0):
    """Play the dealer's whole round and remember whether it stuck or bust."""
    self.dealer_outcome = self.dealer.playRound(self.deck, verbose=verbose)

  def step(self, action, verbose=0):
    """Apply one player action and return ``(observation, reward, done)``.

    Rewards: a hit that does not bust yields 1; a bust loses the bet; a settled
    round wins or loses the bet (twice the bet after a double down).

    Raises:
      ValueError: if ``action`` is not 0, 1 or 2.
    """
    if verbose > 0: print("Player has:", self.player.hand)

    if action == self.HIT:
      card = self._deal_to_player()
      player_bust = self.isHandBust(self.player.hand)
      if verbose > 0:
        print("Player draws: ", card)
        print("Player points: ", self.player.hand.getPoints())

      if player_bust:
        return (self._observation(), -self.bet, -1)
      return (self._observation(), 1, 0)

    if action == self.STICK:
      if verbose > 0: print("Player sticks on ", self.player.hand.getPoints())
      return self._settle(self.bet)

    if action == self.DOUBLE_DOWN:
      if verbose > 0: print("Player Doubles Down!")
      card = self._deal_to_player()
      player_bust = self.isHandBust(self.player.hand)

      if verbose > 0:
        print("Player draws: ", card)
        print("Player points: ", self.player.hand.getPoints())
        print("Player Bust? ", player_bust)

      if player_bust:
        return (self._observation(), -self.bet * 2, -1)
      return self._settle(self.bet * 2)

    raise ValueError("Unknown action {0!r}; expected 0 (hit), 1 (stick) or 2 (double down)".format(action))

  def isHandBust(self, hand):
    """Return True if ``hand`` scores more than 21."""
    if hand is None:
      raise Exception("None hand given to evaluation function")
    return hand.getPoints() > 21

  def _observation(self):
    """Build the [player points, dealer showing-card points] observation."""
    return [self.player.hand.getPoints(), self.dealer.getShowingCard().points]

  def _deal_to_player(self):
    """Draw one card from the deck into the player's hand and return it."""
    card = self.deck.drawCard()
    self.player.hand.addCards(card)
    return card

  def _settle(self, stake):
    """Compare hands once the player stops drawing; win or lose ``stake``."""
    # Make sure the dealer has played this round before comparing.
    if self.dealer_outcome is None:
      self.playDealerGame()

    dealer_stuck = self.dealer_outcome > 0
    # Ties go to the dealer (>=); a bust dealer always loses.
    if dealer_stuck and self.dealer.hand.getPoints() >= self.player.hand.getPoints():
      return (self._observation(), -stake, -1)
    return (self._observation(), stake, 1)


# Main Loop

In [None]:
# Play a few games with the environment's random player and record every
# [observation, one-hot action] pair whose step earned a reward of at least 1.
env = Game()
game_memory = []

for i in range(10):
  env.reset()
  env.playDealerGame()

  done = 0
  prev_obs = [env.player.hand.getPoints(), env.dealer.getShowingCard().points]
  while done == 0:
    action = env.player.determineAction()
    observation, reward, done = env.step(action)

    # Two-slot encoding: slot 0 = hit, slot 1 = any other action
    # (stick and double down share it).
    if action == 0:
      one_hot_action = [1,0]
    else:
      one_hot_action = [0,1]

    if reward >= 1:
      game_memory.append([prev_obs, one_hot_action])

    prev_obs = observation

print(game_memory)

[[[12, 10], [1, 0]], [[15, 10], [0, 1]], [[17, 3], [0, 1]], [[8, 7], [1, 0]], [[14, 3], [1, 0]], [[16, 3], [0, 1]], [[4, 10], [1, 0]], [[17, 10], [1, 0]], [[18, 10], [0, 1]]]


# Training Data

In [None]:
# Split the recorded [observation, one-hot action] pairs into a feature matrix X
# (one observation per row) and a label matrix Y (one one-hot action per row).
memory = np.array(game_memory)
X = memory[:, 0].reshape((-1, 2))
Y = memory[:, 1]

print(X.shape, Y.shape)

# Model Definition

In [None]:
#Returns a compiled neural network model, ready for training
def build_nn_model(features=2, out_shape=3):
  """Build and compile a dense softmax classifier.

  The network stacks three ReLU Dense layers (128, 256, 128 units), each with
  L1/L2 kernel regularisation and followed by 20% dropout, then a softmax
  Dense layer of width ``out_shape``. It is compiled with Adam and categorical
  cross-entropy.

  Args:
    features: number of input features per sample.
    out_shape: number of output classes.

  Returns:
    The compiled Keras model, ready for training.
  """
  inputs = Input(shape=[features])

  hidden = inputs
  for units in (128, 256, 128):
    hidden = Dense(units, activation='relu', kernel_regularizer=regularizers.l1_l2(0.001,0.001))(hidden)
    hidden = Dropout(rate=0.2)(hidden)

  outputs = Dense(out_shape, activation='softmax')(hidden)

  network = Model(inputs = inputs, outputs = outputs)
  network.compile(optimizer=Adam(lr=0.0001, decay=1e-6),
                  loss=['categorical_crossentropy'])
  return network



In [None]:
# Size the network from the data instead of relying on the defaults. The labels
# in Y are 2-wide one-hot vectors, while build_nn_model defaults to 3 outputs,
# which makes fit() fail on a label/output shape mismatch.
model = build_nn_model(features=X.shape[1], out_shape=Y.shape[1])

history = model.fit(X, Y, validation_split=0.2, batch_size=32, epochs=7)

In [None]:

# Sanity check: run the trained network on one hand-written observation,
# laid out as [player points, dealer showing-card points].
x = np.array([1, 10]).reshape(1, 2)

model.predict(x)

# Main Loop with Basic Model

In [None]:
# Let the supervised model play 1000 verbose games and report the percentage
# of games the player won.
env = Game()
game_memory = []
player_wins = []

for i in range(1000):
  print("========= STARTING =========")
  env.reset()

  print("======== DEALER TURN ========")
  env.playDealerGame(verbose=2)
  prev_obs = [env.player.hand.getPoints(), env.dealer.getShowingCard().points]

  print("======== PLAYER TURN ========")
  done = 0
  while done == 0:
    # Greedy policy: take the action with the highest predicted score.
    np_state = np.array([prev_obs])
    scores = model.predict(np_state)
    action = np.argmax(scores)

    observation, reward, done = env.step(action, verbose=12)
    prev_obs = observation

    if done == 1:
      print("WINNER ////// Player won!")
      player_wins.append(1)
    elif done == -1:
      print("WINNER +++++ Dealer won!")
      player_wins.append(0)

win_percentage = sum(player_wins) / len(player_wins) * 100
print(win_percentage)

# DQN Agent

In [None]:
from collections import deque
from sklearn.preprocessing import OneHotEncoder

# Capacity of the replay buffer (passed as the deque's maxlen).
REPLAY_MAX_SIZE = 50_000
# DQNAgent.train returns without training while the buffer holds fewer transitions than this.
REPLAY_MIN_SIZE = 128
# Number of transitions sampled from the buffer per training call.
MINIBATCH_SIZE = 64
# The target network is re-synced once the terminal-step counter exceeds this value.
UPDATE_EVERY = 5
# Discount factor applied to the best predicted future Q-value.
DISCOUNT = 0.99
norm = 30 #Max hand point value to normalize states
action_space = [0,1,2] #Hit, Stick, Double_down


class DQNAgent:
  """Deep Q-learning blackjack player with experience replay and a target network.

  Attributes:
    hand: the agent's current hand (None until dealt).
    action_space: action ids the agent can take (hit, stick, double down).
    starting_bet: stake placed at the start of each round.
    model: the online Q-network, trained on every call to ``train``.
    target_model: a periodically synced copy used to estimate future Q-values.
    replay_memory: bounded buffer of (s0, a, r, s1, done) transitions.
  """

  def __init__(self, hand=None):
    self.hand = hand
    self.action_space = action_space  #Hit, Stick, Double down
    self.starting_bet = 10

    #Main model
    self.model = self._build_q_network()

    #Target model
    self.target_model = self._build_q_network()
    self.target_model.set_weights(self.model.get_weights())
    self.target_update_counter = 0

    #Replay Memory
    self.replay_memory = deque(maxlen=REPLAY_MAX_SIZE)

  def _build_q_network(self):
    """Build a Q-network: build_nn_model's hidden stack with a linear, MSE-trained head.

    build_nn_model ends in a softmax trained with categorical cross-entropy,
    which confines its outputs to [0, 1] summing to 1. Q-values are unbounded
    regression targets (rewards here reach +/- twice the bet), so the softmax
    head is replaced by a linear layer and the model is compiled with mean
    squared error.
    """
    n_actions = len(self.action_space)
    base = build_nn_model(features=2, out_shape=n_actions)
    # The tensor feeding the softmax layer is the output of the hidden stack.
    hidden = base.layers[-1].input
    q_values = Dense(n_actions, activation='linear')(hidden)

    network = Model(inputs=base.input, outputs=q_values)
    network.compile(optimizer=Adam(lr=0.0001, decay=1e-6), loss='mse')
    return network

  def setHand(self, hand):
    """Replace the agent's current hand."""
    self.hand = hand

  #Transition = (s0, a, r, s1, d)
  def update_replay_memory(self, transition):
    """Append one (state, action, reward, new_state, done) tuple to the buffer."""
    self.replay_memory.append(transition)

  def get_Q(self, state):
    """Return the Q-values for ``state``, a batch holding one normalised state."""
    return self.model.predict(np.array(state))[0]

  def train(self, terminal_state, step):
    """Run one Q-learning update on a random minibatch from the replay buffer.

    Args:
      terminal_state: the environment's ``done`` flag for the latest step;
        any non-zero value counts towards the target-network sync.
      step: step index within the episode (currently unused).
    """
    # Wait until there is enough experience to train on (and to sample from).
    if len(self.replay_memory) < max(REPLAY_MIN_SIZE, MINIBATCH_SIZE):
      return

    minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

    current_states = np.array([transition[0] for transition in minibatch]) / norm
    current_qs_list = self.model.predict(current_states)

    new_current_states = np.array([transition[3] for transition in minibatch]) / norm
    future_qs_list = self.target_model.predict(new_current_states)

    # Start from the current predictions and overwrite only the Q-value of the
    # action that was actually taken.
    targets = np.array(current_qs_list)
    for index, (state, action, reward, new_state, done) in enumerate(minibatch):
      if not done:
        max_future_Q = np.max(future_qs_list[index])
        new_Q = reward + DISCOUNT * max_future_Q
      else:
        # Terminal transition: there is no future value to bootstrap from.
        new_Q = reward
      targets[index][action] = new_Q

    self.model.fit(current_states, targets, batch_size=MINIBATCH_SIZE, verbose=0, shuffle=False)

    #If done, i.e. done != 0
    if not terminal_state == 0:
      self.target_update_counter += 1

    if self.target_update_counter > UPDATE_EVERY:
      self.target_model.set_weights(self.model.get_weights())
      self.target_update_counter = 0

    


# Train DQN Agents

In [None]:
from tqdm import tqdm
player_wins = []


#Exploration
epsilon = 1
EPSILON_DECAY = 0.99995
MIN_EPSILON = 0.01

# Create the agent here so this cell runs on a fresh kernel.
agent = DQNAgent()
env = Game()
for i in tqdm(range(100_000)):
  env.reset()
  done = 0
  reward = 0
  score = 0
  step = 1
  env.playDealerGame(verbose=0)
  state = [env.player.hand.getPoints(), env.dealer.getShowingCard().points]

  while done == 0:

    np_state = np.array([state]) / norm

    # Epsilon-greedy: exploit the network with probability 1 - epsilon,
    # otherwise pick a random action.
    if np.random.random() > epsilon:
      action = np.argmax(agent.get_Q(np_state))
    else:
      action = np.random.randint(0, len(agent.action_space))

    new_state, reward, done = env.step(action, verbose=0)

    score += reward

    # Store this step's reward. The running episode score would leak earlier
    # rewards into later transitions and bias the Q targets.
    agent.update_replay_memory((state, action, reward, new_state, done))
    agent.train(done, step)

    state = new_state
    step += 1

  # Decay epsilon
  if epsilon > MIN_EPSILON:
      epsilon *= EPSILON_DECAY
      epsilon = max(MIN_EPSILON, epsilon)

# Test DQN

In [None]:
# Let the trained agent play 1000 verbose games greedily, then print the mean
# per-game score and the accumulated wallet.
env = Game()
player_wins = []
wallet = 0
for i in range(1000):
  print("========= STARTING =========")
  env.reset()
  done = 0
  print("======== DEALER TURN ========")
  env.playDealerGame(verbose=2)
  state = [env.player.hand.getPoints(), env.dealer.getShowingCard().points]
  score = 0
  wage = 0
  print("======== PLAYER TURN ========")
  while done == 0:

    np_state = np.array([state]) / norm

    # Evaluate the network once per step and reuse the result for both the
    # action choice and the printout.
    q_values = agent.get_Q(np_state)
    action = np.argmax(q_values)
    print(q_values)

    new_state, reward, done = env.step(action, verbose=12)

    score += reward

    state = new_state

    # A reward of exactly 1 is the "hit without busting" signal, not money.
    if reward != 1:
      wage += reward


  if done == 1:
    print("WINNER ////// Player won!")
  elif done == -1:
    print("WINNER +++++ Dealer won!")

  player_wins.append(score)
  wallet += wage

print(np.mean(player_wins))
print(wallet)

Dealer has:  [6-diamonds, 2-clubs]
Dealer points: 8
Dealer hits!
Dealer draws  ace-clubs
Dealer points: 19
Dealer sticks.
[0.000000e+00 1.000000e+00 5.890672e-35]
Player has: [5-spades, 9-spades]
Player sticks on  14
WINNER +++++ Dealer won!
Dealer has:  [9-diamonds, jack-clubs]
Dealer points: 19
Dealer sticks.
[0.0000000e+00 1.0000000e+00 2.1733822e-35]
Player has: [6-hearts, 8-diamonds]
Player sticks on  14
WINNER +++++ Dealer won!
Dealer has:  [queen-clubs, 2-clubs]
Dealer points: 12
Dealer hits!
Dealer draws  jack-hearts
Dealer points: 22
Dealer Bust!
[2.8693523e-27 1.0000000e+00 1.1348179e-23]
Player has: [3-diamonds, queen-diamonds]
Player sticks on  13
WINNER ////// Player won!
Dealer has:  [queen-spades, 8-spades]
Dealer points: 18
Dealer sticks.
[1.0000000e+00 3.3476506e-09 3.0360627e-11]
Player has: [7-clubs, 4-clubs]
Player draws:  9-diamonds
Player points:  20
[0. 1. 0.]
Player has: [7-clubs, 4-clubs, 9-diamonds]
Player sticks on  20
WINNER ////// Player won!
Dealer has:  [

# Save Agent

In [None]:
# Write the weights of both networks to .h5 files in the working directory.
for network, path in ((agent.model, "model.h5"), (agent.target_model, "target_model.h5")):
  network.save_weights(path)