In [38]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import time

In [39]:
class MC(object):
    """Monte Carlo evaluation of a fixed blackjack policy against a dealer.

    The player keeps hitting until the hand total reaches ``PLAYER_STAND``;
    the dealer keeps hitting until the hand total reaches ``DEALER_STAND``.
    After each round the last recorded player state is credited +1 for a
    win, -1 for a loss and 0 for a draw.

    Attributes:
        player_state_value: dict mapping a state tuple
            ``(player_total, dealer_up_card, usable_ace)`` to its accumulated
            reward.
        player_states: list of every state recorded so far, in the order
            visited (history is kept across rounds).
        player_win: number of rounds the player has won.
        player_draw: number of rounds that ended in a tie.
    """

    # One entry per rank of an (assumed) infinite shoe: ace (1), 2-9 and the
    # four ten-valued ranks (10, J, Q, K).
    CARDS = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10)

    # Totals at which each side stops drawing; change these to test other
    # policies. The dealer stands on both hard and soft 17.
    PLAYER_STAND = 20
    DEALER_STAND = 17

    def __init__(self):
        self.player_state_value = {}
        self.player_states = []
        self.player_win = 0     # Number of player wins
        self.player_draw = 0    # Number of player ties

    @staticmethod
    def giveCard():
        """Draw one card value uniformly from ``CARDS`` (ace is returned as 1)."""
        return np.random.choice(MC.CARDS)

    @staticmethod
    def _addCard(val, ace, card):
        """Add ``card`` to a hand and return the new ``(val, ace)`` pair.

        An ace (``card == 1``) counts as 11 when that does not push the total
        over 21, in which case the hand is flagged as holding a soft ace;
        otherwise it counts as 1 and the flag is left unchanged.
        """
        if card != 1:
            return val + card, ace
        if val + 11 > 21:
            # Card must be a hard ace (1)
            return val + 1, ace
        # Card can be a soft ace (11)
        return val + 11, True

    def _step(self, val, ace, stand_at):
        """Advance a hand by one decision: stop, or draw one more card.

        Returns ``(val, ace, done)``. ``done`` is True when the hand is bust
        (over 21 with no soft ace left) or has reached ``stand_at``; in both
        cases no card is drawn.
        """
        if val > 21:
            if not ace:
                return val, ace, True
            # Demote the soft ace from 11 to 1 instead of busting.
            val -= 10
            ace = False
        if val >= stand_at:
            return val, ace, True
        return self.aceHelper(val, ace)

    def dealerLogic(self, val, ace):
        """Play one dealer decision; see ``_step`` for the return value."""
        return self._step(val, ace, self.DEALER_STAND)

    def playerLogic(self, val, ace):
        """Play one player decision; see ``_step`` for the return value."""
        return self._step(val, ace, self.PLAYER_STAND)

    def aceHelper(self, val, ace):
        """Draw a card and add it to the hand.

        Returns ``(val, ace, False)``; the trailing ``False`` tells the caller
        that the hand is not finished yet.
        """
        val, ace = self._addCard(val, ace, self.giveCard())
        return val, ace, False

    def reinforce(self, player_val, dealer_val, end=True):
        """Score a finished round and credit the last recorded player state.

        Args:
            player_val: final player total.
            dealer_val: final dealer total.
            end: when False the round is not over and nothing is updated.
        """
        if not end:
            return

        if player_val > 21:
            # The player acts first, so a player bust loses even if the
            # dealer would also have gone bust.
            reward = -1
        elif dealer_val > 21 or player_val > dealer_val:
            reward = 1
        elif player_val < dealer_val:
            reward = -1
        else:
            reward = 0

        if reward > 0:
            self.player_win += 1
        elif reward == 0:
            self.player_draw += 1

        # Only the final decision state of the round receives the reward.
        # Skip the value update (but keep the counters) if nothing was
        # recorded, and tolerate a state that has no value entry yet.
        if self.player_states:
            last_state = self.player_states[-1]
            self.player_state_value[last_state] = (
                self.player_state_value.get(last_state, 0) + reward
            )

    def play(self, rounds=1000, plot=True):
        """Simulate ``rounds`` rounds and update the state values and counters.

        Progress is printed, and the wall-clock time since the previous
        report is sampled, roughly every 10% of ``rounds``.

        Args:
            rounds: number of rounds to simulate.
            plot: when True (default) show a matplotlib chart of the sampled
                times; pass False to run without plotting.
        """
        times = []
        last_time = time.time()
        # Report ~10 times per run regardless of how many rounds are played.
        report_every = max(1, rounds // 10)

        for round_idx in range(rounds):
            if round_idx % report_every == 0:
                # Timing samples kept to spot per-round slowdowns; the first
                # sample only measures the setup before round 0.
                current_time = time.time()
                times.append(current_time - last_time)
                last_time = current_time
                print(f"Round: {round_idx}")

            # Deal to dealer. The up card keeps its raw value (ace == 1) for
            # the state key, but both cards go through the same soft-ace
            # rule as every other draw so that e.g. A+10 counts as 21.
            up_card = self.giveCard()
            dealer_val, dealer_ace = self._addCard(0, False, up_card)
            dealer_val, dealer_ace = self._addCard(
                dealer_val, dealer_ace, self.giveCard()
            )

            # Player logic: draw until playerLogic reports the hand is done.
            player_val, player_ace = 0, False
            while True:
                player_val, player_ace, end = self.playerLogic(player_val, player_ace)
                if end:
                    break
                # Totals below 12 are not recorded as states.
                if 12 <= player_val <= 21:
                    state = (player_val, up_card, player_ace)
                    self.player_states.append(state)
                    # Seed the value here instead of re-scanning the whole
                    # history every round, which made play() quadratic.
                    self.player_state_value.setdefault(state, 0)

            # Dealer logic
            end = False
            while not end:
                dealer_val, dealer_ace, end = self.dealerLogic(dealer_val, dealer_ace)

            # Find winner
            self.reinforce(player_val, dealer_val)

        if plot:
            # Plot the time evaluation (for development)
            plt.figure(figsize=(10, 6))
            plt.plot(times, marker='o')
            plt.xlabel('Intervals (~10% rounds)')
            plt.ylabel('Time elapsed (s)')
            plt.title('Time Elapsed Between Prints')
            plt.grid(True)
            plt.show()
            

In [40]:
# Train the Monte Carlo agent, then report its aggregate outcome rates.
iterations = 30000  # Alter value for testing, document results based on iterations
bot = MC()
bot.play(rounds=iterations)

# A tie is not a loss, so draws count toward the "not losing" rate.
wins, draws = bot.player_win, bot.player_draw
print("Win rate:", wins / iterations)
print("Not losing rate", (wins + draws) / iterations)


Round: 0
Round: 3000
Round: 6000
Round: 9000
Round: 12000
Round: 15000
Round: 18000
Round: 21000
Round: 24000
Round: 27000
