# Qubit Tug-of-War Bot Testing Notebook

In [None]:
%run GamePlayer.py
# REPLACE below with your test file (or comment out if developing in notebook)
%run austin_basic_bot.py

In [None]:
# This is the entire Random Bot code for reference and testing
class RandomBot(GameBot):
    '''
        Baseline opponent: on each turn it either passes or plays one card
        picked at random from its hand. It ignores the team, the round
        number and the previous turn entirely.
    '''
    def play_action(self,
                    team: int,
                    round_number: int,
                    hand: List[GameAction],
                    prev_turn: List) -> Optional[GameAction]:
        '''
            Return a randomly chosen card from `hand` with probability 0.2,
            otherwise return None (pass). An empty hand always passes.
        '''
        # chance of playing a card on any given turn
        play_probability = 0.2

        # an empty hand passes immediately, without drawing a random number
        if len(hand) == 0:
            return None

        # biased coin flip: most of the time the bot simply passes
        if np.random.random() >= play_probability:
            return None

        return random.choice(hand)

In [None]:
# The basic set-up for the MyStrategy Class
# Uncomment if you want to develop class in this notebook
# '''
# Insert high-level explanation of your strategy here. Why did you design this strategy?
# When should it work well, and when should it have trouble?
# '''
# class MyStrategy(GameBot):

#     '''
#         Initialize your bot here. The init function must take in a bot_name.
#         You can use this to initialize any variables or data structures
#         to keep track of things in the game
#     '''
#     def __init__(self,bot_name):
#         self.bot_name = bot_name        #do not remove this
    
#     def play_action(self,
#                     team: int,
#                     round_number: int,
#                     hand: List[GameAction],
#                     prev_turn: List) -> Optional[GameAction]:
        

#         ##### IMPLEMENT AWESOME STRATEGY HERE ##################
        
        
#         #######################################################
#         return None

In [None]:
# Run a single game pitting some MyStrategy class vs Random Bot
# Prints the winning state and a log of the game
stratbot = MyStrategy("MyStrat")
randombot = RandomBot("The Randos")
# NOTE: argument order decides the team. The batch-test cell below passes the
# "team 0" bot as the first argument, so here randombot is team |0> and
# stratbot is team |1>. Swap the two arguments to play stratbot as team |0>.
gp = GamePlayer(randombot, stratbot)
winning_state = gp.play_rounds()
print(winning_state)
log = gp.get_event_log()
print(log)

In [None]:
# Test MyStrategy class vs Random Bot NUM_GAMES times for each team value.
# NUM_GAMES drives both the loop length and the percentage computation, so
# the game count can be changed in one place without skewing the printed
# win rates.
NUM_GAMES = 10000

# MyStrategy passed first: it plays as team 0. A game is counted as a win
# when the first entry of the winning state equals 1.
win_counter = 0
for i in range(NUM_GAMES):
    stratbot = MyStrategy("MyStrat")
    randombot = RandomBot("The Randos")
    gp = GamePlayer(stratbot, randombot)
    winning_state = gp.play_rounds()
    if winning_state[0] == 1:
        win_counter += 1
print(f'Percent win as team 0: {100.0 * win_counter / NUM_GAMES}%')

# MyStrategy passed second: it plays as team 1. A game is counted as a win
# when the first entry of the winning state equals 0.
win_counter = 0
for i in range(NUM_GAMES):
    stratbot = MyStrategy("MyStrat")
    randombot = RandomBot("The Randos")
    gp = GamePlayer(randombot, stratbot)
    winning_state = gp.play_rounds()
    if winning_state[0] == 0:
        win_counter += 1
print(f'Percent win as team 1: {100.0 * win_counter / NUM_GAMES}%')

Every team must work independently on their bot (teams should not share code). Your bots will be graded in the following way. Points will be awarded based on a combination of:
1. Effort and creativity,
2. Whether your code begins with a discussion of your strategy and the rationale behind your design choices, and
3. Performance against other bots. 

As a target, you should try to design a bot that consistently beats `RandomBot` at least 60% of the time.





In [None]:
'''
Looks only at current round maxing and assumes opponent has no impact.
Does NOT take into account rotation
Treats measurement as if it is equivalent to passing
Coded w/out taking into account turn order correctly if team 1
vs. Random Bot 10000 games:
Percent win as team 0: 62.14%
Percent win as team 1: 56.17%
'''
class MyStrategy_V1(GameBot):
    '''
        Greedy one-step bot (first version).

        Keeps a local model of the qubit (a two-entry amplitude vector plus
        the sign of the per-round rotation) and updates it from `prev_turn`
        at the start of every turn. Cards are hoarded until the hand holds
        five cards or the internal round counter passes 98. After that the
        bot plays REVERSE if it holds one, otherwise the gate card that gives
        the best immediate win probability, or passes if no card beats
        passing. The per-round rotation is NOT included when scoring cards.
    '''
    def __init__(self, bot_name):
        self.bot_name = bot_name        #do not remove this
        # Rotation applied after every round: fixed angle and current sign
        self.theta = float(2*np.pi/100.0)
        self.direction = 1
        # Tracked qubit amplitudes and the scratch copy used while scoring cards
        self.state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        self.test_state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        # Turn counter and hand bookkeeping
        self.round = 0
        self.dealt_count = 0
        self.hand_size = 0

    def _gate_matrix(self, action):
        # Matrix of a state-changing gate card; None for every other action
        if action == GameAction.PAULIX:
            return np.array([[0, 1], [1, 0]])
        if action == GameAction.PAULIZ:
            return np.array([[1, 0], [0, -1]])
        if action == GameAction.HADAMARD:
            amp = np.sqrt(1/2)
            return np.array([[amp, amp], [amp, -amp]])
        return None

    def _score(self, action, team):
        # Score one candidate starting from a fresh copy of the tracked state
        self.test_state = np.copy(self.state)
        return self.try_action(action, team)

    def rotation(self):
        '''Rotate the tracked state by theta in the current direction.'''
        half_angle = float(self.theta * self.direction) / 2
        cos_h, sin_h = np.cos(half_angle), np.sin(half_angle)
        spin = np.array([[cos_h, -sin_h],
                         [sin_h, cos_h]])
        self.state = np.dot(spin, self.state)

    def update_state(self, action):
        '''Apply a played card to the tracked state / rotation direction.'''
        gate = self._gate_matrix(action)
        if gate is not None:
            self.state = np.dot(gate, self.state)
        elif action == GameAction.REVERSE:
            self.direction *= -1

    def update_with_prev(self, prev_turn):
        '''
            Replay the previous turn on the tracked state: team 0's action,
            then team 1's action, then one rotation step.
        '''
        actions = (prev_turn['team0_action'], prev_turn['team1_action'])
        measurements = (prev_turn['team0_measurement'], prev_turn['team1_measurement'])
        for played, measured in zip(actions, measurements):
            if played is None:
                continue
            if played == GameAction.MEASURE:
                # presumably the reported measurement is the post-measurement
                # state vector -- TODO confirm against GamePlayer
                self.state = measured
            else:
                self.update_state(played)
        self.rotation()

    def try_action(self, action, goal):
        '''
            Apply `action` to the scratch state and return the win probability
            for team `goal`. Only the three gate cards change the scratch
            state; any other action (including None) is scored like a pass.
        '''
        gate = self._gate_matrix(action)
        if gate is not None:
            self.test_state = np.dot(gate, self.test_state)
        # Probability of the 0 outcome, clamped into [0, 1]
        p_zero = min(max(self.test_state[0]**2, 0), 1)
        # Team 0 wants p, team 1 wants 1-p
        return 1 - p_zero if goal == 1 else p_zero

    def play_action(self,
                    team: int,
                    round_number: int,
                    hand: List[GameAction],
                    prev_turn: List) -> Optional[GameAction]:
        '''
            Choose the card to play this turn, or None to pass.
            Uses the bot's own round counter; `round_number` is not read.
        '''
        # Bring the model up to date (nothing to replay on the very first call)
        if self.round != 0:
            self.update_with_prev(prev_turn)
        self.round += 1

        # Record any newly dealt cards
        newly_dealt = len(hand) - self.hand_size
        if newly_dealt > 0:
            self.dealt_count += newly_dealt
            self.hand_size = len(hand)

        # Hoard until the hand is full or the game is nearly over
        ready_to_play = self.hand_size >= 5 or self.round > 98
        if not ready_to_play:
            return None

        # REVERSE always goes first when held
        if GameAction.REVERSE in hand:
            self.hand_size -= 1
            return GameAction.REVERSE

        # Greedy search over the hand; the first card with the top score wins.
        # CURRENTLY DOES NOT TAKE INTO ACCOUNT ROTATIONS
        top_prob, top_action = 0, None
        for card in hand:
            card_prob = self._score(card, team)
            if card_prob > top_prob:
                top_prob, top_action = card_prob, card

        # Passing wins ties against the best card
        if self._score(None, team) >= top_prob:
            return None
        self.hand_size -= 1
        return top_action

In [None]:
'''
Looks only at current round maxing and assumes opponent has no impact.
Takes into account rotation
Treats measurement as if it is equivalent to passing
Coded w/out taking into account turn order correctly if team 1
vs. Random Bot 10000 games:
Percent win as team 0: 80.14%
Percent win as team 1: 66.6%
'''
class MyStrategy_V2(GameBot):
    '''
        Greedy one-step bot (second version).

        Same model as a plain state tracker (amplitude vector plus rotation
        direction, updated from `prev_turn` each turn), but candidate cards
        are scored AFTER applying one rotation step, and REVERSE is scored
        like any other card instead of being played unconditionally.

        NOTE: the scratch rotation direction is now reset from the tracked
        direction before every candidate is scored. Previously it was never
        reset, so one trial of REVERSE flipped the direction used for every
        later candidate and every later turn. Win rates recorded before this
        fix may no longer match.
    '''
    def __init__(self, bot_name):
        self.bot_name = bot_name        #do not remove this
        # Tracked qubit amplitudes and rotation (sign and fixed angle per round)
        self.state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        self.direction = 1
        self.theta = float(2*np.pi/100.0)
        # Scratch copies used while scoring candidate cards
        self.test_state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        self.test_direction = 1
        # Turn counter and hand bookkeeping
        self.round = 0
        self.dealt_count = 0
        self.hand_size = 0

    def update_state(self, action):
        '''Apply a played card to the tracked state / rotation direction.'''
        if action == GameAction.PAULIX:
            X = np.array([[0, 1], [1, 0]])
            self.state = np.dot(X, self.state)
        elif action == GameAction.PAULIZ:
            Z = np.array([[1, 0], [0, -1]])
            self.state = np.dot(Z, self.state)
        elif action == GameAction.HADAMARD:
            H = np.array([[np.sqrt(1/2), np.sqrt(1/2)], [np.sqrt(1/2), -np.sqrt(1/2)]])
            self.state = np.dot(H, self.state)
        elif action == GameAction.REVERSE:
            self.direction = -1 * self.direction

    def update_with_prev(self, prev_turn):
        '''
            Replay the previous turn on the tracked state: team 0's action,
            then team 1's action, then one rotation step.
        '''
        prev_actions = [prev_turn['team0_action'], prev_turn['team1_action']]
        prev_measures = [prev_turn['team0_measurement'], prev_turn['team1_measurement']]
        for prev_action, prev_measure in zip(prev_actions, prev_measures):
            if prev_action is None:
                continue
            if prev_action == GameAction.MEASURE:
                # presumably the reported measurement is the post-measurement
                # state vector -- TODO confirm against GamePlayer
                self.state = prev_measure
            else:
                self.update_state(prev_action)
        thet = float(self.theta * self.direction)
        rotate = np.array([[np.cos(thet / 2), -np.sin(thet / 2)],
                           [np.sin(thet / 2), np.cos(thet / 2)]])
        self.state = np.dot(rotate, self.state)

    def try_action(self, action, goal):
        '''
            Apply `action` to the scratch state/direction, rotate once, and
            return the win probability for team `goal`. MEASURE and None are
            both scored as if nothing was played. The caller is responsible
            for resetting `test_state` and `test_direction` beforehand.
        '''
        if action == GameAction.PAULIX:
            X = np.array([[0, 1], [1, 0]])
            self.test_state = np.dot(X, self.test_state)
        elif action == GameAction.PAULIZ:
            Z = np.array([[1, 0], [0, -1]])
            self.test_state = np.dot(Z, self.test_state)
        elif action == GameAction.HADAMARD:
            H = np.array([[np.sqrt(1/2), np.sqrt(1/2)], [np.sqrt(1/2), -np.sqrt(1/2)]])
            self.test_state = np.dot(H, self.test_state)
        elif action == GameAction.REVERSE:
            self.test_direction = -1 * self.test_direction
        # Rotate test qubit (this assumes your opponent does nothing)
        thet = float(self.theta * self.test_direction)
        rotate = np.array([[np.cos(thet / 2), -np.sin(thet / 2)],
                           [np.sin(thet / 2), np.cos(thet / 2)]])
        self.test_state = np.dot(rotate, self.test_state)
        # Get the 0 state probability p if team 0, 1-p for team 1
        probability = self.test_state[0]**2
        if probability < 0:
            probability = 0
        elif probability > 1:
            probability = 1
        if goal == 1:
            probability = 1 - probability
        return probability

    def play_action(self,
                    team: int,
                    round_number: int,
                    hand: List[GameAction],
                    prev_turn: List) -> Optional[GameAction]:
        '''
            Choose the card to play this turn, or None to pass.
            Uses the bot's own round counter; `round_number` is not read.
        '''
        # Update qubit state based on prev round
        if self.round != 0:
            self.update_with_prev(prev_turn)
        # Increment round counter
        self.round += 1
        # Check if we were dealt cards and update counters
        if len(hand) > self.hand_size:
            self.dealt_count += len(hand) - self.hand_size
            self.hand_size = len(hand)
        # If not near end of game or at hand limit, hoard cards
        if self.hand_size < 5 and self.round <= 98:
            return None
        # Test out actions in hand for max win prob (and passing, tried last)
        # Treats game as if ending and only plans one step ahead
        # Compares to what qubit looks like after rotation!
        best_prob = 0
        best_action = None
        for action in [*hand, None]:
            # Every candidate starts from the tracked state AND the tracked
            # direction; without the direction reset a trial of REVERSE would
            # leak into the candidates scored after it.
            self.test_state = np.copy(self.state)
            self.test_direction = self.direction
            temp_prob = self.try_action(action, team)
            if temp_prob > best_prob:
                best_prob = temp_prob
                best_action = action
        if best_action is not None:
            self.hand_size -= 1
        return best_action

In [None]:
'''
Looks only at current round maxing and assumes opponent has no impact.
Takes into account rotation
Treats measurement as if it is equivalent to passing
Correctly updates state even if playing as team 1
vs. Random Bot 10000 games:
Percent win as team 0: 85.69%
Percent win as team 1: 85.67%
'''
class MyStrategy_V3(GameBot):
    '''
        Greedy one-step bot (third version).

        Tracks the qubit amplitudes and rotation direction locally, replaying
        `prev_turn` in an order that depends on which team the bot plays.
        Cards are hoarded until the hand holds five cards or the internal
        round counter passes 98; after that every card in hand, and passing,
        is scored by the win probability after one rotation step, and the
        best-scoring option is returned.
    '''
    def __init__(self, bot_name):
        self.bot_name = bot_name        #do not remove this
        # Rotation applied after every round: fixed angle and current sign
        self.theta = float(2*np.pi/100.0)
        self.direction = 1
        # Tracked qubit amplitudes
        self.state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        # Scratch copies used while scoring candidate cards
        self.test_state = np.array([np.sqrt(0.5), np.sqrt(0.5)])
        self.test_direction = 1
        # Turn counter and hand bookkeeping
        self.round = 0
        self.dealt_count = 0
        self.hand_size = 0

    def rotate(self, state, direction):
        '''Return `state` rotated by theta in the given direction.'''
        half_angle = float(self.theta * direction) / 2
        cos_h, sin_h = np.cos(half_angle), np.sin(half_angle)
        spin = np.array([[cos_h, -sin_h],
                         [sin_h, cos_h]])
        return np.dot(spin, state)

    def update_state(self, action, state, direction):
        '''
            Return the (state, direction) pair that results from playing
            `action`. Actions other than the three gates and REVERSE leave
            both values as they were.
        '''
        if action == GameAction.PAULIX:
            return np.dot(np.array([[0, 1], [1, 0]]), state), direction
        if action == GameAction.PAULIZ:
            return np.dot(np.array([[1, 0], [0, -1]]), state), direction
        if action == GameAction.HADAMARD:
            amp = np.sqrt(1/2)
            return np.dot(np.array([[amp, amp], [amp, -amp]]), state), direction
        if action == GameAction.REVERSE:
            return state, direction * -1
        return state, direction

    def update_team(self, prev_turn, team):
        '''Apply one team's recorded action from `prev_turn` to the model.'''
        played = prev_turn[f'team{team}_action']
        measured = prev_turn[f'team{team}_measurement']
        if played is None:
            return
        if played == GameAction.MEASURE:
            # presumably the reported measurement is the post-measurement
            # state vector -- TODO confirm against GamePlayer
            self.state = measured
            return
        self.state, self.direction = self.update_state(played, self.state, self.direction)

    def update_with_prev(self, prev_turn, team):
        '''
            Replay `prev_turn` on the tracked state. As team 0: both teams'
            actions, then the rotation. As team 1: own action, the rotation,
            then team 0's action.
        '''
        if team == 0:
            for side in (0, 1):
                self.update_team(prev_turn, side)
            self.state = self.rotate(self.state, self.direction)
            return
        self.update_team(prev_turn, 1)
        self.state = self.rotate(self.state, self.direction)
        self.update_team(prev_turn, 0)

    def try_action(self, action, team):
        '''
            Apply `action` to the scratch state/direction, rotate once, and
            return the win probability for `team`. MEASURE and None are both
            scored as if nothing was played.
        '''
        trial_state, trial_direction = self.update_state(
            action, self.test_state, self.test_direction)
        self.test_direction = trial_direction
        # Rotate test qubit (this assumes your opponent does nothing if you are team 0)
        self.test_state = self.rotate(trial_state, trial_direction)
        # Probability of the 0 outcome, clamped into [0, 1]
        p_zero = min(max(self.test_state[0]**2, 0), 1)
        # Team 0 wants p, team 1 wants 1-p
        return 1 - p_zero if team == 1 else p_zero

    def play_action(self,
                    team: int,
                    round_number: int,
                    hand: List[GameAction],
                    prev_turn: List) -> Optional[GameAction]:
        '''
            Choose the card to play this turn, or None to pass.
            Uses the bot's own round counter; `round_number` is not read.
        '''
        # Bring the model up to date. On the very first call only team 1 has
        # something to replay (team 0's action).
        if self.round > 0:
            self.update_with_prev(prev_turn, team)
        elif self.round == 0 and team == 1:
            self.update_team(prev_turn, 0)
        self.round += 1

        # Record any newly dealt cards
        newly_dealt = len(hand) - self.hand_size
        if newly_dealt > 0:
            self.dealt_count += newly_dealt
            self.hand_size = len(hand)

        # Hoard until the hand is full or the game is nearly over
        ready_to_play = self.hand_size >= 5 or self.round > 98
        if not ready_to_play:
            return None

        # Score every card, then passing (None) last. Only a strictly better
        # score replaces the current pick, so the earliest top scorer is kept.
        top_prob, top_action = 0, None
        for candidate in [*hand, None]:
            self.test_state = np.copy(self.state)
            self.test_direction = self.direction
            candidate_prob = self.try_action(candidate, team)
            if candidate_prob > top_prob:
                top_prob, top_action = candidate_prob, candidate

        if top_action is not None:
            self.hand_size -= 1
        return top_action