# Monkey See, Monkey Do: Copy Opponent Strategy

Obviously not a great strategy by itself, but how often does it actually win due to random chance?

## Agent Setup

In [None]:
%%writefile agent_copy_strat.py

import random
# Seed the module-level RNG so the random moves drawn below are repeatable
random.seed(27)

class Agent:
    '''Bandit player that remembers the moves made by itself and its opponent.
    '''

    def __init__(self):
        # Histories start empty; `rewards` is not filled in yet (see TODOs)
        self.my_moves = []
        self.opponent_moves = []
        self.rewards = []

    def random_move(self, obs, config):
        '''Pick one of the `config.banditCount` bandits at random.
        '''
        return random.choice(range(config.banditCount))

    def get_last_move(self, moves, obs, config):
        '''Return the most recent entry of `moves`.

        Falls back to a random move when `moves` is still empty.
        '''
        if moves:
            return moves[-1]
        return self.random_move(obs, config)

    def copy_move(self, obs, config):
        '''Repeat whatever the opponent played last.
        '''
        return self.get_last_move(self.opponent_moves, obs, config)

    # TODO: Record other information besides moves
    def record_history(self, obs, conf):
        '''Append the previous turn's moves (ours and the opponent's) to the
        stored histories.
        '''
        # Nothing has been played before the first turn
        if obs.step <= 0:
            return

        # TODO: consider if more than two agents
        mine = obs.agentIndex % 2
        theirs = (obs.agentIndex + 1) % 2

        # Read both actions before touching either history list
        last_actions = obs.lastActions
        my_last_move, opponent_last_move = last_actions[mine], last_actions[theirs]

        self.my_moves.append(my_last_move)
        self.opponent_moves.append(opponent_last_move)
        # TODO: Record rewards
        return

    def use_strategy(self, obs, config, strat='random'):
        '''Choose a bandit according to the named strategy.

        `strat` is matched case-insensitively against 'random' and
        'copy_move'; any other name selects the first lever (0).
        '''
        strategies = {
            'random': self.random_move,
            'copy_move': self.copy_move,
        }
        chooser = strategies.get(strat.lower())
        # Default to choosing first lever if not known strategy
        if chooser is None:
            return 0
        return chooser(obs, config)
    

# One module-level agent, so the recorded history carries over between calls
my_agent = Agent()


def agent_run(observation, config):
    '''Record last turn's moves, then play the opponent's most recent move.
    '''
    my_agent.record_history(observation, config)
    return my_agent.use_strategy(observation, config, strat='copy_move')

# Agent Runs

Let's create a random agent to run against.

In [None]:
from agent_copy_strat import Agent 

# Opponent: an Agent that is only ever asked for the 'random' strategy
rand_agent = Agent()


def rand_run(obs, conf):
    '''Return a randomly chosen bandit for this turn.
    '''
    return rand_agent.use_strategy(obs, conf, strat='random')

In [None]:
!pip install kaggle-environments --upgrade -q

In [None]:
from kaggle_environments import make

In [None]:
# Play a single episode: the copy-strategy agent file against the random agent
env = make("mab", debug=True)
competitors = ['agent_copy_strat.py', rand_run]
env.run(competitors)

# Show the finished episode inline in the notebook
env.render(mode="ipython", width=800, height=300)

## Evaluation

### Check out the rewards over time

In [None]:
# copyfile is used below to place the custom helper module where we can import it
from shutil import copyfile

# Copy our file into the working directory (make sure it has .py suffix)
copyfile(src="../input/visualizing-reward-outcomes/SimulationExplorer.py", 
         dst= "../working/SimulationExplorer.py")

# Import SimulationExplorer functions
# NOTE(review): presumably this only resolves once the copy above has run — confirm
import SimulationExplorer as Explorer

In [None]:
# Wrap the single finished run in a dict so the explorer can plot it
sims = dict(test=env)
test = Explorer.SimViz(sims)
test.plot_total_reward()

# Print the rewards stored for each simulation
for n, env in sims.items():
    print(n, env.toJSON().get('rewards'))

### Multiple runs

Alright, let's see how this does over many runs. It should be able to sneak out a few lucky wins.

In [None]:
def print_trial_results(trial, env, start_time):
    '''Print a summary of one finished trial and return the reward margin.

    Args:
        trial: Trial number shown in the header line.
        env: Finished environment; the 'rewards' list from `env.toJSON()` is
            read, comparing the first agent (index 0) to the second (index 1).
        start_time: `time.time()` value taken when the trial started, used
            to report the elapsed seconds.

    Returns:
        `rewards[0] - rewards[1]`: positive for a win of the first agent,
        zero for a tie, negative for a loss.
    '''
    # Imported locally so this helper does not depend on another cell's import
    import time

    rewards = env.toJSON().get('rewards')
    # Compute the margin once so the printed outcome and return value agree
    diff = rewards[0] - rewards[1]
    if diff > 0:
        outcome = 'W'
    elif diff < 0:
        outcome = 'L'
    else:
        # Equal rewards are a tie, not a loss
        outcome = 'T'

    print(f'Trial # {trial}:')
    print(f'\t{time.time()-start_time:.4} seconds')
    print(f'\t{outcome} → {rewards}')
    print(f'\tDifference: {diff}')

    return diff

In [None]:
import time


# Number of independent games to play against the random agent
N_TRIALS = 20

sims = {}
diffs = []

for trial in range(N_TRIALS):
    start_time = time.time()

    # Build a new environment for every trial and play one full game
    env = make("mab", debug=True)
    env.run(['agent_copy_strat.py', rand_run])

    # Keep the finished environment so all trials can be plotted together
    name = f'Trial#{trial}'
    sims[name] = env

    # Print this trial's summary and remember its reward margin
    diffs.append(print_trial_results(trial, env, start_time))


test = Explorer.SimViz(sims)
test.plot_total_reward()

# Only strictly positive margins count as wins; format as a real percentage
win_rate = sum(d > 0 for d in diffs) / len(diffs)
print(f'Win Percentage: {win_rate:.1%}')