In [34]:
import sys
sys.path.append('../gym_art')

from PIL import Image

from paintings import PaintingEnv, load_image
import numpy as np
import pandas as pd
import random

In [2]:
env = PaintingEnv()

In [42]:
class OptimalAgent(object):
    def __init__(self, env):
        # The memory of all the paintings and their values
        self.env = env
        self.paint_to_vals = { artist : None for artist in env.paintings }

    def act(self, trial_info):
        sides = ['left', 'right']
        vals  = np.array([ self.paint_to_vals[trial_info[s]['name']] for s in sides ])
        nulls = pd.isnull(vals)

        # If both are None
        if nulls.all():
            action = random.choice([0,1])
        # If one is None then choose that one
        elif nulls.any():
            action = np.argmax(nulls)
        # If neither is none, then choose the higher valued one
        else:
            action = np.argmax(vals)

        return action
    
    def step(self, trial_info, action, reward):
        sides = ['left', 'right']
        chosen_side = sides[action]
        painting = trial_info[chosen_side]['name']
        if self.paint_to_vals[painting] is None:
            self.paint_to_vals[painting] = reward
        elif self.paint_to_vals[painting] != reward:
            raise Exception("Reward value has changed")
        return

In [60]:
mean_scores = []
for i in range(10):
    state, reward, done, info = env.reset(full_reset=True) # reset environment
    agent = OptimalAgent(env) # Call on the agent
    score = 0
    for t in range(env.n_trials - 1):
        action = agent.act(info)                   # select an action
        next_state, reward, done, next_info = env.step(action)   # send action to environment
        agent.step(info, action, reward) # learning step
        state = next_state
        info = next_info
        score += reward
        if done:
            break
    mean_score = score/t # keep scores in range of 0-1, allow comparison to humans above
    mean_scores.append(mean_score)

In [62]:
# This is the average optimal score
np.array(mean_scores).mean()

0.5138636363636363