## Reinforcement Learning Project - Interactive Fiction

In [28]:
from jericho import *
from jericho.template_action_generator import TemplateActionGenerator
import random

### Load an interactive fiction work into the Jericho environment
Currently: Detective

In [2]:
# Location of the Detective story file, relative to the notebook's working directory
GAME_FILE = "z-machine-games-master/jericho-game-suite/detective.z5"
env = FrotzEnv(GAME_FILE)

### RL problem setup:

States: Text at each step<br>
Actions: Sampled text commands (for Detective, generally either a cardinal direction or a [verb] [noun] phrase)

### Playthrough of game with agent making random actions

In [53]:
# Ask the environment for the highest score the game can award, then report it
max_score = env.get_max_score()
print(f'Maximum possible score in game: {max_score}')

Maximum possible score in game: 360


In [56]:
# Reset game to initial state S_0
env.reset()

# Play until the environment reports that the game has ended.
while not env.game_over():
    # Here, I use the environment to check what are possible actions, and choose a random one.
    # I may or may not do this in the future (sample my own words for actions, or attempt that as a possible agent)
    available_actions = env.get_valid_actions()
    if not available_actions:
        # random.choice raises IndexError on an empty sequence, so end the
        # playthrough cleanly when the environment offers nothing to try.
        print('No valid actions available; ending random playthrough.')
        break
    random_action = random.choice(available_actions)

    # The last element of the state tuple is printed as the current game text
    # (it appears as a bytes literal in the recorded output).
    print(f'State: {env.get_state()[-1]}')
    print(f'Random action chosen: {random_action}')
    env.step(random_action)
    print()

State: b'\n\n\n\n[Type "help" for more information about this version]\n\nDetective\nBy Matt Barringer.\nPorted by Stuart Moore.\nStuart_Moore@my-deja.com\nRelease 1 / Serial number 000715 / Inform v6.21 Library 6/10 SD\n\n<< Chief\'s office >>\nYou are standing in the Chief\'s office. He is telling you "The Mayor was murdered yeaterday night at 12:03 am. I want you to solve it before we get any bad publicity or the FBI has to come in. "Yessir!" You reply. He hands you a sheet of paper. Once you have read it, go north or west.\n\nYou can see a piece of white paper here.\n\n[Your score has just gone up by ten points.]\n'
Random action chosen: west

State: b'\n\n<< Closet >>\nYou are in a closet. There is a gun on the floor. Better get it. To exit, go east.\n\nYou can see a small black pistol here.\n'
Random action chosen: push pistol to floor

State: b'\nDropped.\n'
Random action chosen: north

State: b"\nYou can't go north from here!\n\n<< Closet >>\nYou are in a closet. There is a gun

State: b'\nTaken.\n\n[Your score has just gone up by ten points.]\n'
Random action chosen: east

State: b"\n\n<< Mayor's house >>\nYou are in the house, at the scene of the crime. You enter and flash your badge before a cop. He admits you. To the north is the upstairs. To the east is the living room and to the west is the dining room.\n"
Random action chosen: north

State: b'\n\n<< Upstairs hallway >>\nYou are in the hallway of the large house of the mayor. It is an amazingly large house. You can go north, south, east or west.\n\n[Your score has just gone up by ten points.]\n'
Random action chosen: get up

State: b"\nYou can't go out from here!\n\n<< Upstairs hallway >>\nYou are in the hallway of the large house of the mayor. It is an amazingly large house. You can go north, south, east or west.\n"
Random action chosen: get up

State: b"\nYou can't go out from here!\n\n<< Upstairs hallway >>\nYou are in the hallway of the large house of the mayor. It is an amazingly large house. You ca

State: b'\n\n<< Video Store >>\nYou are in a video store called Brickbuster Video. There are about 3,000 videos here. You can go north, or east.\n'
Random action chosen: west

State: b"\nYou can't go west from here!\n\n<< Video Store >>\nYou are in a video store called Brickbuster Video. There are about 3,000 videos here. You can go north, or east.\n"
Random action chosen: north

State: b'\n\n<< Backroom >>\nYou are in the backroom of Brickbuster Video. You see a small video on the floor, but you dismiss it as having no potential value to the crime. You can go south.\n'
Random action chosen: south

State: b'\n\n<< Video Store >>\nYou are in a video store called Brickbuster Video. There are about 3,000 videos here. You can go north, or east.\n'
Random action chosen: west

State: b"\nYou can't go west from here!\n\n<< Video Store >>\nYou are in a video store called Brickbuster Video. There are about 3,000 videos here. You can go north, or east.\n"
Random action chosen: north

State: b'\n

In [57]:
# Report the score held by the environment once the random playthrough has finished
final_score = env.get_score()
print(f'Final score of random choice game: {final_score}')

Final score of random choice game: 100


### An agent that learns from the environment

Attempting Q-learning, based on Lab 4 Tic-Tac-Toe Agent (incomplete)

In [None]:
class Agent():
    """Value-table agent; everything except the Q-learning update is provided.

    The agent keeps a dictionary ``V`` mapping game states to estimated values
    and offers helpers for greedy and epsilon-greedy selection among candidate
    states.

    NOTE(review): ``learn_from_move`` (the Q-learning step) is not defined in
    this class, so ``learn_from_episode`` / ``learn_game`` cannot run until it
    is added.
    NOTE(review): ``learn_select_move``, ``play_select_move`` and ``reward``
    read ``game.allowed_moves()``, ``game.player`` and ``game.winner``; confirm
    that the environment passed in actually provides them.
    """

    def __init__(self, env, epsilon=0.1, alpha=1.0, value_player='X'):
        """Store the environment and the learning hyper-parameters.

        Args:
            env: Game environment. ``learn_from_episode`` calls its ``reset()``,
                ``game_over()`` and ``get_state()`` methods.
            epsilon: Probability that ``learn_select_move`` swaps the greedy
                choice for a random one.
            alpha: Stored on the instance; no method defined here reads it
                (presumably the learning rate for the missing update step).
            value_player: Player whose perspective the values are learned from;
                compared against ``game.player`` and ``game.winner``.
                NOTE(review): the default 'X' is a Tic-Tac-Toe-style
                placeholder — confirm what a single-player game should use.
        """
        self.V = dict() # Build up the values of different states as we encounter them; Note the Markov assumption
        self.game = env
        self.epsilon = epsilon
        self.alpha = alpha
        # Previously never assigned, which made every method that reads
        # self.value_player raise AttributeError.
        self.value_player = value_player

    def state_value(self, game_state):
        "Look up state value. If never seen state, then assume neutral."
        return self.V.get(game_state, 0.0)

    def learn_game(self, n_episodes=1_000):
        "Let's learn through complete experience to get that reward."
        for episode in range(n_episodes):
            self.learn_from_episode()

    def learn_from_episode(self):
        "Play one episode from a fresh start and record the terminal state's reward."
        # reset() is used as an in-place restart (the notebook's own random
        # playthrough ignores its return value and keeps using the same
        # environment), so keep driving the stored environment rather than
        # whatever reset() returns.
        self.game.reset()
        game = self.game
        while not game.game_over():
            self.learn_from_move(game)
        # NOTE(review): get_state() must return a hashable value to be usable
        # as a dictionary key — confirm for the real environment.
        self.V[game.get_state()] = self.reward(game)

    def learn_select_move(self, game):
        """Exploration and exploitation.

        Returns:
            A ``(best_move, selected_move)`` pair. ``selected_move`` equals
            ``best_move`` unless, with probability ``epsilon``, it is replaced
            by a uniformly random candidate.
        """
        allowed_state_values = self.state_values( game.allowed_moves() )
        if game.player == self.value_player:
            best_move = self.argmax_V(allowed_state_values)
        else:
            best_move = self.argmin_V(allowed_state_values)

        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.random_V(allowed_state_values)

        return (best_move, selected_move)

    def play_select_move(self, game):
        "Make the move based on the best option for the player."
        allowed_state_values = self.state_values( game.allowed_moves() )
        if game.player == self.value_player:
            return self.argmax_V(allowed_state_values)
        else:
            return self.argmin_V(allowed_state_values)

    def round_V(self):
        "After training, this makes action selection random from equally-good choices"
        # Rounding to one decimal turns near-equal values into exact ties.
        for k in self.V.keys():
            self.V[k] = round(self.V[k],1)

    def state_values(self, game_states):
        "Map each given state to its current value estimate."
        return dict((state, self.state_value(state)) for state in game_states)

    def argmax_V(self, state_values):
        "For the best possible states, chose randomly amongst them."
        max_V = max(state_values.values())
        chosen_state = random.choice([state for state, v in state_values.items() if v == max_V])
        return chosen_state

    def argmin_V(self, state_values):
        "For the worst possible states, chose randomly amongst them."
        min_V = min(state_values.values())
        chosen_state = random.choice([state for state, v in state_values.items() if v == min_V])
        return chosen_state

    def random_V(self, state_values):
        "Any state will do."
        return random.choice(list(state_values.keys()))

    def reward(self, game):
        "Return 1.0 if value_player won, -1.0 if another winner is set, else 0.0."
        if game.winner == self.value_player:
            return 1.0 # Winning is good
        elif game.winner:
            return -1.0 # Losing is bad
        else:
            return 0.0  # Tying is indifferent


In [None]:
# Parameters
# presumably the learning rate (alpha) and discount factor (gamma) for the
# Q-learning update — confirm once the update step is written
alpha, gamma = 1, 1

In [None]:
# Reset game to initial state S_0
env.reset()

while not env.game_over():
    state = env.get_state()
    