In [None]:
import gym_gvgai
import gym

In [None]:
import sys
import numpy as np
import pandas as pd
from collections import defaultdict

In [None]:
# Build the environment registered under the id 'gvgai-zelda-lvl3-v0'.
env = gym_gvgai.make('gvgai-zelda-lvl3-v0')

In [None]:
def e_greedy_policy_creation(Qstate, epsilon, nA):
    """
    Build the epsilon-greedy action distribution for a single state.

    Qstate: the action values of that one state (the Q[state] row),
      indexable by action.
    epsilon: exploration weight; epsilon / nA is given to every action.
    nA: the number of actions available in this environment.

    return: numpy array of length nA holding the action probabilities.
      The action with the highest value (first one on ties) gets
      1 - epsilon + epsilon / nA, every other action gets epsilon / nA.
    """
    exploration_share = epsilon / nA
    probabilities = np.full(nA, exploration_share, dtype=float)

    # np.argmax returns the first index among equal maxima.
    greedy_action = np.argmax(Qstate)
    probabilities[greedy_action] = 1 - epsilon + exploration_share
    return probabilities

def choose_action(policy, env):
    """
    Sample one action index according to `policy`.

    policy: probabilities for the actions 0 .. env.nA - 1.
    env: object exposing the number of actions as `env.nA`.

    return: the sampled action index, drawn with numpy's global random state.
    """
    action_ids = np.arange(env.nA)
    sampled_action = np.random.choice(action_ids, p=policy)
    return sampled_action

def sarsa(env, num_episodes, alpha, gamma=1.0, fixedEpsilon = False):
    """
    Tabular SARSA: learn action values while following an epsilon-greedy policy.

    env: environment exposing `nA`, `reset()` and `step(action)`;
      `step` must return the 4-tuple (next_state, reward, done, info).
    num_episodes: number of episodes to run.
    alpha: step size of each update.
    gamma: discount applied to the value of the next state-action pair.
    fixedEpsilon: if True epsilon stays at 0.1, otherwise it is 1 / episode number.

    return: Q, a defaultdict mapping a state key to a numpy array of env.nA
      action values. Hashable states are used as keys unchanged; numpy-array
      observations are keyed by their raw bytes.
    """
    def _state_key(observation):
        # numpy arrays are unhashable, so using one directly as a dict key
        # raises TypeError. Image observations are therefore keyed by their
        # byte content; any other state is passed through untouched.
        # This assumes all observations of one env share shape and dtype.
        if isinstance(observation, np.ndarray):
            return observation.tobytes()
        return observation

    # action-value table; unseen states start with all-zero values
    Q = defaultdict(lambda: np.zeros(env.nA))

    for i_episode in range(1, num_episodes+1):
        state = _state_key(env.reset())
        # To see the policy from the book, keep epsilon fixed at 0.1
        epsilon = 0.1 if fixedEpsilon else 1.0 / i_episode
        # monitor progress
        if i_episode % 100 == 0:
            print("\rEpisode {}/{}".format(i_episode, num_episodes), end="")
            sys.stdout.flush()
        done = False
        policy = e_greedy_policy_creation(Q[state], epsilon, env.nA)
        action = choose_action(policy, env)
        while not done:
            sPrime, reward, done, info = env.step(action)
            sPrime = _state_key(sPrime)
            # on-policy: the next action is drawn from the same epsilon-greedy
            # policy and its value is used as the bootstrap target
            policySPrime = e_greedy_policy_creation(Q[sPrime], epsilon, env.nA)
            aPrime = choose_action(policySPrime, env)
            Q[state][action] += alpha * (reward + (gamma * Q[sPrime][aPrime]) - Q[state][action])
            state, action = sPrime, aPrime
    return Q

In [None]:
# Reset the environment and keep the returned observation in `x`;
# it is rendered further down with plt.imshow as the original level.
x = env.reset()

In [None]:
import matplotlib.pyplot as plt

In [None]:
from levels.base import Generator

In [None]:
# Tile characters handed to every Generator as its second argument.
# NOTE(review): presumably the level tiles the generator treats specially —
# confirm their meaning against levels.base.Generator.
MECHANICS = ['+', 'g']

In [None]:
def initialize(path, rows=9):
    """
    Read a text level file into a 2-D grid of single characters.

    path: path of the level file; each line of the file is one row of tiles.
    rows: number of rows of the resulting grid (defaults to 9, the value
      that used to be hard-coded).

    return: numpy array of shape (rows, -1) whose entries are the characters
      of the file in reading order, line breaks removed.
    raises: ValueError (from reshape) if the number of characters cannot be
      split evenly into `rows` rows.
    """
    # `with` closes the file even if reading fails.
    with open(path, 'r') as level_file:
        # Strip only the line break: a last line without a trailing newline
        # must keep its final tile.
        lines = [line.rstrip('\n') for line in level_file]

    tiles = [tile for line in lines for tile in line]
    return np.array(tiles).reshape((rows, -1))

In [None]:
# Read ./levels/zelda_lvl3.txt into a character grid and wrap it,
# together with MECHANICS, in a Generator.
gen = Generator(initialize('./levels/zelda_lvl3.txt'), MECHANICS)

In [None]:
# Display the generator's current world grid as a table (before any mutation).
pd.DataFrame(gen.world)

In [None]:
# Mutate the level held by `gen`; the return value is not used.
# NOTE(review): 0.2 is presumably a mutation rate/probability — confirm in levels.base.Generator.
gen.mutate(0.2)

In [None]:
# Display gen.world again, now after the mutate call.
pd.DataFrame(gen.world)

In [None]:
# Persist the mutated level twice: its str() form as a text level file
# (loaded into the environment in a later cell) and its world array as .npy.
with open("./levels/test.txt", 'w+') as fname:
    fname.write(str(gen))
    np.save("./levels/test.npy", gen.world)

In [None]:
# Render the observation stored in `x`, which was captured by env.reset()
# before any custom level file was set on the environment.
plt.imshow(x) # original env

In [None]:
# Hand the mutated level file to the underlying (unwrapped) environment.
# NOTE(review): _setLevel is a private method of the wrapped env; presumably the
# new level only shows up after the next reset — confirm against gym_gvgai.
env.unwrapped._setLevel("./levels/test.txt")

In [None]:
# Reset the environment and render the observation it returns.
plt.imshow(env.reset()) # mutated environment

In [None]:
# Second parent for the crossover: a Generator built from the level text
# saved in ./levels/test.txt.
p2 = Generator(initialize("./levels/test.txt"), MECHANICS)

In [None]:
# Mutate `gen` once more (argument 0.3 this time) before crossing it with p2.
# NOTE(review): argument presumably a mutation rate — confirm in levels.base.Generator.
gen.mutate(0.3)

In [None]:
# Cross `gen` with `p2` and keep the result as `child`.
# Later cells use str(child) and child.world, so the result presumably is a Generator-like object.
child = gen.crossOver(p2)

In [None]:
# Persist the crossover result: its str() form as a text level file and its
# world array as .npy. The array goes to child.npy (matching child.txt) so it
# no longer overwrites ./levels/test.npy, which holds the mutated parent level.
with open("./levels/child.txt", 'w+') as fname:
    fname.write(str(child))
    np.save("./levels/child.npy", child.world)

In [None]:
# Hand the crossover level file to the underlying (unwrapped) environment.
# NOTE(review): relies on the private _setLevel method — confirm against gym_gvgai.
env.unwrapped._setLevel("./levels/child.txt")

In [None]:
# Reset the environment and render the observation it returns.
plt.imshow(env.reset()) # after crossover environment

In [None]:
import time