In [2]:
import torch
from scipy import optimize
import torch.nn.functional as F
import math
import numpy as np
from functools import reduce
from collections import OrderedDict

class PyTorchObjective(object):
    """PyTorch objective function, wrapped to be called by scipy.optimize."""
    def __init__(self, agent):
        self.f = agent.nn # some pytorch module, that produces a scalar loss
        # make an x0 from the parameters in this module
        parameters = OrderedDict(agent.nn.named_parameters())
        self.param_shapes = {n:parameters[n].size() for n in parameters}
        # ravel and concatenate all parameters to make x0
        self.x0 = np.concatenate([parameters[n].data.numpy().ravel() 
                                   for n in parameters])
        
        self.eval_fn = agent.evaluate

    def unpack_parameters(self, x):
        """optimize.minimize will supply 1D array, chop it up for each parameter."""
        i = 0
        named_parameters = OrderedDict()
        for n in self.param_shapes:
            param_len = reduce(lambda x,y: x*y, self.param_shapes[n])
            # slice out a section of this length
            param = x[i:i+param_len]
            # reshape according to this size, and cast to torch
            param = param.reshape(*self.param_shapes[n])
            named_parameters[n] = torch.from_numpy(param)
            # update index
            i += param_len
        return named_parameters

    def pack_grads(self):
        """pack all the gradients from the parameters in the module into a
        numpy array."""
        grads = []
        for p in self.f.parameters():
            grad = p.grad.data.numpy()
            grads.append(grad.ravel())
        return np.concatenate(grads)

    def is_new(self, x):
        # if this is the first thing we've seen
        if not hasattr(self, 'cached_x'):
            return True
        else:
            # compare x to cached_x to determine if we've been given a new input
            x, self.cached_x = np.array(x), np.array(self.cached_x)
            error = np.abs(x - self.cached_x)
            return error.max() > 1e-8

    def cache(self, x):
        # unpack x and load into module 
        state_dict = self.unpack_parameters(x)
        self.f.load_state_dict(state_dict)
        # store the raw array as well
        self.cached_x = x
        # zero the gradient
        self.f.zero_grad()
        # use it to calculate the objective
        obj = self.eval_fn()
        # backprop the objective
        # obj.backward()
        self.cached_f = obj
        return obj

    def fun(self, x):
        if self.is_new(x):
            self.cache(x)
        return self.cached_f

    def jac(self, x):
        if self.is_new(x):
            self.cache(x)
        return self.cached_jac

In [3]:

# import torch
# import torch.nn as nn
# import torch.optim as optim
# import torch.nn.functional as F

# import numpy as np
# from scipy import optimize

# from tqdm import tqdm

# if __name__ == '__main__':
#     # whatever this initialises to is our "true" W
#     linear = nn.Linear(32,32)
#     linear = linear.eval()

#     # input X
#     N = 10000
#     X = torch.Tensor(N,32)
#     X.uniform_(0.,1.) # fill with uniform
#     eps = torch.Tensor(N,32)
#     eps.normal_(0., 1e-4)

#     # output Y
#     with torch.no_grad():
#         Y = linear(X) #+ eps

#     # make module executing the experiment
#     class Objective(nn.Module):
#         def __init__(self):
#             super(Objective, self).__init__()
#             self.linear = nn.Linear(32,32)
#             self.linear = self.linear.train()
#             self.X, self.Y = X, Y

#         def forward(self):
#             output = self.linear(self.X)
#             return F.mse_loss(output, self.Y).mean()

#     objective = Objective()
    
#     maxiter = 100
#     with tqdm(total=maxiter) as pbar:
#         def verbose(xk):
#             pbar.update(1)
#         # try to optimize that function with scipy
#         obj = PyTorchObjective(objective)
#         xL = optimize.minimize(obj.fun, obj.x0, method='BFGS', jac=obj.jac,
#                 callback=verbose, options={'gtol': 1e-6, 'disp': True,
#                     'maxiter':maxiter})
#         #xL = optimize.minimize(obj.fun, obj.x0, method='CG', jac=obj.jac)# , options={'gtol': 1e-2})

In [4]:
import gym_gvgai

In [5]:
from agent.NNagent import NNagent

In [6]:
from generator.env_gen_wrapper import GridGame

In [7]:
from scipy.optimize import Bounds

In [8]:
_x = NNagent(GridGame(game='zelda', 
                     play_length=50, 
                     path=gym_gvgai.dir + '/envs/games/zelda_v0/', 
                     lvl_name='zelda_lvl0.txt', 
                     mechanics=['1', '2', '3', '+', 'g', 'w'], # monsters, key, door, wall
                  )
         )

Connecting to host 127.0.0.1 at port 39313 ...
Client connected to server [OK]


In [9]:
_x

<agent.NNagent.NNagent at 0x7b42701d3908>

In [10]:
_x.nn

Net(
  (conv1): Conv2d(13, 8, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 32, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=96, out_features=48, bias=True)
  (fc2): Linear(in_features=48, out_features=24, bias=True)
  (fc3): Linear(in_features=24, out_features=6, bias=True)
)

In [11]:
z = PyTorchObjective(_x)

In [12]:
z.x0.shape

(9262,)

In [13]:
tile = _x.reset()

In [14]:
o = _x.get_action(tile)

In [15]:
o

5

In [16]:
state_dict = z.unpack_parameters(np.random.randn(*z.x0.shape))

In [17]:
z.f.load_state_dict(state_dict)

<All keys matched successfully>

In [18]:
# z.cache(np.random.randn(*z.x0.shape))

In [19]:
# z.cached_f

In [20]:
bounds = [(-1, 1)]*z.x0.shape[0]

In [21]:
ans = optimize.differential_evolution(z.fun, bounds, maxiter=1, popsize=2)

MemoryError: 