In [1]:
import numpy as np
import random

In [2]:
class Env:
    """Square grid world containing a single agent.

    The world is stored in ``self.field``, a ``size x size`` array of zeros in
    which exactly one cell holds ``1`` to mark the agent's position.
    ``self.action_map`` maps the command letters ``'U'``, ``'D'``, ``'L'`` and
    ``'R'`` to ``(row_delta, col_delta)`` offsets.
    """

    def __init__(self, size):
        """Create an empty ``size x size`` field and place the agent on a random cell."""
        # init field
        self.field = np.zeros((size, size))
        i = random.randint(0, self.field.shape[0] - 1)
        j = random.randint(0, self.field.shape[0] - 1)
        # put agent to field
        self.field[i][j] = 1
        # define action map (insertion order fixes the one-hot index of each action)
        self.action_map = {
            'U': (-1, 0),
            'D': (+1, 0),
            'L': (0, -1),
            'R': (0, +1)
        }

    def move(self, cmd):
        """Move the agent one cell in direction ``cmd``.

        The move is ignored when it would leave the field. Raises ``KeyError``
        for a command that is not in ``action_map``.
        """
        rows, cols = np.where(self.field == 1)
        i, j = rows[0], cols[0]
        di, dj = self.action_map[cmd]
        ir, jr = i + di, j + dj
        limit = self.field.shape[0]
        if 0 <= ir < limit and 0 <= jr < limit:
            self.field[ir][jr] = 1
            self.field[i][j] = 0

    def plot(self):
        """Print the raw field array."""
        print(self.field)

    def actions(self, cmd):
        """Return ``cmd`` as a one-hot vector with one entry per known action.

        Raises ``ValueError`` when ``cmd`` is not a known action.
        """
        action_list = list(self.action_map.keys())
        action = np.zeros(len(action_list))
        action[action_list.index(cmd)] = 1
        return action

    def action_column(self, cmd):
        """Return the one-hot action for ``cmd`` zero-padded to the field height.

        The result has ``field_size`` entries so it can be attached to the
        field as an extra column.

        Raises ``ValueError`` when ``cmd`` is unknown or when the field has
        fewer rows than there are actions.
        """
        field_size = self.field.shape[0]
        one_hot = self.actions(cmd)
        # Derive the padding from the actual number of actions instead of a literal 4.
        empty_count = field_size - one_hot.size
        if empty_count < 0:
            raise ValueError(
                "field size {} is too small to hold a {}-entry action column".format(
                    field_size, one_hot.size
                )
            )
        return np.append(one_hot, np.zeros(empty_count))

In [5]:
def generate_data(size, field_size=5):
    """Generate supervised (state + action) -> next-state examples.

    Each example is built from a fresh ``Env(field_size)`` with a randomly
    placed agent and a randomly chosen action.

    Args:
        size: number of examples to generate.
        field_size: side length of the square field (defaults to 5, the size
            this function previously hard-coded).

    Returns:
        A tuple ``(X, Y)`` of numpy arrays:
        ``X`` has shape ``(size, field_size * (field_size + 1))`` and holds the
        flattened field with the action column attached on the right;
        ``Y`` has shape ``(size, field_size ** 2)`` and holds the flattened
        field after ``env.move(action)``.
    """
    n_samples = max(size, 0)
    # Preallocate so that an empty request still yields correctly shaped 2-D arrays.
    X = np.zeros((n_samples, field_size * (field_size + 1)))
    Y = np.zeros((n_samples, field_size ** 2))

    for k in range(n_samples):
        # create new env
        env = Env(field_size)
        # get random action
        action = random.choice(list(env.action_map.keys()))
        # represent action as binary column
        column = env.action_column(action)
        # join action_column to field at right to make an N x (N+1) sized numpy array
        field_augmented = np.concatenate((env.field, column.reshape(field_size, 1)), axis=1)
        # add input data example
        X[k] = field_augmented.reshape(-1)
        # make native moving
        env.move(action)
        # add the expected field after the move
        Y[k] = env.field.reshape(-1)

    return X, Y

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

In [7]:
# Choose where tensors and the model will live: CUDA when PyTorch can use it, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cuda device


In [8]:
# define neural network
class Net(nn.Module):
    """Fully connected network that maps an augmented field to next-field scores.

    Args:
        field_size: side length ``N`` of the square field. The network takes
            ``N * (N + 1)`` inputs (field plus one action column) and produces
            ``N * N`` outputs. Defaults to 5.
        hidden_size: width of each of the three hidden layers. Defaults to 64.
    """

    def __init__(self, field_size=5, hidden_size=64):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(field_size * (field_size + 1), hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, field_size * field_size)
        )

    def forward(self, x):
        """Flatten everything after the batch dimension and return the raw outputs."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [9]:
# train
def train(model, x, y, epochs, batch_size, optimizer=None):
    """Fit ``model`` to ``(x, y)`` with mini-batch MSE regression.

    Batches are taken in order (no shuffling) and moved to the device the
    model's parameters live on. After every epoch the loss of the last batch
    is printed.

    Args:
        model: ``nn.Module`` to train.
        x: numpy array of inputs, one example per row.
        y: numpy array of targets, one example per row.
        epochs: number of passes over the data.
        batch_size: number of rows per optimisation step.
        optimizer: optimizer that updates ``model``. When omitted, the
            notebook-level variable named ``optimizer`` is used, which keeps
            existing ``train(model, x, y, epochs, batch_size)`` calls working.

    Raises:
        NameError: if no optimizer is passed and no global ``optimizer`` exists.
    """
    if optimizer is None:
        # Backward compatibility with callers that rely on the global optimizer.
        optimizer = globals().get("optimizer")
        if optimizer is None:
            raise NameError(
                "name 'optimizer' is not defined; pass optimizer=... to train()"
            )

    # Follow the model's own device rather than a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        loss = None
        for start in range(0, x.shape[0], batch_size):
            stop = start + batch_size
            x_batch = torch.from_numpy(x[start:stop]).float().to(target_device)
            y_batch = torch.from_numpy(y[start:stop]).float().to(target_device)

            optimizer.zero_grad()
            output = model(x_batch)
            loss = F.mse_loss(output, y_batch)
            loss.backward()
            optimizer.step()
        # With no rows there is no batch and therefore no loss to report.
        last_loss = loss.item() if loss is not None else float("nan")
        print('Epoch: {}\tLoss: {:.6f}'.format(epoch, last_loss))

In [10]:
# Seed the RNGs used below (`random` drives data generation, torch drives the
# weight initialisation) so that Restart & Run All reproduces the same run.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Dataset sizes and optimizer hyper-parameters, named so they are easy to tune.
N_TRAIN = 1000000
N_TEST = 100000
LEARNING_RATE = 0.01
MOMENTUM = 0.5

# generate data
x_train, y_train = generate_data(N_TRAIN)
x_test, y_test = generate_data(N_TEST)

# define model
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

In [11]:
# Display the shapes of the training inputs and training targets.
np.shape(x_train), np.shape(y_train)

((1000000, 30), (1000000, 25))

In [12]:
# Run the training loop on the generated training set.
train(model, x_train, y_train, epochs=1000, batch_size=10000)

Epoch: 0	Loss: 0.045813
Epoch: 1	Loss: 0.042949
Epoch: 2	Loss: 0.041232
Epoch: 3	Loss: 0.040176
Epoch: 4	Loss: 0.039516
Epoch: 5	Loss: 0.039098
Epoch: 6	Loss: 0.038832
Epoch: 7	Loss: 0.038660
Epoch: 8	Loss: 0.038547
Epoch: 9	Loss: 0.038470
Epoch: 10	Loss: 0.038417
Epoch: 11	Loss: 0.038380
Epoch: 12	Loss: 0.038354
Epoch: 13	Loss: 0.038334
Epoch: 14	Loss: 0.038318
Epoch: 15	Loss: 0.038305
Epoch: 16	Loss: 0.038293
Epoch: 17	Loss: 0.038283
Epoch: 18	Loss: 0.038273
Epoch: 19	Loss: 0.038265
Epoch: 20	Loss: 0.038257
Epoch: 21	Loss: 0.038249
Epoch: 22	Loss: 0.038243
Epoch: 23	Loss: 0.038236
Epoch: 24	Loss: 0.038229
Epoch: 25	Loss: 0.038221
Epoch: 26	Loss: 0.038214
Epoch: 27	Loss: 0.038207
Epoch: 28	Loss: 0.038200
Epoch: 29	Loss: 0.038193
Epoch: 30	Loss: 0.038187
Epoch: 31	Loss: 0.038180
Epoch: 32	Loss: 0.038174
Epoch: 33	Loss: 0.038167
Epoch: 34	Loss: 0.038160
Epoch: 35	Loss: 0.038153
Epoch: 36	Loss: 0.038145
Epoch: 37	Loss: 0.038138
Epoch: 38	Loss: 0.038131
Epoch: 39	Loss: 0.038124
Epoch: 40	

Epoch: 320	Loss: 0.035647
Epoch: 321	Loss: 0.035633
Epoch: 322	Loss: 0.035618
Epoch: 323	Loss: 0.035604
Epoch: 324	Loss: 0.035590
Epoch: 325	Loss: 0.035576
Epoch: 326	Loss: 0.035562
Epoch: 327	Loss: 0.035547
Epoch: 328	Loss: 0.035533
Epoch: 329	Loss: 0.035518
Epoch: 330	Loss: 0.035504
Epoch: 331	Loss: 0.035489
Epoch: 332	Loss: 0.035474
Epoch: 333	Loss: 0.035460
Epoch: 334	Loss: 0.035445
Epoch: 335	Loss: 0.035430
Epoch: 336	Loss: 0.035415
Epoch: 337	Loss: 0.035400
Epoch: 338	Loss: 0.035385
Epoch: 339	Loss: 0.035370
Epoch: 340	Loss: 0.035355
Epoch: 341	Loss: 0.035340
Epoch: 342	Loss: 0.035325
Epoch: 343	Loss: 0.035310
Epoch: 344	Loss: 0.035295
Epoch: 345	Loss: 0.035280
Epoch: 346	Loss: 0.035265
Epoch: 347	Loss: 0.035250
Epoch: 348	Loss: 0.035235
Epoch: 349	Loss: 0.035219
Epoch: 350	Loss: 0.035204
Epoch: 351	Loss: 0.035189
Epoch: 352	Loss: 0.035173
Epoch: 353	Loss: 0.035158
Epoch: 354	Loss: 0.035142
Epoch: 355	Loss: 0.035127
Epoch: 356	Loss: 0.035111
Epoch: 357	Loss: 0.035096
Epoch: 358	L

Epoch: 636	Loss: 0.030589
Epoch: 637	Loss: 0.030575
Epoch: 638	Loss: 0.030560
Epoch: 639	Loss: 0.030546
Epoch: 640	Loss: 0.030531
Epoch: 641	Loss: 0.030517
Epoch: 642	Loss: 0.030502
Epoch: 643	Loss: 0.030488
Epoch: 644	Loss: 0.030473
Epoch: 645	Loss: 0.030458
Epoch: 646	Loss: 0.030444
Epoch: 647	Loss: 0.030429
Epoch: 648	Loss: 0.030415
Epoch: 649	Loss: 0.030400
Epoch: 650	Loss: 0.030386
Epoch: 651	Loss: 0.030371
Epoch: 652	Loss: 0.030357
Epoch: 653	Loss: 0.030342
Epoch: 654	Loss: 0.030327
Epoch: 655	Loss: 0.030313
Epoch: 656	Loss: 0.030298
Epoch: 657	Loss: 0.030284
Epoch: 658	Loss: 0.030269
Epoch: 659	Loss: 0.030255
Epoch: 660	Loss: 0.030240
Epoch: 661	Loss: 0.030226
Epoch: 662	Loss: 0.030212
Epoch: 663	Loss: 0.030197
Epoch: 664	Loss: 0.030183
Epoch: 665	Loss: 0.030168
Epoch: 666	Loss: 0.030154
Epoch: 667	Loss: 0.030139
Epoch: 668	Loss: 0.030125
Epoch: 669	Loss: 0.030110
Epoch: 670	Loss: 0.030096
Epoch: 671	Loss: 0.030081
Epoch: 672	Loss: 0.030067
Epoch: 673	Loss: 0.030053
Epoch: 674	L

Epoch: 952	Loss: 0.026058
Epoch: 953	Loss: 0.026044
Epoch: 954	Loss: 0.026029
Epoch: 955	Loss: 0.026015
Epoch: 956	Loss: 0.026000
Epoch: 957	Loss: 0.025986
Epoch: 958	Loss: 0.025971
Epoch: 959	Loss: 0.025957
Epoch: 960	Loss: 0.025942
Epoch: 961	Loss: 0.025928
Epoch: 962	Loss: 0.025913
Epoch: 963	Loss: 0.025899
Epoch: 964	Loss: 0.025884
Epoch: 965	Loss: 0.025870
Epoch: 966	Loss: 0.025855
Epoch: 967	Loss: 0.025840
Epoch: 968	Loss: 0.025826
Epoch: 969	Loss: 0.025811
Epoch: 970	Loss: 0.025797
Epoch: 971	Loss: 0.025782
Epoch: 972	Loss: 0.025768
Epoch: 973	Loss: 0.025753
Epoch: 974	Loss: 0.025738
Epoch: 975	Loss: 0.025724
Epoch: 976	Loss: 0.025709
Epoch: 977	Loss: 0.025694
Epoch: 978	Loss: 0.025680
Epoch: 979	Loss: 0.025665
Epoch: 980	Loss: 0.025651
Epoch: 981	Loss: 0.025636
Epoch: 982	Loss: 0.025621
Epoch: 983	Loss: 0.025606
Epoch: 984	Loss: 0.025592
Epoch: 985	Loss: 0.025577
Epoch: 986	Loss: 0.025562
Epoch: 987	Loss: 0.025547
Epoch: 988	Loss: 0.025532
Epoch: 989	Loss: 0.025518
Epoch: 990	L

In [13]:
def move_by_model(env, model, cmd):
    """Advance ``env`` by one step using the model's prediction instead of ``env.move``.

    The current field is extended with the action column for ``cmd``, flattened
    and passed through ``model``. The highest-scoring output cell becomes the
    agent's new position and ``env.field`` is replaced by the resulting one-hot
    field.

    Args:
        env: object exposing a square ``field`` array and ``action_column(cmd)``.
        model: network mapping ``N * (N + 1)`` inputs to ``N * N`` outputs.
        cmd: action key understood by ``env.action_column``.

    Returns:
        None. ``env.field`` is updated in place with a new ``N x N`` array.
    """
    field_size = env.field.shape[0]

    column = env.action_column(cmd)
    # join action_column to field at right to make an N x (N+1) sized numpy array
    field_augmented = np.concatenate((env.field, column.reshape(field_size, 1)), axis=1)
    # flatten into a single-example batch of shape (1, N*(N+1))
    x_predict = torch.from_numpy(field_augmented.reshape(1, -1)).float()

    # send the input to wherever the model's parameters live
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x_predict = x_predict.to(first_param.device)

    # predict without building an autograd graph, then copy to cpu
    with torch.no_grad():
        y_predict = model(x_predict).detach().cpu().numpy()

    # Build the one-hot field from scratch: thresholding the raw outputs would
    # leave extra non-zero cells whenever another output is >= 1.
    next_field = np.zeros(field_size * field_size, dtype=env.field.dtype)
    next_field[int(np.argmax(y_predict[0]))] = 1
    # reshape to N x N and update field
    env.field = next_field.reshape(field_size, field_size)

In [14]:
# Demo: create a fresh 5x5 environment, print it, then let the model move the
# agent up three times, printing the field after every predicted move.
field_size = 5
env = Env(field_size)
env.plot()
for _step in range(3):
    move_by_model(env, model, 'U')
    env.plot()

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [15]:
# Let the model predict a move to the right, then print the updated field.
move_by_model(env, model, 'R')
env.plot()

[[0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [16]:
# Let the model predict a move down, then print the updated field.
move_by_model(env, model, 'D')
env.plot()

[[0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [17]:
# Let the model predict a move to the left, then print the updated field.
move_by_model(env, model, 'L')
env.plot()

[[0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [18]:
# Second consecutive move to the left; print the updated field.
move_by_model(env, model, 'L')
env.plot()

[[0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [19]:
# Third consecutive move to the left. In the recorded run the agent was already
# in column 0, and the printed field is unchanged after this call.
move_by_model(env, model, 'L')
env.plot()

[[0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
