In [1]:
from appgym.web_env import WebEnv

In [2]:
env = WebEnv("http://127.0.0.1:8080/", content_selector='.mobile-content')

In [3]:
state, actions = env.reset()

In [4]:
state.image.shape

(480, 300, 3)

In [5]:
len(actions)

360

In [16]:
_ = env.step(actions[320])

In [5]:
480 / 20

24.0

In [6]:
300 / 20

15.0

In [7]:
24*15

360

In [8]:
480 / 8

60.0

In [12]:
env._viewport()

Rect(x=250, y=60, width=300, height=480)

In [10]:
range(10)

range(0, 10)

In [12]:
list(range(1, 11))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [17]:
from appgym.agent import Agent

In [28]:
agent = Agent(actions)

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
from itertools import count

In [None]:
s = Variable(torch.from_numpy(state.image).float())

In [43]:
s.shape

torch.Size([480, 300, 3])

In [45]:
s.view(3, 480, 300).unsqueeze(0).shape

torch.Size([1, 3, 480, 300])

In [49]:
# s is laid out height x width x channel (its shape is shown above as
# [480, 300, 3]).  view() would only reinterpret the flat memory and mix
# pixels across channels; permute() actually moves the channel axis to the
# front, and unsqueeze(0) then adds the batch dimension.
img_state = s.permute(2, 0, 1).contiguous().unsqueeze(0)

In [50]:
img_state.shape

torch.Size([1, 3, 480, 300])

In [6]:
# Hyperparameters read by the training cells of this notebook.
BATCH_SIZE = 128  # transitions sampled from replay memory per optimize_model() call
GAMMA = 0.999  # factor multiplied into next-state values in optimize_model()
EPS_START = 0.9  # exploration threshold when steps_done is 0 (epsilon formula in select_action)
EPS_END = 0.05  # value the exploration threshold decays towards as steps_done grows
EPS_DECAY = 200  # divisor of steps_done inside exp(); a larger value decays more slowly

In [40]:
from collections import namedtuple

In [41]:
# One recorded environment step.
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))


class ReplayMemory:
    """Fixed-capacity ring buffer of ``Transition`` records.

    Once ``capacity`` transitions have been stored, each new push overwrites
    the oldest stored entry.

    Attributes:
        capacity: maximum number of transitions kept.
        memory:   list holding the stored transitions.
        position: index in ``memory`` that the next push will write to.
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Raises:
            ValueError: if ``capacity`` is not positive (such a buffer could
                never accept a push).
        """
        if capacity <= 0:
            raise ValueError("capacity must be positive, got %r" % (capacity,))
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Store one transition built from ``(state, action, next_state, reward)``.

        Raises:
            TypeError: if ``args`` does not match the ``Transition`` fields; the
                buffer is left unchanged in that case.
        """
        # Build the record first so a bad call cannot leave a placeholder
        # entry behind in the buffer.
        transition = Transition(*args)
        if len(self.memory) < self.capacity:
            # Still filling up: position always equals len(memory) here.
            self.memory.append(transition)
        else:
            self.memory[self.position] = transition
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random.

        Raises:
            ValueError: (from ``random.sample``) if fewer than ``batch_size``
                transitions are stored.
        """
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [8]:
class Model(nn.Module):
    """Convolutional network producing one score per action.

    Three 5x5, stride-2 convolution + batch-norm + ReLU stages are followed by
    a hidden linear layer, dropout, and a linear output layer.

    Args:
        n_actions: width of the output layer.
        drop_probability: dropout probability applied after the hidden layer.
        hidden_size: width of the hidden linear layer.  ``None`` (the default)
            makes it as wide as the flattened convolution output, which is the
            original behaviour: 62016 units for 480x300 input, i.e. a weight
            matrix of roughly 3.8e9 entries.  Pass a smaller value for a model
            with a manageable memory footprint.
        input_shape: ``(channels, height, width)`` of the images that will be
            fed to ``forward``; used to size the first convolution and the
            first linear layer.

    Raises:
        ValueError: if ``input_shape`` is too small to survive the three
            convolutions.
    """

    def __init__(self, n_actions, drop_probability=0.2, hidden_size=None,
                 input_shape=(3, 480, 300)):
        super().__init__()
        flat_features = self._flat_features(input_shape)
        if hidden_size is None:
            hidden_size = flat_features
        # Layers are created in the same order as before so that random
        # initialisation with the default arguments is unchanged.
        self.conv1 = nn.Conv2d(input_shape[0], 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)
        self.fc = nn.Linear(flat_features, hidden_size)
        self.drop = nn.Dropout(drop_probability)
        self.out = nn.Linear(hidden_size, n_actions)

    @staticmethod
    def _flat_features(input_shape):
        """Number of values left after the three convolutions, per sample.

        Each unpadded 5x5 / stride-2 convolution maps a side of length ``n``
        to ``(n - 5) // 2 + 1``; the last convolution has 32 output channels.
        """
        _, height, width = input_shape
        for _stage in range(3):
            height = (height - 5) // 2 + 1
            width = (width - 5) // 2 + 1
            if height < 1 or width < 1:
                raise ValueError(
                    "input_shape %r is too small for three 5x5 stride-2 convolutions"
                    % (tuple(input_shape),)
                )
        return 32 * height * width

    def forward(self, x):
        """Map a ``(batch, channels, height, width)`` tensor to ``(batch, n_actions)`` scores."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Flatten everything except the batch dimension for the linear layers.
        x = F.relu(self.fc(x.view(x.size(0), -1)))
        x = self.drop(x)
        # Raw scores are returned; no softmax is applied.
        x = self.out(x)
        return x
#         return F.softmax(x, dim = 1)
#         return 


# res = m(img_state)

In [43]:
memory = ReplayMemory(10_000)

In [9]:
import random
import math

model = Model(len(actions))

In [45]:
optimizer = optim.RMSprop(model.parameters())

In [23]:
_ = model.train()

In [29]:
def select_action(state, model, actions):
    """Return the entry of ``actions`` that ``model`` scores highest for ``state``.

    Args:
        state: object whose ``image`` attribute is a height x width x channel
            NumPy array.
        model: callable taking a ``(1, channels, height, width)`` float tensor
            and returning a ``(1, len(actions))`` tensor of scores.
        actions: sequence of candidate actions, indexed by the model's output
            position.

    Returns:
        The element of ``actions`` at the index of the highest score.
    """
    pixels = torch.from_numpy(state.image).float()
    # permute (not view) is required to go from H x W x C to C x H x W: view
    # only reinterprets the flat buffer and would interleave the channels.
    # Using permute also removes the hard-coded 480 x 300 image size.
    img_state = Variable(pixels.permute(2, 0, 1).contiguous().unsqueeze(0))
    scores = model(img_state).data[0]
    best = scores.max(0)[1]
    # max() yields either a size-1 or a zero-dimensional index tensor depending
    # on the torch version; view(-1)[0] followed by int() handles both.
    action_idx = int(best.view(-1)[0])
    return actions[action_idx]

In [None]:
def optimize_model():
    """Run one optimisation step on ``model`` using a random replay-memory batch.

    Returns immediately, doing nothing, while ``memory`` holds fewer than
    ``BATCH_SIZE`` transitions.  Otherwise it samples ``BATCH_SIZE``
    transitions, computes the smooth-L1 loss between the model's score for the
    taken action and ``reward + GAMMA * max(next-state scores)``, and applies
    one ``optimizer`` step with gradients clamped to [-1, 1].

    Reads the notebook globals ``memory``, ``model``, ``optimizer``,
    ``BATCH_SIZE`` and ``GAMMA``.

    NOTE(review): every stored ``state`` / ``next_state`` / ``action`` /
    ``reward`` must be a tensor that ``torch.cat`` can stack along dim 0, and
    ``action`` must be a (1, 1) LongTensor index for ``gather``.  The training
    loop in this notebook pushes ``state.image`` and the action object itself,
    which does not look compatible - confirm and convert at push time.
    """
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    # Turn a list of Transitions into one Transition of per-field tuples.
    batch = Transition(*zip(*transitions))

    # A next_state of None marks a transition with no successor state.
    non_final_mask = torch.ByteTensor(tuple(map(lambda s: s is not None,
                                                batch.next_state)))
    non_final = [s for s in batch.next_state if s is not None]

    state_batch = Variable(torch.cat(batch.state))
    action_batch = Variable(torch.cat(batch.action))
    reward_batch = Variable(torch.cat(batch.reward))

    # Score of the action that was actually taken, shape (BATCH_SIZE, 1).
    state_action_values = model(state_batch).gather(1, action_batch)

    # Best next-state score; stays 0 where there is no next state.
    next_state_values = Variable(torch.zeros(BATCH_SIZE)).type(torch.FloatTensor)
    if non_final:
        non_final_next_states = Variable(torch.cat(non_final), volatile=True)
        next_state_values[non_final_mask] = model(non_final_next_states).max(1)[0]

    target = (next_state_values * GAMMA) + reward_batch
    # Re-wrap the raw data so the target carries neither the volatile flag nor
    # a gradient history into the loss, and give it the same (BATCH_SIZE, 1)
    # shape as the predictions.
    expected_state_action_values = Variable(target.data.view(-1, 1))

    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)

    optimizer.zero_grad()
    loss.backward()
    for param in model.parameters():
        # Parameters that did not take part in the loss have no gradient.
        if param.grad is not None:
            param.grad.data.clamp_(-1, 1)
    optimizer.step()

In [None]:
# Collect transitions by acting in the environment, storing each step in the
# replay memory and calling optimize_model() after every step.
num_episodes = 10
for i_episode in range(num_episodes):
    state, actions = env.reset()
    # NOTE(review): count() is unbounded and nothing in this loop breaks out of
    # it, so as written the first episode never ends - confirm how an episode
    # is supposed to terminate.
    for s in count():
        # NOTE(review): this is the three-argument select_action; a later cell
        # redefines select_action with a (state, actions) signature.
        action = select_action(state, model, actions)
        # The second value returned by env.step is discarded here, so `actions`
        # keeps the list obtained from env.reset().
        next_state, _, reward = env.step(action)
        
        reward = torch.FloatTensor([reward])
        
        # NOTE(review): the images are pushed as state.image / next_state.image
        # and the action as returned by select_action, while optimize_model()
        # calls torch.cat on these fields - confirm the stored formats match.
        memory.push(
            state.image, 
            action, 
            next_state.image,
            reward
        )
        
        state = next_state
        optimize_model()

In [48]:
s = torch.from_numpy(state.image).float()
# state.image is height x width x channel; permute() moves the channel axis to
# the front (view() would only reinterpret the flat buffer and mix channels),
# then unsqueeze(0) adds the batch dimension.
img_state = s.permute(2, 0, 1).contiguous().unsqueeze(0)

In [52]:
concated = Variable(torch.cat(
    [img_state,
    img_state]
))

In [54]:
res = model(concated)

In [55]:
res.shape

torch.Size([2, 360])

In [53]:
concated.shape

torch.Size([2, 3, 480, 300])

In [31]:
res

Action(x=290, y=330, type='click')

In [21]:
res.data[0].max(0)[1][0]

254

In [28]:
res.data[0].max(0)[1][0]

254

In [24]:
s = Variable(torch.from_numpy(state.image).float())
# state.image is height x width x channel; permute() moves the channel axis to
# the front (view() would only reinterpret the flat buffer and mix channels),
# then unsqueeze(0) adds the batch dimension.
img_state = s.permute(2, 0, 1).contiguous().unsqueeze(0)
res = model(img_state)

In [25]:
res.data[0].max(0)

(
  0.3224
 [torch.FloatTensor of size 1], 
  166
 [torch.LongTensor of size 1])

In [121]:
import random
import math

model = Model(len(actions))
# Number of select_action() calls so far; drives the epsilon decay.
steps_done = 0


def select_action(state, actions):
    """Pick an action index epsilon-greedily.

    The exploration threshold starts at ``EPS_START`` and decays exponentially
    towards ``EPS_END`` as the global ``steps_done`` grows (time constant
    ``EPS_DECAY``).  Each call increments ``steps_done`` and prints which
    branch was taken.

    Args:
        state: object whose ``image`` attribute is a height x width x channel
            NumPy array.
        actions: sequence of currently available actions; only its length is
            used here.

    Returns:
        A 1x1 LongTensor holding an index into ``actions``: the position of the
        global ``model``'s highest score on the greedy branch, a uniformly
        random index otherwise.
    """
    global steps_done
    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    if sample > eps_threshold:
        print("Model")
        pixels = torch.from_numpy(state.image).float()
        # permute (not view) is required to go from H x W x C to C x H x W:
        # view only reinterprets the flat buffer and would mix the channels.
        img_state = Variable(pixels.permute(2, 0, 1).contiguous().unsqueeze(0))
        res = model(img_state)
        # The model returns raw scores, which may be negative and are not a
        # probability distribution, so take the arg-max instead of sampling
        # with multinomial().  Shape matches the random branch: (1, 1).
        return res.data.max(1)[1].view(1, 1)
    else:
        print("Random")
        return torch.LongTensor([[random.randrange(len(actions))]])

In [123]:
# s = state

# Take ten epsilon-greedy steps in the environment; the state and action list
# returned by each step are what the next iteration acts on.
for _ in range(10):
    act = select_action(state, actions)
    # select_action returns a nested (1 x 1) index container; unwrap the index.
    act_idx = act[0][0]
    state, actions, reward = env.step(actions[act_idx])


Random
Random
Random
Random
Random
Model
Random
Random
Random
Random


In [115]:
act = select_action(state, actions)

Model


In [112]:
act_idx = act[0][0]

10

In [85]:
res.multinomial().data[0]


 246
[torch.LongTensor of size 1]

In [67]:
res.shape

torch.Size([1, 360])

In [59]:
res.view(res.size(0), -1).shape

torch.Size([1, 62016])