In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [44]:
df = pd.read_csv('./train_csv/test.csv', header=None, delimiter=';')
df.columns = ['wave','truth']
df = df[df['truth'].isin(['SEDUTO','VAI'])]

train, test = train_test_split(df, train_size=50, stratify=df["truth"])

In [45]:
import random

class Memory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, command, action, reward):
        if len(self.memory) >= self.capacity:
            self.memory.pop(0)
        self.memory.append((command, action, reward))

    def sample(self, batch_size):
        if len(self.memory) < 1:
            return []
        # return random.sample(self.memory, min(batch_size, len(self.memory)//2+1))
        return random.sample(self.memory, min(batch_size, len(self.memory)))


In [46]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = nn.functional.softmax(out, dim=-1).squeeze()
        return out

In [50]:
import random
from model.model import Net
import torch.nn as nn
from torch.nn import functional as F
import torch
import numpy as np
import torch.optim as optim
import json

_LIVE_LEARNING_RATE = 2e-2
_MEMORY_LEARNING_RATE = 3e-3
_GOOD_REWARD = 5
_BAD_REWARD = 5
_MEMORY_SIZE = 20
num_possible_commands = 20

moves = {
    "JUMP",
    "CROUCH",
    "SIT",
    "WALK"
}


class Dog:
    def __init__(self, moves, num_possible_commands):
        self.device = torch.device('cpu')
        self.moves = moves
        self.commands = set()
        self.max_commands = num_possible_commands
        self.itom = {i: m for i, m in enumerate(moves)}
        self.mtoi = {m: i for i, m in self.itom.items()}
        self.model = Net(num_possible_commands, 100, len(moves)).to(self.device)
        self.memory = Memory(_MEMORY_SIZE)
        self.optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        
        print(self.itom)

    def update_vocabulary(self, word):
        if word in self.commands:
            return 0
        self.mtoi[word] = len(self.moves)+len(self.commands)   
        self.itom[len(self.moves)+len(self.commands)] = word
        self.commands.add(word)

        return 1

    def _get_input(self, command):
        command.extend([0] * (self.max_commands - len(command)))
        tensor_command = torch.tensor(command[:self.max_commands])
        return tensor_command

    def predict(self, command):
        with torch.no_grad():
            output = self.model(self._get_input(command))
            # print(output)
            next_move = torch.multinomial(output, 1)
            return self.itom[next_move.item()]

    def learn(self, command, action, reward):
        reward = int(reward)
        if reward > 0:
            reward = reward*_GOOD_REWARD
        else:
            reward = reward*_BAD_REWARD 
        # optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        action_t = torch.tensor(self.mtoi[action], dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)

        self.memory.push(self._get_input(command), action_t, reward_t)
        output_prob = self.model(self._get_input(command))
        loss = -torch.log(output_prob[action_t])*reward_t
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
    def learn_with_mem(self, command, action, reward):        
        reward = int(reward)
        if reward > 0:
            reward = reward*_GOOD_REWARD
        else:
            reward = reward*_BAD_REWARD 
        optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        action_t = torch.tensor(self.mtoi[action], dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)

        self.memory.push(self._get_input(command), action_t, reward_t)
        output_prob = self.model(self._get_input(command))
        loss = -torch.log(output_prob[action_t])*reward_t
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        self.replay_memory()

        

    def replay_memory(self):
        optimizer = optim.Adam(self.model.parameters(), lr=_MEMORY_LEARNING_RATE)
        for command_t, action_t, reward_t in self.memory.sample(_MEMORY_SIZE):
            output_prob = self.model(command_t)
            loss = -torch.log(output_prob[action_t])*reward_t
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


In [55]:
# simul
trans = {
    "SALTA": "JUMP",
    "SEDUTO": "SIT",
    "GIU": "CROUCH",
    "VAI": "WALK"
}


dog = Dog(moves, num_possible_commands)
for w, target in train.sample(frac=1.0).values:
    command = json.loads(w)
    
    action = dog.predict(command)
    reward = -1
    if action == trans[target]:
        reward = 1
    print(f"""T: {target} A: {action} -> {reward}""")
    # print(f"{action} vs {target}: {reward}")
    dog.learn(command, action, reward)

{0: 'JUMP', 1: 'SIT', 2: 'CROUCH', 3: 'WALK'}
tensor([0.2265, 0.2464, 0.2827, 0.2445])
T: SEDUTO A: JUMP -> -1
tensor([0.1780, 0.2626, 0.2986, 0.2608])
T: SEDUTO A: WALK -> -1
tensor([0.1570, 0.2874, 0.3288, 0.2267])
T: VAI A: SIT -> -1
tensor([0.1491, 0.2686, 0.3698, 0.2124])
T: SEDUTO A: SIT -> 1
tensor([0.1415, 0.2746, 0.3842, 0.1997])
T: VAI A: CROUCH -> -1
tensor([0.1421, 0.2877, 0.3723, 0.1980])
T: VAI A: CROUCH -> -1
tensor([0.1467, 0.3051, 0.3466, 0.2016])
T: VAI A: SIT -> -1
tensor([0.1524, 0.3063, 0.3344, 0.2070])
T: SEDUTO A: WALK -> -1
tensor([0.1587, 0.3092, 0.3298, 0.2022])
T: VAI A: SIT -> -1
tensor([0.1635, 0.3055, 0.3311, 0.1999])
T: SEDUTO A: SIT -> 1
tensor([0.1709, 0.3053, 0.3253, 0.1986])
T: VAI A: SIT -> -1
tensor([0.1775, 0.2994, 0.3251, 0.1980])
T: VAI A: CROUCH -> -1
tensor([0.1850, 0.2970, 0.3179, 0.2001])
T: VAI A: WALK -> 1
tensor([0.1902, 0.2933, 0.3092, 0.2073])
T: SEDUTO A: WALK -> -1
tensor([0.1910, 0.2966, 0.3079, 0.2045])
T: SEDUTO A: SIT -> 1
tensor([

In [56]:
# test

print(f"PREDICTED -> REAL")
rounds = 1
correct = 0
for k in range(rounds):
    for w, target in test.values:
        command = json.loads(w)
        # print(target)
        # print(command)
        action = dog.predict(command)
        print(f"{action} -> {trans[target]}")
        if action == trans[target]:
            correct += 1
print(f"correct {correct*100/(len(test)*rounds)}% ({correct}/{len(test)*rounds})")

PREDICTED -> REAL
tensor([0.0133, 0.9232, 0.0331, 0.0305])
SIT -> WALK
tensor([0.0131, 0.9240, 0.0328, 0.0301])
SIT -> WALK
tensor([0.0134, 0.9226, 0.0333, 0.0307])
SIT -> WALK
tensor([0.0130, 0.9248, 0.0326, 0.0297])
SIT -> WALK
tensor([0.0127, 0.9262, 0.0320, 0.0291])
SIT -> SIT
tensor([0.0125, 0.9271, 0.0316, 0.0289])
JUMP -> SIT
tensor([0.0125, 0.9268, 0.0317, 0.0289])
SIT -> SIT
tensor([0.0131, 0.9239, 0.0328, 0.0301])
SIT -> WALK
tensor([0.0127, 0.9260, 0.0321, 0.0292])
SIT -> SIT
tensor([0.0133, 0.9234, 0.0329, 0.0305])
SIT -> SIT
correct 40.0% (4/10)


In [69]:
test.groupby('truth').count()

Unnamed: 0_level_0,wave
truth,Unnamed: 1_level_1
GIU,5
SALTA,5
