In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('./train_csv/test.csv', header=None, delimiter=';')
df.columns = ['wave','truth']
# df = df[df['truth'].isin(['SEDUTO','VAI'])]

train, test = train_test_split(df, train_size=50, stratify=df["truth"])

In [3]:
import random

class Memory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []

    def push(self, command, action, reward):
        if len(self.memory) >= self.capacity:
            self.memory.pop(0)
        self.memory.append((command, action, reward))

    def sample(self, batch_size):
        if len(self.memory) < 1:
            return []
        # return random.sample(self.memory, min(batch_size, len(self.memory)//2+1))
        return random.sample(self.memory, min(batch_size, len(self.memory)))


In [166]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = nn.functional.softmax(out, dim=-1).squeeze()
        return out

In [73]:
# RNN
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Net, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)
        out = self.fc(h_n[-1])
        out = self.softmax(out)
        return out

In [190]:
class CNNModel(nn.Module):
    def __init__(self, num_classes):
        super(CNNModel, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(kernel_size=2)
        
        self.fc = nn.Linear(32*23, num_classes)
        self.softmax = nn.Softmax(dim=-1)
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.softmax(x)
        
        return x

In [234]:
import random
from model.model import Net
import torch.nn as nn
from torch.nn import functional as F
import torch
import numpy as np
import torch.optim as optim
import json

_LIVE_LEARNING_RATE = 0.001
_MEMORY_LEARNING_RATE = 3e-3
_GOOD_REWARD = 2
_BAD_REWARD = 1
_MEMORY_SIZE = 20
_MAX_NORM = 1.0
_REWARD_MULTIPLIER = 0.1
num_possible_commands = 100

moves = {
    "JUMP",
    "CROUCH",
    "SIT",
    "WALK"
}

trans = {
    "SALTA": "JUMP",
    "SEDUTO": "SIT",
    "GIU": "CROUCH",
    "VAI": "WALK"
}


class Dog:
    def __init__(self, moves, num_possible_commands):
        self.device = torch.device('cpu')
        self.moves = moves
        self.commands = set()
        self.max_commands = num_possible_commands
        self.itom = {i: m for i, m in enumerate(moves)}
        self.mtoi = {m: i for i, m in self.itom.items()}
        self.model = CNNModel(len(moves)).to(self.device)
        self.memory = Memory(_MEMORY_SIZE)
        self.optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        self.loss_fn = nn.CrossEntropyLoss()
        
        print(self.itom)

    def update_vocabulary(self, word):
        if word in self.commands:
            return 0
        self.mtoi[word] = len(self.moves)+len(self.commands)   
        self.itom[len(self.moves)+len(self.commands)] = word
        self.commands.add(word)

        return 1

    def _get_input(self, command):
        command.extend([0] * (self.max_commands - len(command)))
        tensor_command = torch.tensor(command[:self.max_commands]).unsqueeze(0).unsqueeze(0)
        return tensor_command

    def predict(self, command):
        with torch.no_grad():
            output = self.model(self._get_input(command))
            next_move = torch.multinomial(output, 1)
            print(output)
            return self.itom[next_move.item()]

    def learn(self, command, action, reward):
        reward = int(reward)
        if reward > 0:
            reward = reward * _GOOD_REWARD
        else:
            reward = reward * _BAD_REWARD
            
        action_t = torch.tensor(self.mtoi[action], dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)
        
        # self.memory.push(self._get_input(command), action_t, reward_t)
        output_prob = self.model(self._get_input(command)).squeeze()
        # print(f"{output_prob} - {action_t} - {reward_t * _REWARD_MULTIPLIER}")

        loss = -torch.log(output_prob[action_t]) * reward_t * _REWARD_MULTIPLIER

        self.optimizer.zero_grad()
        loss.backward()

        self.optimizer.step()
        
    def learn_with_mem(self, command, action, reward):        
        reward = int(reward)
        if reward > 0:
            reward = reward*_GOOD_REWARD
        else:
            reward = reward*_BAD_REWARD 
        optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        action_t = torch.tensor(self.mtoi[action], dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)

        self.memory.push(self._get_input(command), action_t, reward_t)
        output_prob = self.model(self._get_input(command))
        loss = -torch.log(output_prob[action_t])*reward_t
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        self.replay_memory()

        

    def replay_memory(self):
        optimizer = optim.Adam(self.model.parameters(), lr=_MEMORY_LEARNING_RATE)
        for command_t, action_t, reward_t in self.memory.sample(_MEMORY_SIZE):
            output_prob = self.model(command_t)
            loss = -torch.log(output_prob[action_t])*reward_t
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


In [None]:
# simul
trans = {
    "SALTA": "JUMP",
    "SEDUTO": "SIT",
    "GIU": "CROUCH",
    "VAI": "WALK"
}


dog = Dog(moves, num_possible_commands)
for w, target in train.sample(frac=1.0).values:
    command = json.loads(w)
    
    action = dog.predict(command)
    reward = -1
    if action == trans[target]:
        reward = 1
    print(f"""T: {target} A: {action} -> {reward}""")
    # print(f"{action} vs {target}: {reward}")
    dog.learn(command, action, reward)

In [None]:
# test

print(f"PREDICTED -> REAL")
rounds = 1
correct = 0
for k in range(rounds):
    for w, target in test.values:
        command = json.loads(w)
        # print(target)
        # print(command)
        action = dog.predict(command)
        print(f"{action} -> {trans[target]}")
        if action == trans[target]:
            correct += 1
print(f"correct {correct*100/(len(test)*rounds)}% ({correct}/{len(test)*rounds})")

In [235]:
dog = Dog(moves, num_possible_commands)

{0: 'CROUCH', 1: 'SIT', 2: 'JUMP', 3: 'WALK'}


In [244]:
CMD = 'GIU'

for k in range(5):
    w, target = train[train['truth'] == CMD].sample(1).values[0]
    command = json.loads(w)

    action = dog.predict(command)
    # print(action)
    reward = -1
    if action == trans[target]:
        reward = 1
    # print(f"""T: {target} A: {action} -> {reward}""")
    dog.learn(command, action, reward)
    print(f"""T: {target} A: {action} -> {reward}""")

tensor([[0.0070, 0.0038, 0.9847, 0.0045]])
T: GIU A: JUMP -> -1
tensor([[0.0064, 0.0036, 0.9858, 0.0042]])
T: GIU A: JUMP -> -1
tensor([[0.0060, 0.0033, 0.9868, 0.0039]])
T: GIU A: JUMP -> -1
tensor([[0.0056, 0.0032, 0.9876, 0.0037]])
T: GIU A: JUMP -> -1
tensor([[0.0052, 0.0030, 0.9883, 0.0035]])
T: GIU A: JUMP -> -1


In [248]:
df.groupby('truth').count()

Unnamed: 0_level_0,wave
truth,Unnamed: 1_level_1
GIU,30
SALTA,30
SEDUTO,30
VAI,30
