In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import json
from sklearn.model_selection import train_test_split

In [45]:
# Action labels; later cells use a predicted index i to look up trans[i].
trans = ['GIU', 'SEDUTO', 'SALTA', 'VAI']

# Semicolon-separated file without a header row: JSON-encoded 'wave' ; 'truth' label.
df = pd.read_csv('./train_csv/test.csv', header=None, delimiter=';')
df.columns = ['wave', 'truth']

# Optional experiment: restrict the data to two classes only.
# df = df[df['truth'].isin(['SEDUTO','VAI'])]

# Keep 100 label-stratified rows for training; the remainder is the test set.
# The JSON decoding is done with `assign`, which returns a new frame, instead
# of writing a column into the frames returned by train_test_split (that is
# chained assignment and triggers pandas' SettingWithCopyWarning).
train, test = (
    part.assign(wave=part['wave'].apply(json.loads))
    for part in train_test_split(df, train_size=100, stratify=df["truth"])
)


In [18]:
class Net(nn.Module):
    """LSTM encoder followed by a linear classification head.

    The input is fed through a batch-first LSTM and the final hidden state
    of its last layer is projected to `num_classes` unnormalised scores.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the LSTM hidden state.
        num_classes: number of output scores.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for `x`."""
        # nn.LSTM returns (output, (h_n, c_n)); only h_n is needed here.
        final_state = self.lstm(x)[1]
        last_hidden = final_state[0][-1]
        return self.fc(last_hidden)

In [56]:
# Hyper-parameters of the live-learning agent.
_LIVE_LEARNING_RATE = 1e-3  # used as the Adam learning rate
_HIDDEN_SIZE = 128          # hidden-state width handed to the network

# Problem dimensions.
max_input_size = 100  # length every command vector is padded/truncated to
num_moves = 4         # number of actions the agent can choose between

class Dog:
    """Agent that learns online which action to take for a command vector.

    An action is sampled from the model's softmax distribution (`predict`),
    and the model is then nudged towards or away from that action depending
    on the sign of a scalar reward (`learn`).

    Args:
        num_moves: number of distinct actions (size of the model output).
        num_possible_commands: fixed length every command is padded or
            truncated to before being fed to the model.
    """

    def __init__(self, num_moves, num_possible_commands):
        self.device = torch.device('cpu')
        self.max_commands = num_possible_commands
        self.model = Net(num_possible_commands, _HIDDEN_SIZE, num_moves)
        self.optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        self.loss_fn = nn.CrossEntropyLoss()

    def _get_input(self, command):
        """Turn `command` into a float tensor of shape (1, 1, max_commands).

        The values are truncated or zero-padded on the right to exactly
        `self.max_commands` entries. The caller's sequence is left untouched.
        """
        # Work on a copy: padding in place would silently grow the caller's
        # list (e.g. the lists stored inside the training DataFrame).
        samples = list(command[:self.max_commands])
        samples.extend([0] * (self.max_commands - len(samples)))
        # Force float32: an all-integer command would otherwise produce an
        # int64 tensor, which nn.LSTM rejects.
        return torch.tensor(samples, dtype=torch.float32).unsqueeze(0).unsqueeze(0)

    def predict(self, command):
        """Sample an action index for `command` from the model's softmax output."""
        with torch.no_grad():
            logits = self.model(self._get_input(command))
            probs = nn.functional.softmax(logits, dim=-1).squeeze()
            return torch.multinomial(probs, 1).item()

    def learn(self, command, action, reward):
        """Apply one policy-gradient step for the taken `action`.

        Args:
            command: the command the action was chosen for.
            action: index of the action that was taken.
            reward: scalar feedback; positive values make `action` more
                likely for this command, negative values less likely.
        """
        action_t = torch.tensor(action, dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)

        logits = self.model(self._get_input(command)).squeeze(0)
        # The model emits raw logits, which may be negative; taking log() of
        # them directly yields NaN. Use the log-probability of the action.
        log_probs = nn.functional.log_softmax(logits, dim=-1)
        loss = -log_probs[action_t] * reward_t

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

In [74]:
# Fresh, untrained agent: one output per move, commands fixed to max_input_size values.
dog = Dog(num_moves=num_moves, num_possible_commands=max_input_size)

In [87]:
# train: online reinforcement on examples of a single target command.
TGT = 'GIU'
MAX = 25
tgt_rows = train[train['truth'] == TGT]
# Cap the sample size: DataFrame.sample raises ValueError when asked for more
# rows than are available (the stratified split may hold fewer than MAX).
tmp = tgt_rows.sample(min(MAX, len(tgt_rows)))

for w, t in tmp.values.tolist():
    out = dog.predict(w)
    # +1 when the sampled action matches the label, -1 otherwise.
    reward = 1 if trans[out] == t else -1
    dog.learn(w, out, reward)

tensor([-0.0446, -0.0043, -0.0032,  0.0312], grad_fn=<SqueezeBackward1>)
tensor([-0.0432, -0.0030, -0.0035,  0.0297], grad_fn=<SqueezeBackward1>)
tensor([-0.0449,  0.0006, -0.0048,  0.0279], grad_fn=<SqueezeBackward1>)
tensor([-0.0466,  0.0037, -0.0033,  0.0261], grad_fn=<SqueezeBackward1>)
tensor([-0.0487,  0.0066, -0.0007,  0.0243], grad_fn=<SqueezeBackward1>)
tensor([-0.0478,  0.0082,  0.0028,  0.0228], grad_fn=<SqueezeBackward1>)
tensor([-0.0513,  0.0114,  0.0048,  0.0191], grad_fn=<SqueezeBackward1>)
tensor([-0.0725,  0.0227,  0.0026,  0.0099], grad_fn=<SqueezeBackward1>)
tensor([-0.0495,  0.0134,  0.0066,  0.0156], grad_fn=<SqueezeBackward1>)
tensor([-0.0508,  0.0151,  0.0072,  0.0127], grad_fn=<SqueezeBackward1>)
tensor([-0.0510,  0.0156,  0.0078,  0.0103], grad_fn=<SqueezeBackward1>)
tensor([-0.0516,  0.0162,  0.0083,  0.0081], grad_fn=<SqueezeBackward1>)
tensor([-0.0547,  0.0180,  0.0076,  0.0051], grad_fn=<SqueezeBackward1>)
tensor([-0.0542,  0.0175,  0.0076,  0.0038], grad_f

In [94]:
# Print "<predicted> - <expected>" for every held-out example of the target command.
tgt_test_rows = test[test['truth'] == TGT].values.tolist()
for w, t in tgt_test_rows:
    predicted_label = trans[dog.predict(w)]
    print(f"{predicted_label} - {t}")

SALTA - GIU
SEDUTO - GIU
VAI - GIU
SEDUTO - GIU
SEDUTO - GIU
