In [442]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import json
from sklearn.model_selection import train_test_split
import torch.nn.utils as torch_utils

In [45]:
trans = ['GIU', 'SEDUTO', 'SALTA', 'VAI']

df = pd.read_csv('./train_csv/test.csv', header=None, delimiter=';')
df.columns = ['wave','truth']

# df = df[df['truth'].isin(['SEDUTO','VAI'])]

train, test = train_test_split(df, train_size=100, stratify=df["truth"])
train['wave'] = train['wave'].apply(json.loads)
test['wave'] = test['wave'].apply(json.loads)


In [446]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.5):
        super(Net, self).__init__()
        self.hidden_size = hidden_size
        self.lstm1 = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        _, (h_n1, _) = self.lstm1(x)
        _, (h_n2, _) = self.lstm2(h_n1)
        h_n2 = self.dropout(h_n2[-1])
        output = self.fc(h_n2)
        return output


In [656]:
_LIVE_LEARNING_RATE = 0.1
_HIDDEN_SIZE = 64
_MIN_LR = 1e-5
_MAX_LR = 0.005
_DROPOUT_RATE = 0.9
_CLIP_VALUE = 2.0
max_input_size = 100
num_moves = 4

class Dog:
    def __init__(self, num_moves, num_possible_commands):
        self.device = torch.device('cpu')
        self.max_commands = num_possible_commands
        self.model = Net(num_possible_commands, _HIDDEN_SIZE, num_moves, _DROPOUT_RATE)
        self.optimizer = optim.Adam(self.model.parameters(), lr=_LIVE_LEARNING_RATE)
        self.loss_fn = nn.CrossEntropyLoss()
        

    def _get_input(self, command):
        command.extend([0] * (self.max_commands - len(command)))
        tensor_command = torch.tensor(command[:self.max_commands]).unsqueeze(0).unsqueeze(0)
        return tensor_command
    
    def _get_learning_rate(self, output, action, reward):
        p = nn.functional.softmax(output, dim=-1).squeeze()[action]
        m = _MAX_LR-1*_MIN_LR
        q = _MIN_LR
        if reward > 0:
            m = -1*m
            q = _MAX_LR
        print(f"{output}; {action}; {reward}; {m*p + q}")
        return m*p + q

    def predict(self, command):
        with torch.no_grad():
            output = self.model(self._get_input(command))
            return torch.multinomial(nn.functional.softmax(output, dim=-1).squeeze(), 1).item()

    def learn(self, command, action, reward):
        action_t = torch.tensor(action, dtype=torch.long)
        reward_t = torch.tensor(reward, dtype=torch.float)
        
        # self.memory.push(self._get_input(command), action_t, reward_t)
        out = self.model(self._get_input(command)).squeeze(0)
        
        dynamic_lr = self._get_learning_rate(out, action, reward)
        
        optimizer = optim.Adam(self.model.parameters(), lr=dynamic_lr, weight_decay=0.01)
        loss = -torch.log_softmax(out, dim=0)[action_t] * reward_t

        self.optimizer.zero_grad()
        loss.backward()
        torch_utils.clip_grad_norm_(self.model.parameters(), _CLIP_VALUE)
        self.optimizer.step()

In [657]:
dog = Dog(num_moves, max_input_size)

In [682]:
# train
TGT = 'SEDUTO'
MAX = 25
tmp = train[train['truth'] == TGT].sample(MAX)

for w, t in tmp.values.tolist():
    out = dog.predict(w)
    # print(out)
    reward = -1
    if trans[out] == t:
        reward = 1
    
    dog.learn(w, out, reward)

tensor([-1.1867, -1.1963, 20.3553, -5.7022], grad_fn=<SqueezeBackward1>); 0; -1; 1.0000001566368155e-05
tensor([26.3599, -6.3194, -5.7859, -4.8049], grad_fn=<SqueezeBackward1>); 0; -1; 0.004999999888241291
tensor([-18.0852,   7.0783,  21.6893, -13.5701], grad_fn=<SqueezeBackward1>); 0; -1; 9.999999747378752e-06
tensor([ 0.6031,  0.5551, -0.1961, -0.2486], grad_fn=<SqueezeBackward1>); 0; -1; 0.0017735513392835855
tensor([ -0.5201,  12.3971, -13.3785,  10.1601], grad_fn=<SqueezeBackward1>); 0; -1; 1.0011071026383433e-05
tensor([-6.2828, 20.9316, -9.6848,  5.6263], grad_fn=<SqueezeBackward1>); 1; 1; 1.0001473128795624e-05
tensor([ 17.4924,  34.9072,  53.9818, -71.0346], grad_fn=<SqueezeBackward1>); 0; -1; 9.999999747378752e-06
tensor([ 21.6277,  11.1039,   8.2851, -16.2895], grad_fn=<SqueezeBackward1>); 0; -1; 0.004999857861548662
tensor([ 6.4311,  2.4269,  5.0296, -7.4488], grad_fn=<SqueezeBackward1>); 1; 1; 0.004928023088723421
tensor([3.6647, 1.2060, 4.1927, 1.3886], grad_fn=<SqueezeBa

In [683]:
for w,t in test[test['truth'] == TGT].values.tolist():
    print(f"{trans[dog.predict(w)]} - {t}")

SEDUTO - SEDUTO
GIU - SEDUTO
SEDUTO - SEDUTO
SEDUTO - SEDUTO
SEDUTO - SEDUTO


In [684]:
# in order for this shit to work, the input size must be MUCH SMALLER
# maybe 10 or something? sampling on every second
# would that be enough?

In [None]:
#