In [1]:
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Char LSTM

Objectif : 

- Apprendre à implémenter un LSTM en pytorch
- Utiliser LSTM +DataLoader
- Tester la différence entre une validation aléatoire & une validation sur période

### I Data

In [4]:
cat =  open('input.txt', 'r')
data = cat.read()
data = list(map(ord, data))

In [5]:
ord_encoder = OrdinalEncoder()
data = ord_encoder.fit_transform(np.array(data).reshape(-1, 1))

In [6]:
encoder= OneHotEncoder()
encoded_data = encoder.fit_transform(data).todense()
encoded_data.shape

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


(1115394, 65)

In [7]:
n_train = 100000
n_valid = 100000
n_test = encoded_data.shape[0]-n_train-n_valid

n_param = encoded_data.shape[1]

### II Modèle 

In [14]:
#paramètres

##LSTM
input_size = n_param
output_size = n_param
hidden_size = 512
num_layers = 3
dropout = 0.5
n_histo = 1
temperature= 2


##Learning
learning_rate = 3e-4
batch_size = 200
epochs = 1000

In [15]:
class LSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden_layer_size = hidden_size
        self.n_histo = n_histo
        self.temperature = temperature
        self.lstm = nn.LSTM(input_size,hidden_size = hidden_size, num_layers = num_layers, dropout=dropout)

        self.hidden_cell = (torch.zeros(num_layers, 1,self.hidden_layer_size).to(device),
                            torch.zeros(num_layers, 1 ,self.hidden_layer_size).to(device))

        self.linear = nn.Linear(self.hidden_layer_size, output_size, bias=False)
        
    def forward(self, input_seq):
        """
        m = input_seq.shape[0]
        l = []
        for i in range(0, m-n_histo):
            l.append(input_seq[i:i+n_histo,:])
        dat = torch.stack(l,1)
         """
        
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(-1,1, n_param), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions /temperature

In [16]:
model = LSTM().cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
training_set=  encoded_data[:n_train,:]
training_ds = TensorDataset(torch.tensor(training_set[:-1,:], dtype = torch.float32),torch.tensor(data[1:n_train],dtype = torch.int64))
training_dl = DataLoader(training_ds,batch_size=batch_size , shuffle=False)

In [18]:
valid_set=  encoded_data[n_train:n_train+n_valid,:]
X_valid = torch.tensor(valid_set[:-1,:], dtype = torch.float32)
y_valid = torch.tensor(data[n_train+n_histo:n_train+n_valid],dtype = torch.int64)[:,0]
X_train =  torch.tensor(training_set[:-1,:], dtype = torch.float32)
y_train =  torch.tensor(data[n_histo:n_train],dtype = torch.int64)[:,0]

In [None]:
for i in range(epochs):
    
    y_pred_train = model(X_train.to(device))
    single_loss = loss_function(y_pred_train, y_train.to(device))
    y_pred_valid = model(X_valid.to(device))
    valid_loss = loss_function(y_pred_valid, y_valid.to(device))
    
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f} Valid loss: {valid_loss.item():10.8f}')
    
    for seq, labels in training_dl:



        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(num_layers,1,model.hidden_layer_size).to(device), torch.zeros(num_layers,1,model.hidden_layer_size).to(device))
        y_pred = model(seq.to(device))
        single_loss = loss_function(y_pred, labels[:,0].to(device))
        single_loss.backward()
        optimizer.step()
        
    

        


epoch:   0 loss: 4.17312956 Valid loss: 4.17314816
epoch:   1 loss: 3.30493188 Valid loss: 3.31528616
epoch:   2 loss: 3.30442810 Valid loss: 3.31541514
epoch:   3 loss: 3.30400252 Valid loss: 3.31539559
epoch:   4 loss: 3.30423808 Valid loss: 3.31575131
epoch:   5 loss: 3.30419874 Valid loss: 3.31582165
epoch:   6 loss: 3.30389762 Valid loss: 3.31561947
epoch:   7 loss: 3.30350089 Valid loss: 3.31504107
epoch:   8 loss: 2.66118383 Valid loss: 2.75695515
epoch:   9 loss: 2.40609670 Valid loss: 2.54462481
epoch:  10 loss: 2.28527522 Valid loss: 2.44916296
epoch:  11 loss: 2.18329597 Valid loss: 2.37168121


In [None]:
def decode_txt(seq):
    cate = ord_encoder.inverse_transform(encoder.inverse_transform(np.array(seq)))
    txt = ''.join(list(map(chr,cate)))
    return txt

In [None]:
seq = X_train[-100:]
a= decode_txt(seq)
print(a)

In [None]:
model.hidden_cell = (torch.zeros(num_layers,1,model.hidden_layer_size).to(device), torch.zeros(num_layers,1,model.hidden_layer_size).to(device))

for i in range(400):
    next_chr =  model(seq.to(device))
    arr = F.softmax(next_chr[-1,:]).detach().cpu().numpy()
    i =  np.argmax(arr)
    a = torch.zeros(1,65)
    a[0,i] = 1
    seq = torch.cat([seq, a])

In [None]:
a= decode_txt(seq)
print(a)

In [21]:
seq= seq[-30:,:]

### Modele using Hold-Out