In [1]:
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
cpu = torch.device("cpu")

# Char LSTM

This notebook has different goals :

    - Reproducing the results of Andrej Karpathy (http://karpathy.github.io/2015/05/21/rnn-effectiveness/) on the task of classification for time series
    - Testing the influence of hold-out selection of 

### I Data

In [3]:
cat =  open('trump_reduce.txt', 'r')
data = cat.read()
data = list(map(ord, data))

In [4]:
ord_encoder = OrdinalEncoder()
data = ord_encoder.fit_transform(np.array(data).reshape(-1, 1))

In [5]:
encoder= OneHotEncoder()
encoded_data = encoder.fit_transform(data).todense()
encoded_data.shape

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


(6803309, 496)

In [6]:
n_train = 5000000
n_valid = 10000
n_test = encoded_data.shape[0]-n_train-n_valid

n_param = encoded_data.shape[1]

### II Modèle 

In [7]:
#paramètres

##LSTM
input_size = n_param
output_size = n_param
hidden_size = 512
num_layers = 3
dropout = 0.5
n_histo = 1
temperature= 2


##Learning
learning_rate = 1e-4
batch_size = 200
epochs = 1000

In [8]:
class LSTM(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden_layer_size = hidden_size
        self.n_histo = n_histo
        self.temperature = temperature
        self.lstm = nn.LSTM(input_size,hidden_size = hidden_size, num_layers = num_layers, dropout=dropout)

        self.hidden_cell = (torch.zeros(num_layers, 1,self.hidden_layer_size).to(device),
                            torch.zeros(num_layers, 1 ,self.hidden_layer_size).to(device))

        self.linear = nn.Linear(self.hidden_layer_size, output_size, bias=False)
        
    def forward(self, input_seq):
        """
        m = input_seq.shape[0]
        l = []
        for i in range(0, m-n_histo):
            l.append(input_seq[i:i+n_histo,:])
        dat = torch.stack(l,1)
         """
        
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(-1,1, n_param), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions /temperature

In [9]:
model = LSTM().cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
training_set=  encoded_data[:n_train,:]
training_ds = TensorDataset(torch.tensor(training_set[:-1,:], dtype = torch.float32),torch.tensor(data[1:n_train],dtype = torch.int64))
training_dl = DataLoader(training_ds,batch_size=batch_size , shuffle=False)

In [11]:
valid_set=  encoded_data[n_train:n_train+n_valid,:]
X_valid = torch.tensor(valid_set[:-1,:], dtype = torch.float32)
y_valid = torch.tensor(data[n_train+n_histo:n_train+n_valid],dtype = torch.int64)[:,0]
X_train =  torch.tensor(training_set[-1000:-1,:], dtype = torch.float32)
y_train =  torch.tensor(data[n_train - 1000 + n_histo:n_train],dtype = torch.int64)[:,0]

In [12]:
import copy

def evaluate(model, X_train, X_valid, y_train, y_valid):
    
    copy_model = model.cpu()
    copy_model.hidden_cell = (torch.zeros(num_layers, 1,model.hidden_layer_size),
                            torch.zeros(num_layers, 1 ,model.hidden_layer_size))

    y_pred_train = copy_model(X_train)
    single_loss = loss_function(y_pred_train, y_train).item()
    y_pred_valid = copy_model(X_valid)
    valid_loss = loss_function(y_pred_valid, y_valid).item()
    
    print(f'epoch: {i:3} loss: {single_loss:10.8f} Valid loss: {valid_loss:10.8f}')
    return valid_loss


In [None]:
i=0
old_loss = 1000
valid_loss = 999
while old_loss >=valid_loss or i < 20:
    old_loss = valid_loss
    valid_loss =  evaluate(model, X_train, X_valid, y_train, y_valid)
    model.cuda()
    
    for seq, labels in training_dl:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(num_layers,1,model.hidden_layer_size).to(device), torch.zeros(num_layers,1,model.hidden_layer_size).to(device))
        y_pred = model(seq.to(device))
        single_loss = loss_function(y_pred, labels[:,0].to(device))
        single_loss.backward()
        optimizer.step()
        
    i +=1

        


epoch:   0 loss: 6.20618391 Valid loss: 6.20606422
epoch:   1 loss: 2.51650858 Valid loss: 2.58922052
epoch:   2 loss: 1.98704743 Valid loss: 1.94657922
epoch:   3 loss: 1.73119700 Valid loss: 1.64906275


In [18]:
def decode_txt(seq):
    cate = ord_encoder.inverse_transform(encoder.inverse_transform(np.array(seq)))
    txt = ''.join(list(map(chr,cate)))
    return txt

In [19]:
seq = X_train[-100:]
a= decode_txt(seq)
print(a)

r me as for you?

GREMIO:
But so is not she.

TRANIO:
For what reason, I beseech you?

GREMIO:
For t


In [20]:
model.hidden_cell = (torch.zeros(num_layers,1,model.hidden_layer_size).to(device), torch.zeros(num_layers,1,model.hidden_layer_size).to(device))

for i in range(1000):
    next_chr =  model(seq.to(device))
    arr = F.softmax(next_chr[-1,:]).detach().cpu().numpy()
    i =  np.argmax(arr)
    a = torch.zeros(1,n_param)
    a[0,i] = 1
    seq = torch.cat([seq, a])

  """


RuntimeError: CUDA out of memory. Tried to allocate 32.00 MiB (GPU 0; 15.90 GiB total capacity; 15.02 GiB already allocated; 11.88 MiB free; 171.61 MiB cached)

In [21]:
a= decode_txt(seq)
print(a)

r me as for you?

GREMIO:
But so is not she.

TRANIO:
For what reason, I beseech you?

GREMIO:
For the deserves the words and so much sound
To see the seas of love and faith and her.

LUCENTIO:
That I was a pardon make a state of men
And shall be so fair and so far a state,
And then a man to see the words of her,
And therefore have so much a sick of her.

LUCENTIO:
That is the world will have me well and some of
the world of her father.

LUCENTIO:
I will not see the world.

DUKE VINCENTIO:
I am a prisoner to my lord.

LUCENTIO:
This is the seath of heaven and her father,
And that he would be so and the death of him.

LUCENTIO:
This is a man of her father and the world,
And therefore I have seen the provise of him.

LUCENTIO:
Sirrah, I shall be a prison, sirrah, I
was a fool and see that we are to the world.

LUCENTIO:
This is the strange of her son and some souls
and strange and that with the strange of the worl


In [23]:
torch.save(model.state_dict(), 'model_trump_512_3_0.5_temp=2.pt')

In [22]:
a

'es should know....\nThe Legendary Henry Ford and Alfred P. Sloan the Founders of Ford Motor Company a                                                                                                                                                                                                                                                                                                                                                                                                                '

### Modele using Hold-Out

In [16]:
mo= LSTM().cuda()

In [18]:
mo.cpu()(X_train)


RuntimeError: Input and hidden tensors are not at the same device, found input tensor at cpu and hidden tensor at cuda:0