# AS : Réseaux de neurones récurrents

## Réseau de neurone récurrent SeqToSeq

## Bibliothèques

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
from charDataset import CharDataset, code2char, char2code
import numpy as np
import json
import csv
import re
%matplotlib inline

## Lecture des données : Tweets de Donald Trump

In [2]:
with open("Donald-Tweets.csv", "r") as f:
    spamreader = csv.reader(f, delimiter=',')
    tweets = []
    vocab = set()
    for row in spamreader:
        text = re.sub(r'[^a-zA-Z0-9_\s]','',row[2].lower()) 
        vocab = vocab | set(text)
        tweets.append(text+" \n")
    tweets = tweets[1:]


In [10]:
vocab = torch.load('vocab.tx')
dictVocab = dict(zip(sorted(vocab),range(len(vocab))))
dictVocab

{u' ': 0,
 u'!': 1,
 u'(': 2,
 u')': 3,
 u',': 4,
 u'.': 5,
 u':': 6,
 u';': 7,
 u'?': 8,
 u'a': 9,
 u'b': 10,
 u'c': 11,
 u'd': 12,
 u'e': 13,
 u'f': 14,
 u'g': 15,
 u'h': 16,
 u'i': 17,
 u'j': 18,
 u'k': 19,
 u'l': 20,
 u'm': 21,
 u'n': 22,
 u'o': 23,
 u'p': 24,
 u'q': 25,
 u'r': 26,
 u's': 27,
 u't': 28,
 u'u': 29,
 u'v': 30,
 u'w': 31,
 u'x': 32,
 u'y': 33,
 u'z': 34}

In [4]:
def code2char(code,vocab):
    vocab_map = dict(zip(vocab.values(),vocab.keys()))
    return "".join(vocab_map[c] for c in code)

def char2code(text,vocab):
    data = torch.ByteTensor(len(text))
    for i,c in enumerate(text):
        data[i]=vocab[c]
    return data

######################################################################

#tensorTweets = [char2code(t,dictVocab) for t in tweets]

### RNN

Modèle

In [102]:
class RNN(nn.Module):
    
    def __init__(self, vocabSize = 39, dim=100):
        super(RNN, self).__init__()
        self.whh = nn.Linear(dim,dim)
        self.wxh = nn.Linear(vocabSize,dim)
        self.why = nn.Linear(dim,vocabSize)
        self.vocabSize = vocabSize
        self.dim = dim
        if torch.cuda.is_available:
            self.whh = self.whh.cuda()
            self.wxh = self.wxh.cuda()
            self.why = self.why.cuda()
        
    def forward(self, x, test=True, maxlength=140, stopWord=0):
        if torch.cuda.is_available:
            lH = [Variable(torch.zeros(x.size(0), self.dim).cuda())]
        else : 
            lH = [Variable(torch.zeros(x.size(0), self.dim))]
        linput = [x[:,0]]
        _, argmax = x[:,0].max(dim=-1)
        maxlength = maxlength if test else x.size(1)
        preds = [argmax]
        predsProbs=[]
        size=1
        while(size<maxlength and preds[-1].data[0] != stopWord):
            h = F.tanh(self.wxh(linput[-1]) + self.whh(lH[-1]))
            lH.append(h)
            predProba = self.why(h)
            predsProbs.append(predProba)
            if test :
                linput.append(predProba)
                preds.append(self.tirage(predProba))
            else:
                linput.append(x[:,size -1])
            size+=1
        return torch.cat(predsProbs),torch.cat(preds)
     
        
    def predict(self,x):
        _,decoded = self.forward(x).max(dim=1)
        return decoded
    
    def tirage(self, distribution):
        distrib = torch.cat([r / r.sum(dim=-1) for r in distribution])
        return distrib.multinomial(1)            

In [103]:
m = RNN()
l = np.array([char2code("aqbr vvdsvf fdjobrejrejmaojbopjreaop br", dictVocab),char2code("rb r", dictVocab)])

def transform_one_hot(digit,n):
    y_onehot = torch.FloatTensor(n)
    y_onehot.zero_()
    y_onehot[digit] = 1
    return y_onehot

def transform_one_hot_sequence(sequence,n):
    seq_onehot = torch.FloatTensor(len(sequence),n)
    for i in range(len(sequence)):
        seq_onehot[i] = transform_one_hot(sequence[i],n)
    return seq_onehot
x = transform_one_hot_sequence(l[0], 35)

print(x.size(0))
print(m(Variable(x), test=False))

39
Variable containing:
 4
[torch.LongTensor of size 1]

<class 'torch.autograd.variable.Variable'>
('prob : ', Variable containing:
-5586.7480
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-11174.2148
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-16756.6973
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-22336.8203
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-27916.5625
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-33496.0859
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-39075.5234
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-44655.1133
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-50234.7031
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-55814.1406
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-61394.9492
[torch.FloatTensor of size 1]
)
('prob : ', Variable containing:
-66988.4844
[torch.Fl

Apprentissage

In [None]:
tailleBatch = len(tensorTweets)

rnn=RNN()
epochs=5

optimizer = torch.optim.SGD(rnn.parameters(), lr=0.001, momentum=0.9)
criterion = torch.nn.CrossEntropyLoss()

for ep in range(epochs):
    sumLoss = 0.0
    for it in range(tailleBatch):
        idx = np.random.randint(0,tailleBatch)
        exemple = tensorTweets[idx]
        x_onehot = torch.FloatTensor(1,len(exemple), len(vocab))
        x_onehot.zero_()
        x_onehot.scatter_(2, exemple.long().view(1,-1,1),1)
        
        if torch.cuda.is_available:
            exemple = exemple.cuda()
            x_onehot = x_onehot.cuda()
        
        probas, preds = rnn(Variable(x_onehot), test=False)
        loss = criterion(probas,Variable(exemple[1:].long()))
        sumLoss+=loss.data[0]
        loss.backward()
        optimizer.step()
        del x_onehot
    print("Loss {}".format(sumLoss))

Loss 97368122.388942
Loss 323187650.3203125


In [10]:
randIdx = np.random.randint(0,len(dataset),15)
x_onehot = torch.FloatTensor(15, 10, dataset.vocab_size)
mini_batch_x = torch.cat([dataset[i][0].view(1,-1).float() for i in randIdx], dim=0)
mini_batch_y = torch.cat([dataset[i][1].view(1,-1).float() for i in randIdx],dim=0)
x_onehot.zero_()
x_onehot.scatter_(2, mini_batch_x.long().view(15,10,1),1)
rnn(Variable(x_onehot.cuda()))

Variable containing:
 0.0390  0.0709 -0.0152  ...  -0.1245  0.1652 -0.1696
 0.0447  0.1991  0.1410  ...   0.0473  0.0858 -0.0955
-0.1926  0.0717  0.3021  ...  -0.1663  0.2360  0.1336
          ...             ⋱             ...          
-0.0493  0.1255  0.2868  ...   0.0699  0.2394 -0.0593
 0.0447  0.1991  0.1410  ...   0.0473  0.0858 -0.0955
-0.1926  0.0717  0.3021  ...  -0.1663  0.2360  0.1336
[torch.cuda.FloatTensor of size 15x512 (GPU 0)]

Variable containing:
1.00000e-02 *
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
          ...             ⋱             ...          
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
-1.9702  3.9788 -0.1973  ...   0.1168 -3.1222 -1.6446
[torch.cuda.FloatTensor of size 15x512 (GPU 0)]



RuntimeError: size mismatch at /pytorch/torch/lib/THC/generic/THCTensorMathBlas.cu:243

In [8]:
epochs = 2
#rnn = RNN(dim=512)
rnn = torch.load("rnn.torch")
x_onehot = torch.FloatTensor(10, dataset.vocab_size) 
topred = torch.FloatTensor(10,35)
if torch.cuda.is_available:
    x_onehot = x_onehot.cuda()
    topred = topred.cuda()
#optimizer = torch.optim.SGD(rnn.parameters(), lr=0.003, momentum=0.9)
optimizer = torch.load("sgd.torch")
criterion = torch.nn.CrossEntropyLoss()
for ep in range(epochs):
    sumLoss = 0.0
    for i in range(len(dataset)):
        idx = random.randint(0,len(dataset)-1)
        optimizer.zero_grad()
        data,target = dataset[idx][0]-1, dataset[idx][1]-1
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        x_onehot.zero_()
        x_onehot.scatter_(1, data.view(-1,1).long(), 1)
        pred = rnn(Variable(x_onehot),test=False)
        loss = criterion(pred,Variable(target.long()))
        sumLoss+= loss.data[0]
        loss.backward()
        optimizer.step()
        if i %100000==0:
            randomChar = random.randint(0,topred.size(1)-1)
            topred.zero_()
            topred[0][randomChar] = 1
            p = rnn(Variable(topred),test=True)
            _,argmax = p.max(dim=1)
            s = decode(argmax.data, dataset.vocab_map)
            print("Loss : {}, Predicted : {}".format(sumLoss, dataset.vocab_map[randomChar+1]+s))
            torch.save(rnn,"rnn.torch")
            torch.save(optimizer,"sgd.torch")
            sumLoss = 0.0
    print(loss)

Loss : 2.5264384746551514, Predicted : re okafofel


  "type " + obj.__name__ + ". It won't be checked "


KeyboardInterrupt: 