In [1]:
# This notebook introduces many CNN for NLP

In [2]:
# Import library
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
from nltk.tokenize import word_tokenize
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from torch.utils.data import Dataset, DataLoader
import gensim
from sklearn.metrics import f1_score, precision_score, recall_score
import gc
import os
from torch.optim.lr_scheduler import ReduceLROnPlateau
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     /home/ama/audibeal/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ama/audibeal/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ama/audibeal/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

# 1) Preprocessing Function

In [5]:
def preprocessing(data_base, stemming=False, lemmatisation=False):
    """ Preprocessing Data, remove punctuation, figures
    and add lemmatisation or stemming and tokenization
    Parameters
    ----------
    data_base: numpy.ndarray
    stemming: boolean, default: False
    lemmatisation: boolean, default: False
    Returns
    -------
    maximum: int
        Represents the maximum length of sentence.
    dataset_tokenization: list:
    """
    dataset_tokenization = []
    maximum = 0
    for document in data_base:
        document = document.lower()
        token = word_tokenize(document)
        token = [word for word in token if word.isalpha()]

        # remove non-relevant word
        new_word = \
            [word for word in token if word not in stopwords.words('english')]

        # Stemming part
        if stemming:
            porter = PorterStemmer()
            new_word = [porter.stem(word) for word in new_word]

        # Lemmatisation part
        if lemmatisation:
            lemmatizer = WordNetLemmatizer()
            new_word = [lemmatizer.lemmatize(word) for word in new_word]

        # Update Variables
        maximum = max(maximum, len(new_word))
        dataset_tokenization.append(new_word)
    return maximum, dataset_tokenization


In [6]:
def create_dic_voc(voc_known):
    """ Create a dictionnary of important words
    Parameters
    ----------
    voc_known: mapping dictionnary
    Returns
    -------
    vocabulary: list
    """
    vocabulary = {}
    for index, token in enumerate(voc_known):
        vocabulary[token] = index
    return vocabulary


In [7]:
def weight_matrix_emb(vocabulary, model):
    """Create Matrix for word embedding
    Parameters
    ----------
    vocabulary : mapping dictionnary
        Represent interesting vocabulary
    model:
        Represents a word2Vec model
    Returns
    -------
    weights_matrix: numpy.ndarray
        Represents weights for nn.embedding
    final_dico: mapping dictionnary
    """
    # Variables
    matrix_len = len(vocabulary)
    len_emb = len(model['law'])
    know_word = 0
    shift = 2
    weights_matrix = np.zeros((matrix_len + shift, len_emb))
    final_dico = {}
    # clef space : index = 0
    # clef unknwn: index = 1

    for i, word in enumerate(vocabulary):
        if word in model:
            weights_matrix[i + shift] = model[word]
            final_dico[word] = i + shift
            know_word += 1
        else:
            weights_matrix[i + shift] = \
                np.random.normal(scale=0.6, size=(len_emb, ))
            final_dico[word] = i + shift
    # Return final dico/ and matrix weight corresponding
    print('{0}% are known by pre-trained model'.format(know_word / matrix_len))
    return weights_matrix, final_dico

In [8]:
def transform_data(data, maximum, final_dico, remove_unknown_word=False):
    """ Transform our data into good format for training
    Parameters
    ----------
    data: numpy.ndarray
        X is the word tokenize to change into number
    maximum: int
        Maximum dimension
    model:
        Represents a word2Vec model
    Returns
    -------
    data_: tensor
    """
    data_ = torch.zeros(len(data), maximum, dtype=torch.long)
    for id_text, text in enumerate(data):
        id_token = 0
        for _, token in enumerate(text):
            if id_token == maximum:
                break
            if token in final_dico:
                data_[id_text, id_token] = final_dico[token]
                id_token += 1
            else:
                if not(remove_unknown_word):
                    data_[id_text, id_token] = 1
                    id_token += 1
    return data_

In [9]:
def frequency(data):
    """ Frequency of words in our datatset
    Parameters
    ----------
    data: numpy.ndarray
        data is the word tokenize
    Returns
    -------
    dico_freq: mapping dictionnary
    """
    dico_freq = {}
    for text in data:
        current_dico = {}
        for token in text:
            if token in dico_freq and token not in current_dico:
                dico_freq[token] += 1
                current_dico[token] = 0
            elif token not in current_dico:
                dico_freq[token] = 1
            else:
                pass
    return dico_freq

In [10]:
def frequence_keep(data, thresh=5):
    """ Keep only words with hight frequence
    Parameters
    ----------
    data: numpy.ndarray
        data is the word tokenize
    thresh: int, default: 5
    Returns
    -------
    _ : list(char)
    """
    dico_feq = frequency(data)
    voc_known = [element for element in dico_feq if dico_feq[element] >= thresh]
    vocabulary = {}
    for index, token in enumerate(voc_known):
        vocabulary[token] = index
    return vocabulary

In [11]:
def clean_cuda():
    """
    Remove Useless element on GPU
    """
    gc.collect()
    torch.cuda.empty_cache()

In [12]:
def new_score(y_true, y_pred, average='macro'):
    """ Evalute our model with f1, recall precision
    Parameters
    ----------
    y_true: torch/numpy.ndarray
    y_pred: torch/numpy.ndarray
    dic_index: dictionnary
    average: String
        Represent type of f1
    """
    f1 = f1_score(y_true, y_pred, average=average)
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    accuracy = sum(y_pred == y_true) / len(y_pred)
    print('Accuracy: {0} \n F1: {1} \n precision : {2} \n recall : {3} '
          .format(accuracy, f1, precision, recall))

# 2) Dataset

In [13]:
class DataSet(Dataset):
    """Create Dataset
    """
    def __init__(self, x, y):
        self.x = x
        self.y = y
        if len(self.x) != len(self.y):
            raise Exception("The length of X does not match the length of Y")

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        # Note that this isn't randomly selecting.
        # It's a simple get a single item that represents an x and y
        x = self.x[index]
        y = self.y[index]
        return x, y

# 3) Models

In [14]:
class CnnKim(nn.Module):
    """ Kim CNN
    Parameters
    ----------
    matrix_weight: torch.float
    len_sentence: int
    kernel_sizes: list
    out_channel_len: int
    """
    def __init__(self,
                 matrix_weight,
                 len_sentence,
                 kernel_sizes=[3, 4, 5],
                 out_channel_len=100):
        super(CnnKim, self).__init__()
        self.embbeding_1 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_1.weight.data = matrix_weight
        self.embbeding_1.weight.requires_grad = False
        self.embbeding_2 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_2.weight.data = torch.clone(matrix_weight)
        self.embbeding_2.weight.requires_grad = True

        block = []
        for kernel_size in kernel_sizes:
            conv1d = nn.Conv2d(in_channels=2,
                               out_channels=out_channel_len,
                               kernel_size=(kernel_size,
                                            matrix_weight.shape[1]))
            component = nn.Sequential(
                conv1d,
                nn.ReLU(),
                nn.Flatten(2),
                nn.MaxPool1d(kernel_size=len_sentence, ceil_mode=True))
            block.append(component)
        self.block = nn.ModuleList(block)
        self.linear = nn.Linear(out_channel_len * len(kernel_sizes), 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x1 = self.embbeding_1(x)
        x2 = self.embbeding_2(x)
        x = torch.cat((x1.unsqueeze(1), x2.unsqueeze(1)), 1)
        x_list = [conv_block(x) for conv_block in self.block]
        x = torch.cat(x_list, 2)
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        return self.linear(x)

In [15]:
class XmlCnn(nn.Module):
    """ Kim CNN
    Parameters
    ----------
    matrix_weight: torch.float
    len_sentence: int
    kernel_sizes: list
    nb_max_pool: int
    nb_output: int
    """
    def __init__(self, matrix_weight,
                 len_sentence, kernel_sizes=[2, 4, 8],
                 nb_max_pool=10,
                 nb_output=32):

        super(XmlCnn, self).__init__()
        self.nb_max_pool = nb_max_pool
        self.nb_output = nb_output
        self.embbeding_1 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_1.weight.data = matrix_weight
        self.embbeding_1.weight.requires_grad = False

        self.embbeding_2 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_2.weight.data = torch.clone(matrix_weight)
        self.embbeding_2.weight.requires_grad = True

        block = []
        for kernel_size in kernel_sizes:
            maxpool_size = int((len_sentence - kernel_size + 1)/nb_max_pool) + 1
            conv1d = nn.Conv2d(in_channels=2,
                               out_channels=self.nb_output,
                               kernel_size=(kernel_size,
                                            matrix_weight.shape[1]))
            component = nn.Sequential(
                conv1d,
                nn.ReLU(),
                nn.Flatten(2),
                nn.ConstantPad1d((0, nb_max_pool), 0),
                nn.MaxPool1d(kernel_size=maxpool_size, stride=maxpool_size))
            block.append(component)
        self.block = nn.ModuleList(block)

        self.Bottleneck = nn.Linear(nb_output * nb_max_pool * len(kernel_sizes),
                                    512)
        self.linear = nn.Linear(512, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x1 = self.embbeding_1(x)
        x2 = self.embbeding_2(x)
        x = torch.cat((x1.unsqueeze(1), x2.unsqueeze(1)), 1)
        x_list = [conv_block(x) for conv_block in self.block]
        x = torch.cat(x_list, 2)
        x = x.view(x.size(0), -1)
        x = self.Bottleneck(x)
        x = self.dropout(x)
        return self.linear(x)

In [16]:
class ResnetBlock(nn.Module):
    def __init__(self, channel_size):
        super(ResnetBlock, self).__init__()
        self.channel_size = channel_size
        self.maxpool = nn.Sequential(
            nn.ConstantPad1d(padding=(0, 1), value=0),
            nn.MaxPool1d(kernel_size=3, stride=2)
        )
        self.conv = nn.Sequential(
            nn.BatchNorm1d(num_features=self.channel_size),
            nn.ReLU(),
            nn.Conv1d(self.channel_size, self.channel_size,
                      kernel_size=3, padding=1),
            nn.BatchNorm1d(num_features=self.channel_size),
            nn.ReLU(),
            nn.Conv1d(self.channel_size, self.channel_size,
                      kernel_size=3, padding=1),
        )

    def forward(self, x):
        x_shortcut = self.maxpool(x)
        x = self.conv(x_shortcut)
        x = x + x_shortcut
        return x


class DPCNN(nn.Module):
    def __init__(self, matrix_weight, len_seq, channel_dim=200):
        super(DPCNN, self).__init__()
        self.embbeding_1 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_1.weight.data = matrix_weight
        self.embbeding_1.weight.requires_grad = False
        self.embbeding_2 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_2.weight.data = torch.clone(matrix_weight)
        self.embbeding_2.weight.requires_grad = True
        # region embedding
        self.region_embedding = nn.Sequential(
            nn.Conv2d(2, channel_dim, kernel_size=(3, 200), padding=(1, 0)),
            nn.Flatten(2),
            nn.BatchNorm1d(channel_dim),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        self.conv_block = nn.Sequential(
            nn.BatchNorm1d(num_features=channel_dim),
            nn.ReLU(),
            nn.Conv1d(channel_dim, channel_dim, kernel_size=3, padding=1),
            nn.BatchNorm1d(num_features=channel_dim),
            nn.ReLU(),
            nn.Conv1d(channel_dim, channel_dim, kernel_size=3, padding=1),
        )

        self.num_seq = len_seq
        resnet_block_list = []
        while (self.num_seq > 2):
            resnet_block_list.append(ResnetBlock(channel_dim))
            self.num_seq = self.num_seq // 2

        self.resnet_layer = nn.Sequential(*resnet_block_list)
        self.fc = nn.Sequential(
            nn.Linear(channel_dim * self.num_seq, 2),
            nn.BatchNorm1d(2),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2, 2)
        )

    def forward(self, x):
        x1 = self.embbeding_1(x)
        x2 = self.embbeding_2(x)
        x = torch.cat((x1.unsqueeze(1), x2.unsqueeze(1)), 1)
        x = self.region_embedding(x)
        x = self.conv_block(x)
        x = self.resnet_layer(x)
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out

In [17]:
class CnnPart(nn.Module):
    def __init__(self,
                 matrix_weight,
                 len_sentence,
                 kernel_sizes=[1, 2, 3],
                 out_channel_len=32):
        super(CnnPart, self).__init__()
        self.embbeding_1 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_1.weight.data = matrix_weight
        self.embbeding_1.weight.requires_grad = False
        self.embbeding_2 = nn.Embedding(matrix_weight.shape[0],
                                        matrix_weight.shape[1])
        self.embbeding_2.weight.data = torch.clone(matrix_weight)
        self.embbeding_2.weight.requires_grad = True

        block = []
        for kernel_size in kernel_sizes:
            conv1d = nn.Conv2d(in_channels=2,
                               out_channels=out_channel_len,
                               kernel_size=(kernel_size,
                                            matrix_weight.shape[1]))
            component = nn.Sequential(
                conv1d,
                nn.ReLU(),
                nn.Flatten(2),
                nn.MaxPool1d(kernel_size=len_sentence, ceil_mode=True))
            block.append(component)
        self.dropout = nn.Dropout(0.2)
        self.block = nn.ModuleList(block)

    def forward(self, x):
        x1 = self.embbeding_1(x)
        x2 = self.embbeding_2(x)
        x = torch.cat((x1.unsqueeze(1), x2.unsqueeze(1)), 1)
        x = self.dropout(x)
        x_list = [conv_block(x) for conv_block in self.block]
        x = torch.cat(x_list, 2)
        return x

In [18]:
class BiLstmAttention(nn.Module):
    def __init__(self, hidden_size, embedding_dim):
        super(BiLstmAttention, self).__init__()
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_size,
                            batch_first=True,
                            bidirectional=True)

    def forward(self, x):
        """Perform a single decoder step (1 word)"""
        output, (hn, cn) = self.lstm(x)
        hn = hn.reshape(hn.shape[1], -1)
        return output, hn

In [19]:
class BahdanauAttention_1(nn.Module):
    def __init__(self, init, dim):
        super(BahdanauAttention,self).__init__()
        self.attention_key = nn.Linear(init, dim, bias=False)
        self.attention_query = nn.Linear(init, dim, bias=False)
        self.v = nn.Linear(dim, 1)
        self.attention_vec = nn.Linear(2 * init, 128, bias=False)
        self.activation = nn.Tanh()
        self.activation_weight = nn.Softmax(dim=1)

    def forward(self, query, keys):
        scores =\
            self.v(self.activation(self.attention_query(query).unsqueeze(1) + 
                                   self.attention_key(keys)))
        scores = scores.reshape(scores.shape[0], -1)
        attention_weight = self.activation_weight(scores)
        vector_context = torch.bmm(attention_weight.unsqueeze(1), keys)
        pred = torch.cat((vector_context.squeeze(1), query), 1)
        attention_vector = self.activation(self.attention_vec(pred))
        return attention_vector

In [37]:
class BahdanauAttentio_1(nn.Module):
    def __init__(self, init, dim):
        super(BahdanauAttention,self).__init__()
        self.attention_key = nn.Linear(init, dim, bias=False)
        self.attention_query = nn.Linear(3, dim, bias=False)
        self.context_weight = nn.Parameter(torch.Tensor(int(dim), 1))
        self.activation = nn.Tanh()
        self.linear = nn.Linear(init, int(init/2))
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, hn, x):
        scores = self.activation(self.attention_query(x) + self.attention_key(hn))
        scores = scores @ self.context_weight
        weight = self.softmax(scores).permute(0, 2, 1)
        attention_vector = torch.bmm(weight, hn)
        return attention_vector

In [41]:
class BahdanauAttention(nn.Module):
    def __init__(self, init, dim):
        super(BahdanauAttention,self).__init__()
        self.context_weight = nn.Linear(init, 1, bias=False)
        self.linear = nn.Linear(init, init)
        self.softmax = nn.Softmax(dim=1)
        self.scale = 1.0/np.sqrt(init)
        
    def forward(self, hn, x):
        u = self.linear(hn)
        weight = self.softmax(self.context_weight(u).mul_(self.scale)).permute(0, 2, 1)
        attention_vector = torch.bmm(weight, hn) 
        return attention_vector

In [32]:
class BahdanauAttention_X(nn.Module):
    def __init__(self, init, dim):
        super(BahdanauAttention,self).__init__()
        self.multihead = nn.MultiheadAttention(init, 8)
        self.query = nn.Parameter(torch.empty(4, init).normal_(mean=0.1,std=0.5))
        
    def forward(self, hn, x):
        hn = hn.permute(1, 0, 2)
        query = self.query.repeat(hn.shape[1],1 , 1)
        query = query.permute(1, 0, 2)
        attention_vector, _ = self.multihead(query, hn, hn )
        attention_vector = attention_vector.permute(1, 0, 2)
        return attention_vector

In [33]:
class CnnLstm(nn.Module):
    def __init__(self,
                 cnn,
                 lstm,
                 attention,
                hidden_size):
        super(CnnLstm,self).__init__()
        self.cnn = cnn
        self.LSTM = lstm
        self.attention = attention
        self.dropout = nn.Dropout(0.5)
        self.predict = nn.Linear(2*hidden_size, 2)#4

    def forward(self, x):
        x = self.cnn(x)
        output, hn = self.LSTM(x)
        attention_vector = self.attention(output, x)
        attention_vector = attention_vector.contiguous() 
        attention_vector = attention_vector.view(attention_vector.shape[0], -1)
        attention_vector = self.dropout(attention_vector)
        return self.predict(attention_vector)

# 4) Call Backs

In [27]:
class CallBack():
    def __init__(self, lr, factor=0.1, patient=2, path='valider.pth'):
        self.memory_val = []
        self.path = path
        
    def add_val(self, mean_loss):
        self.memory_val.append(mean_loss)

    def save_best(self, model):
        if self.memory_val[-1] == min(self.memory_val):
            print('====== Save New model ===== ')
            print(self.memory_val)
            torch.save(model.state_dict(), self.path)

In [28]:
def train_callback(model, trainer, valider, name_save, w=torch.tensor([9., 1.]).cuda(), weight_decay=0, epochs=10, lr=10e-3):
    criterion = nn.CrossEntropyLoss(weight=w)
    callback = CallBack(lr, path=name_save + '.pth')
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience = 2)
    for epoch in range(epochs):  
        running_loss = 0.0
        mean = 0.0
        nb = 0
        for i, data in enumerate(trainer):
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # Affichage training part results
            if i % 50 == 49:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 50))
                nb += 1
                mean += running_loss/50
                running_loss = 0.0
        print('---- Mean loss = %.3f ----' % (mean / nb), end='')
        
        # Work on validation set
        model.eval()
        with torch.no_grad():
            loss_val = 0
            for i, data in enumerate(valider):
                inputs, labels = data
                if torch.cuda.is_available():
                    inputs, labels = inputs.cuda(), labels.cuda()
                outputs = model(inputs)
                loss_val += criterion(outputs, labels.long())
            print('---- Loss Val %.3f----' % loss_val)
            callback.add_val(loss_val)
            callback.save_best(model)
        model.train()
        curr_lr = optimizer.param_groups[0]['lr']
        print('=== Current lr: %.3f===' % curr_lr)
        scheduler.step(loss_val)
        
    print('Finished Training')

# 5) Download and preprocessing our Data

In [23]:
stemmatization = False
lemmatization = True
truncated = True
remove_unknow_word = False

In [21]:
data = pd.read_pickle("data2word.pkl")

In [25]:
X_train = data.loc['EN_train_Anon']["Fact"].values
Y_train = data.loc['EN_train_Anon']["Violation"].values

X_val = data.loc['EN_dev_Anon']["Fact"].values
Y_val = data.loc['EN_dev_Anon']["Violation"].values

X_test = data.loc['EN_test_Anon']["Fact"].values
Y_test = data.loc['EN_test_Anon']["Violation"].values

In [26]:
maximum, X_train = preprocessing(X_train, stemming=stemmatization, lemmatisation=lemmatization)
print("1")
dic_known = frequence_keep(X_train)
print("2")
maximum_1, X_val = preprocessing(X_val, stemming=stemmatization, lemmatisation=lemmatization)
maximum_2, X_test = preprocessing(X_test, stemming=stemmatization, lemmatisation=lemmatization)

1
2


In [27]:
X_ = X_train

In [28]:
if truncated:
    max_length = maximum
else:
    max_length = max(maximum, maximum_1, maximum_2)

In [29]:
model = gensim.models.KeyedVectors.load_word2vec_format('Law2Vec.200d.txt', binary=False)

In [30]:
dictionnary = create_dic_voc(dic_known)
print('nb word in my dictionnary ', len(dictionnary))

nb word in my dictionnary  16597


In [31]:
# Create matrix of weight for first embedding
weight_matrix, final_dico = weight_matrix_emb(dictionnary, model)

0.8524432126287883% are known by pre-trained model


In [32]:
# ncoding_data
X_train = transform_data(X_train, max_length, final_dico, remove_unknow_word)
print("Step 1 good")
X_test = transform_data(X_test, max_length, final_dico, remove_unknow_word)
print("Step 2 good")
X_val = transform_data(X_val, max_length, final_dico, remove_unknow_word)
print("Step 3 good")

Step 1 good
Step 2 good
Step 3 good


In [33]:
# Good format:
Y_train = torch.tensor(Y_train).float()
Y_test = torch.tensor(Y_test).float()
Y_val = torch.tensor(Y_val).float()

In [34]:
if save:
    torch.save(X_train, name_save + 'X_train' + '.pt')
    torch.save(X_test, name_save + 'X_test' + '.pt')
    torch.save(X_val, name_save + 'X_val' + '.pt')

    torch.save(Y_train, name_save + 'Y_train' + '.pt')
    torch.save(Y_test, name_save + 'Y_test' + '.pt')
    torch.save(Y_val, name_save + 'Y_val' + '.pt')
    np.save(name_save + 'weight.npy', weight_matrix)

NameError: name 'save' is not defined

In [None]:
data_choose = 2
save = True
first_folder = 'data/'
possible_path = ['summary', 'keep_relevant', 'remove_relevant']

#Test
for element in possible_path:
    if not(os.path.exists(first_folder + element)):
        raise NameError(element)

print('You have choose the data: ', possible_path[data_choose])

name_save = first_folder + possible_path[data_choose] + '/'

In [23]:
if download:
    X_train = torch.load(name_download + 'X_train' + '.pt')
    X_test = torch.load(name_download + 'X_test' + '.pt')
    X_val = torch.load(name_download + 'X_val' + '.pt')

    Y_train = torch.load(name_download + 'Y_train' + '.pt')
    Y_test = torch.load(name_download + 'Y_test' + '.pt')
    Y_val = torch.load(name_download + 'Y_val' + '.pt')

    max_length = X_train.shape[1]
    weight_matrix = np.load(name_download + 'weight.npy')

In [22]:
data_download = 1
download = True
first_folder = 'data/'
possible_path = ['summary', 'keep_relevant', 'remove_relevant']

#Test
for element in possible_path:
    if not(os.path.exists(first_folder + element)):
        raise NameError(element)

print('You have choose the data: ', possible_path[data_download])

name_download = first_folder + possible_path[data_download] + '/'

You have choose the data:  keep_relevant


In [26]:
max_length

22188

In [40]:
batch_size = 16
epochs = 15
lr = 1e-4
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
kernel_sizes = [3, 4, 5]
hidden_size = 64
out_channel = 64

for numero in range(2):
    clean_cuda()
    name = 'bestModel/CnnLstm/' + str(numero) + '_2'
    c = CnnPart(torch.from_numpy(weight_matrix).float(),
                max_length,
                kernel_sizes,
                out_channel_len=out_channel)

    lstm = BiLstmAttention(hidden_size=hidden_size,
                           embedding_dim=len(kernel_sizes))

    attention = BahdanauAttention(2 * hidden_size,
                                  hidden_size)

    cnn = CnnLstm(c,
                  lstm,
                  attention,
                 hidden_size)
    cnn = cnn.to(device)
    # Create trainer
    trainer = list(DataLoader(DataSet(X_train, Y_train),
                              batch_size=batch_size,
                              shuffle=True))
    valider = list(DataLoader(DataSet(X_val, Y_val),
                              batch_size=1,
                              shuffle=False))
    # train our model

    train_callback(cnn,
                   trainer,
                   valider,
                   name_save=name,
                   w=None,
                   weight_decay=1e-4,
                   epochs=epochs,
                   lr=lr)

torch.Size([16, 64, 1])
tensor([[[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        ...,

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]]],
       device='cuda:0', grad_fn=<PermuteBackward>)
torch.Size([16, 64, 1])
tensor([[[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        ...,

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]],

        [[0.0156, 0.0156, 0.0156,  ..., 0.0156, 0.0156, 0.0156]]],
       device='cuda:0', grad_fn=<PermuteBackward>)
torch.Size([16, 64

KeyboardInterrupt: 

In [76]:
a = torch.normal(0.1, 1)
a

TypeError: normal() received an invalid combination of arguments - got (float, int), but expected one of:
 * (Tensor mean, Tensor std, torch.Generator generator, Tensor out)
 * (Tensor mean, float std, torch.Generator generator, Tensor out)
 * (float mean, Tensor std, torch.Generator generator, Tensor out)
 * (float mean, float std, tuple of ints size, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)


In [27]:
@torch.no_grad()
def get_all_preds(model, loader):
    model.eval()
    all_preds = torch.tensor([])
    for batch in loader:
        images, labels = batch
        images, labels = images.cuda(), labels.cuda()
        preds = model(images)
        all_preds = torch.cat(
            (all_preds, preds.cpu()), dim=0)
    return all_preds

In [29]:
batch_size = 16
epochs = 15
lr = 1e-4
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
kernel_sizes = [3, 4, 5]
hidden_size = 64
out_channel = 64
numero = 2
name = 'bestModel/CnnLstm/' + str(numero) + '_2' + '.pth'

c = CnnPart(torch.from_numpy(weight_matrix).float(),
                max_length,
                kernel_sizes,
                out_channel_len=out_channel)



lstm = BiLstmAttention(hidden_size=hidden_size,embedding_dim=len(kernel_sizes))

attention = BahdanauAttention(2 * hidden_size,hidden_size)

cnn = CnnLstm(c,lstm,attention,hidden_size)

cnn.load_state_dict(torch.load(name))
cnn.to(device)
cnn.eval()

CnnLstm(
  (cnn): CnnPart(
    (embbeding_1): Embedding(16599, 200)
    (embbeding_2): Embedding(16599, 200)
    (dropout): Dropout(p=0.2, inplace=False)
    (block): ModuleList(
      (0): Sequential(
        (0): Conv2d(2, 64, kernel_size=(3, 200), stride=(1, 1))
        (1): ReLU()
        (2): Flatten()
        (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
      )
      (1): Sequential(
        (0): Conv2d(2, 64, kernel_size=(4, 200), stride=(1, 1))
        (1): ReLU()
        (2): Flatten()
        (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
      )
      (2): Sequential(
        (0): Conv2d(2, 64, kernel_size=(5, 200), stride=(1, 1))
        (1): ReLU()
        (2): Flatten()
        (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
      )
    )
  )
  (LSTM): BiLstmAttention(
    (lstm): LSTM(3, 64, batch_first=True, bidirectional=True)
  )
  (attention): Bahd

In [37]:
with torch.no_grad():
    prediction_loader = iter(DataLoader(DataSet(X_test, Y_test),
                                        batch_size=1,
                                        shuffle=False))
    y_pred = get_all_preds(cnn, prediction_loader)
proba, predicted = torch.max(y_pred, 1)
new_score(Y_test.numpy(), predicted.numpy())

Accuracy: 0.8549032688458973 
 F1: 0.8243123436700753 
 precision : 0.8727861626616813 
 recall : 0.8033418819655522 


In [None]:
for numero in range(3):
    batch_size = 16
    epochs = 10
    lr = 0.0001
    kernel_sizes = [3, 4, 5]
    name = 'bestModel/kim/' + str(numero) + '_2'
    cnn = CnnKim(matrix_weight=torch.from_numpy(weight_matrix).float(),
                 len_sentence=max_length,
                 kernel_sizes=kernel_sizes)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    cnn.to(device)
    # Create trainer
    trainer = list(DataLoader(DataSet(X_train, Y_train), batch_size=batch_size, shuffle=True))
    valider = list(DataLoader(DataSet(X_val, Y_val), batch_size=1, shuffle=False))
    # train our model
    train_callback(cnn,
                   trainer,
                   valider,
                   name_save=name,
                   w=None,
                   weight_decay=1e-4,
                   epochs=epochs,
                   lr=lr)

[1,    50] loss: 0.715
[1,   100] loss: 0.641
[1,   150] loss: 0.574
[1,   200] loss: 0.513
[1,   250] loss: 0.494
[1,   300] loss: 0.440
[1,   350] loss: 0.433
[1,   400] loss: 0.403
---- Mean loss = 0.527 -------- Loss Val 545.273----
[tensor(545.2727, device='cuda:0')]
=== Current lr: 0.000===
[2,    50] loss: 0.456
[2,   100] loss: 0.410
[2,   150] loss: 0.409
[2,   200] loss: 0.380
[2,   250] loss: 0.397
[2,   300] loss: 0.376
[2,   350] loss: 0.361
[2,   400] loss: 0.358
---- Mean loss = 0.393 -------- Loss Val 501.778----
[tensor(545.2727, device='cuda:0'), tensor(501.7780, device='cuda:0')]
=== Current lr: 0.000===
[3,    50] loss: 0.408
[3,   100] loss: 0.373
[3,   150] loss: 0.386
[3,   200] loss: 0.352
[3,   250] loss: 0.373
[3,   300] loss: 0.345
[3,   350] loss: 0.328
[3,   400] loss: 0.319
---- Mean loss = 0.360 -------- Loss Val 488.487----
[tensor(545.2727, device='cuda:0'), tensor(501.7780, device='cuda:0'), tensor(488.4875, device='cuda:0')]
=== Current lr: 0.000===
[

[6,   200] loss: 0.323
[6,   250] loss: 0.272
[6,   300] loss: 0.271
[6,   350] loss: 0.263
[6,   400] loss: 0.297
---- Mean loss = 0.290 -------- Loss Val 450.832----
[tensor(522.1523, device='cuda:0'), tensor(489.4928, device='cuda:0'), tensor(477.6366, device='cuda:0'), tensor(466.1240, device='cuda:0'), tensor(455.2747, device='cuda:0'), tensor(450.8319, device='cuda:0')]
=== Current lr: 0.000===
[7,    50] loss: 0.322
[7,   100] loss: 0.257
[7,   150] loss: 0.261
[7,   200] loss: 0.300
[7,   250] loss: 0.264
[7,   300] loss: 0.255
[7,   350] loss: 0.267
[7,   400] loss: 0.275
---- Mean loss = 0.275 -------- Loss Val 449.009----
[tensor(522.1523, device='cuda:0'), tensor(489.4928, device='cuda:0'), tensor(477.6366, device='cuda:0'), tensor(466.1240, device='cuda:0'), tensor(455.2747, device='cuda:0'), tensor(450.8319, device='cuda:0'), tensor(449.0093, device='cuda:0')]
=== Current lr: 0.000===
[8,    50] loss: 0.322
[8,   100] loss: 0.253
[8,   150] loss: 0.260
[8,   200] loss: 0.

In [42]:
numero = 2
kernel_sizes = [3, 4, 5]
name = 'bestModel/kim/' + str(numero) + '_2.pth'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cnn = CnnKim(matrix_weight=torch.from_numpy(weight_matrix).float(),
             len_sentence=max_length, kernel_sizes=kernel_sizes)
cnn.load_state_dict(torch.load(name))
cnn.to(device)
cnn.eval()

CnnKim(
  (embbeding_1): Embedding(16599, 200)
  (embbeding_2): Embedding(16599, 200)
  (block): ModuleList(
    (0): Sequential(
      (0): Conv2d(2, 100, kernel_size=(3, 200), stride=(1, 1))
      (1): ReLU()
      (2): Flatten()
      (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
    )
    (1): Sequential(
      (0): Conv2d(2, 100, kernel_size=(4, 200), stride=(1, 1))
      (1): ReLU()
      (2): Flatten()
      (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
    )
    (2): Sequential(
      (0): Conv2d(2, 100, kernel_size=(5, 200), stride=(1, 1))
      (1): ReLU()
      (2): Flatten()
      (3): MaxPool1d(kernel_size=22188, stride=22188, padding=0, dilation=1, ceil_mode=True)
    )
  )
  (linear): Linear(in_features=300, out_features=2, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [43]:
with torch.no_grad():
    prediction_loader = iter(DataLoader(DataSet(X_test, Y_test),
                                        batch_size=1,
                                        shuffle=False))
    y_pred = get_all_preds(cnn, prediction_loader)
proba, predicted = torch.max(y_pred, 1)
new_score(Y_test.numpy(), predicted.numpy())

Accuracy: 0.8468979319546365 
 F1: 0.81877545602057 
 precision : 0.8498897667515934 
 recall : 0.8026675888107903 


In [27]:
from torch import nn
import torch

In [15]:
a = torch.tensor([[[1, 2, 3, 4],
                  [1, 3, 4, 10]]], dtype=torch.float32)
layer = nn.AdaptiveMaxPool1d(1)

In [2]:
import torch
from torch import nn

In [3]:
X = torch.rand((2,3,100))

In [5]:
X = nn.utils.rnn.pad_packed_sequence(X, lengths=[3, 3], batch_first=False, enforce_sorted=False)

TypeError: pad_packed_sequence() got multiple values for argument 'batch_first'