In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from multiprocessing import Pool
from collections import namedtuple
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
import networkx as nx
from torch.nn.utils.rnn import pack_padded_sequence, pad_sequence
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as stop_words
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords as stopwords_by_lang

import copy

from tqdm.notebook import tqdm

import re
from collections import Counter
import scipy.sparse as sp
import numpy as np

In [2]:
from TGA.utils import Dataset

In [3]:
dataset = Dataset('/home/Documents/datasets/20ng/')
g = dataset.get_fold_instances(10, with_val=True)
fold = next(g)
fold._fields, len(fold.X_train)

(('X_train', 'y_train', 'X_test', 'y_test', 'X_val', 'y_val'), 15062)

In [4]:
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()

replace_patterns = [
    ('<[^>]*>', ''),                                    # remove HTML tags
    ('(\D)\d\d:\d\d:\d\d(\D)', '\\1 ParsedTime \\2'),
    ('(\D)\d\d:\d\d(\D)', '\\1 ParsedTime \\2'),
    ('(\D)\d:\d\d:\d\d(\D)', '\\1 ParsedTime \\2'),
    ('(\D)\d:\d\d(\D)', '\\1 ParsedTime \\2'),
    ('(\D)\d\d\d\-\d\d\d\d(\D)', '\\1 ParsedPhoneNum \\2'),
    ('(\D)\d\d\d\D\d\d\d\-\d\d\d\d(\D)', '\\1 ParsedPhoneNum \\2'),
    ('(\D\D)\d\d\d\D\D\d\d\d\-\d\d\d\d(\D)', '\\1 ParsedPhoneNum \\2'),
    ('(\D)\d\d\d\d\d\-\d\d\d\d(\D)', '\\1 ParsedZipcodePlusFour \\2'),
    ('(\D)\d(\D)', '\\1ParsedOneDigit\\2'),
    ('(\D)\d\d(\D)', '\\1ParsedTwoDigits\\2'),
    ('(\D)\d\d\d(\D)', '\\1ParsedThreeDigits\\2'),
    ('(\D)\d\d\d\d(\D)', '\\1ParsedFourDigits\\2'),
    ('(\D)\d\d\d\d\d(\D)', '\\1ParsedFiveDigits\\2'),
    ('(\D)\d\d\d\d\d\d(\D)', '\\1ParsedSixDigits\\2'),
    ('\d+', 'ParsedDigits')
]

compiled_replace_patterns = [(re.compile(p[0]), p[1]) for p in replace_patterns]

def generate_preprocessor(replace_patterns):
    compiled_replace_patterns = [(re.compile(p[0]), p[1]) for p in replace_patterns]
    def preprocessor(text):
        for pattern, replace in compiled_replace_patterns:
            text = re.sub(pattern, replace, text)
        text = text.lower()
        return text
    return preprocessor

generated_patters=generate_preprocessor(replace_patterns)

def preprocessor(text):
    # For each pattern, replace it with the appropriate string
    for pattern, replace in compiled_replace_patterns:
        text = re.sub(pattern, replace, text)
    text = text.lower()
    return text

class Tokenizer(BaseEstimator, TransformerMixin):
    def __init__(self, mindf=2, lan='english', stopwords='nltk', model='topk', k=500, verbose=False):
        super(Tokenizer, self).__init__()
        self.mindf = mindf
        self.le = LabelEncoder()
        self.verbose = verbose
        self.lan = lan
        if stopwords == 'nltk':
            self.stopwordsSet = stopwords_by_lang.words(lan)
        elif stopwords == 'scikit':
            self.stopwordsSet = stop_words
        else:
            self.stopwordsSet = []
        self.model =  model
        self.k     = k
        self.analyzer = TfidfVectorizer(preprocessor=preprocessor, min_df=mindf)#.build_analyzer()
        self.local_analyzer = self.analyzer.build_analyzer()
        self.analyzer.set_params( analyzer=self.local_analyzer )
        self.node_mapper      = {}
        
    def analyzer_doc(self, doc):
        return self.local_analyzer(doc)
    def fit(self, X, y):
        self.N = len(X)
        y = self.le.fit_transform( y )
        self.n_class = len(self.le.classes_)
        docs_in_terms = []
        
        with Pool(processes=18) as p:
            #docs = map(self.local_analyzer, X)
            for doc_in_terms in tqdm(p.imap(self.analyzer_doc, X), total=self.N, disable=not self.verbose):
                doc_in_terms = list(set(map( self._filter_fit_, list(doc_in_terms) ))) 
                docs_in_terms.extend(doc_in_terms)
        
        self.term_freqs       = Counter(docs_in_terms)
        self.term_freqs       = { term:v for (term,v) in self.term_freqs.items() if v >= self.mindf }
        self.node_mapper      = { term: self.node_mapper.setdefault(term, len(self.node_mapper)+1)
                                 for term in self.term_freqs.keys() }
        self.node_mapper['<BLANK>'] = 0
        self.term_freqs['<BLANK>']  = self.N
        
        self.node_mapper['<UNK>']   = len(self.node_mapper)
        self.term_freqs['<UNK>']  = self.N
        self.vocab_size = len(self.node_mapper)
        
        self.term_array = [ term for (term,term_id) in sorted(self.node_mapper.items(), key=lambda x: x[1]) ]
        
        self.fi_ = np.array([ np.log2( (self.N+1)/(self.term_freqs[term]+1) ) for term in self.term_array ])
            
        return self
    def _filter_transform_(self, term):
        if term in self.stopwordsSet:
            return '<STPW>'
        if term not in self.node_mapper:
            return '<UNK>'
        return term
    def _filter_fit_(self, term):
        if term in self.stopwordsSet:
            return '<STPW>'
        return term
    def _model_(self, doc):
        doc_counter = Counter(doc)
        doc = np.array(list(doc_counter.keys()))
        if len(doc) > self.k:
            weigths = np.array([ self.fi_[t] for t in doc ])
            weigths = softmax(weigths)
            if self.model == 'topk':
                doc = doc[(-weigths).argsort()[:self.k]]
            elif self.model == 'sample':
                doc = np.random.choice(doc, size=self.k, replace=False, p=weigths)
        TFs = np.array([ doc_counter[tid] for tid in doc ])
        DFs = np.array([ self.term_freqs[self.term_array[tid]] for tid in doc ])
        return doc, TFs, DFs
    def transform(self, X, verbose=None):
        verbose = verbose if verbose is not None else self.verbose
        n = len(X)
        terms_ = []
        for i,doc_in_terms in tqdm(enumerate(map(self.analyzer_doc, X)), total=n, disable=not verbose):
            doc_in_terms = map( self._filter_transform_, doc_in_terms )
            #doc_in_terms = filter( lambda x: x != '<STPW>', doc_in_terms )
            doc_tids = [ self.node_mapper[tid] for tid in doc_in_terms ]
            doc_tids, TFs, DFs = self._model_(doc_tids)
            terms_.append( (doc_tids, TFs, DFs) )
        doc_tids, TFs, DFs = list(zip(*terms_))
        return list(doc_tids), list(TFs), list(DFs)

In [5]:
tokenizer = Tokenizer(mindf=1, verbose=True, stopwords=None)
tokenizer.fit(fold.X_train, fold.y_train)
tokenizer.vocab_size, tokenizer.N

  0%|          | 0/15062 [00:00<?, ?it/s]

(99011, 15062)

In [6]:
doc_tids, TFs, DFs =  tokenizer.transform( fold.X_val )

  0%|          | 0/1892 [00:00<?, ?it/s]

In [7]:
pad_sequence(list(map(torch.LongTensor, TFs)), batch_first=True, padding_value=0)

tensor([[ 1,  1,  1,  ...,  0,  0,  0],
        [ 7,  2,  2,  ...,  0,  0,  0],
        [ 1,  1,  1,  ...,  0,  0,  0],
        ...,
        [ 2,  1, 15,  ...,  0,  0,  0],
        [ 4,  1,  2,  ...,  0,  0,  0],
        [ 2,  2,  1,  ...,  1,  1,  2]])

In [8]:
tokenizer.term_array[15749]

'beef'

In [9]:
y_train = tokenizer.le.transform( fold.y_train )
y_val   = tokenizer.le.transform( fold.y_val )
y_test  = tokenizer.le.transform( fold.y_test )

In [10]:
class AttentionTFIDF_V1(nn.Module):
    def __init__(self, vocab_size, hiddens, nclass, maxF=20, drop=.5, initrange=.5, negative_slope=99.):
        super(AttentionTFIDF_V1, self).__init__()
        self.hiddens        = hiddens
        self.maxF           = maxF
        self.value_emb      = nn.Embedding(vocab_size, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.query_emb      = nn.Embedding(vocab_size, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.key_emb        = nn.Embedding(vocab_size, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.TF_emb         = nn.Embedding(maxF, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.DF_emb         = nn.Embedding(maxF, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.fc             = nn.Linear(hiddens, nclass)
        self.initrange      = initrange 
        self.negative_slope = negative_slope
        self.drop_          = drop
        self.init_weights()
    def forward(self, doc_tids, TFs, DFs):
        batch_size = doc_tids.size(0)
        bx_packed  = doc_tids == 0
        pad_mask   = bx_packed.logical_not()
        doc_sizes  = pad_mask.sum(dim=1).view(batch_size, 1)
        pad_mask   = pad_mask.view(*bx_packed.shape, 1)
        pad_mask   = pad_mask.logical_and(pad_mask.transpose(1, 2))
        
        TFs     = torch.clamp( TFs, max=self.maxF-1 )
        h_TFs   = self.TF_emb( TFs )
        h_TFs   = F.dropout( h_TFs, p=self.drop_, training=self.training )
        
        DFs     = torch.clamp( DFs, max=self.maxF-1 )
        h_DFs   = self.DF_emb( DFs )
        h_DFs   = F.dropout( h_DFs, p=self.drop_, training=self.training )
        
        h_query = self.query_emb( doc_tids )
        h_query = h_query + h_TFs + h_DFs
        h_query = torch.tanh( h_query )
        h_query = F.dropout( h_query, p=self.drop_, training=self.training )
        
        h_key = self.key_emb( doc_tids )
        h_key = h_key + h_TFs + h_DFs
        h_key = torch.tanh( h_key )
        h_key = F.dropout( h_key, p=self.drop_, training=self.training )
        
        co_weights = torch.bmm( h_key, h_query.transpose( 1, 2 ) )
        co_weights = F.leaky_relu( co_weights, negative_slope=self.negative_slope)
        
        #co_weights[pad_mask.logical_not()] = 0 # Set the 3D-pad mask values to 
        co_weights[pad_mask.logical_not()] = float('-inf') # Set the 3D-pad mask values to -inf (=0 in sigmoid)
        co_weights = torch.sigmoid(co_weights)
        
        weights = co_weights.sum(axis=2) / doc_sizes
        weights[bx_packed] = float('-inf') # Set the 2D-pad mask values to -inf  (=0 in softmax)
        
        weights = torch.softmax(weights, dim=1)
        weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
        weights = weights.view( *weights.shape, 1 )
        
        h_value = self.value_emb( doc_tids )
        h_value = h_value + h_TFs + h_DFs
        h_value = F.dropout( h_value, p=self.drop_, training=self.training )
        
        docs_h = h_value * weights
        docs_h = docs_h.sum(axis=1)
        docs_h = F.dropout( docs_h, p=self.drop_, training=self.training )
        docs_h = self.fc(docs_h)
        return docs_h, weights, co_weights
    
    def init_weights(self):
        self.TF_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.DF_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.query_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.key_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.value_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.fc.weight.data.uniform_(-self.initrange, self.initrange)
        

In [11]:
class AttentionTFIDF_V2(nn.Module):
    def __init__(self, vocab_size, hiddens, nclass,
                 maxF=20, drop=.5, initrange=.5, negative_slope=99.):
        super(AttentionTFIDF_V2, self).__init__()
        self.hiddens        = hiddens
        self.maxF           = maxF
        self.query_emb      = nn.Embedding(vocab_size, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.key_term_emb   = nn.Embedding(vocab_size, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.key_clss_emb   = nn.Embedding(hiddens*nclass, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.value_emb      = nn.Embedding(hiddens*nclass, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.TF_emb         = nn.Embedding(maxF, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.DF_emb         = nn.Embedding(maxF, hiddens, scale_grad_by_freq=True, padding_idx=0)
        self.fc             = nn.Linear(hiddens, nclass)
        self.initrange      = initrange 
        self.negative_slope = negative_slope
        self.drop_          = drop
        self.init_weights()
    def forward(self, doc_tids, TFs, DFs):
        batch_size = doc_tids.size(0)
        bx_packed  = doc_tids == 0
        pad_mask   = bx_packed.logical_not()
        doc_sizes  = pad_mask.sum(dim=1).view(batch_size, 1)
        pad_mask   = pad_mask.view(*bx_packed.shape, 1)
        pad_mask   = pad_mask.logical_and(pad_mask.transpose(1, 2))
        
        TFs     = torch.clamp( TFs, max=self.maxF-1 )
        h_TFs   = self.TF_emb( TFs )
        h_TFs   = F.dropout( h_TFs, p=self.drop_, training=self.training )
        
        DFs     = torch.clamp( DFs, max=self.maxF-1 )
        h_DFs   = self.DF_emb( DFs )
        h_DFs   = F.dropout( h_DFs, p=self.drop_, training=self.training )
        
        h_query = self.query_emb( doc_tids )
        h_query = h_query + h_TFs + h_DFs
        h_query = torch.tanh( h_query )
        h_query = F.dropout( h_query, p=self.drop_, training=self.training )
        
        h_key = self.key_term_emb( doc_tids )
        h_key = h_key + h_TFs + h_DFs
        h_key = torch.tanh( h_key )
        h_key = F.dropout( h_key, p=self.drop_, training=self.training )
        
        co_weights = torch.bmm( h_key, h_query.transpose( 1, 2 ) )
        co_weights = F.leaky_relu( co_weights, negative_slope=self.negative_slope)
        
        #co_weights[pad_mask.logical_not()] = 0 # Set the 3D-pad mask values to 
        co_weights[pad_mask.logical_not()] = float('-inf') # Set the 3D-pad mask values to -inf (=0 in sigmoid)
        co_weights = torch.sigmoid(co_weights)
        
        
        
        h_clss_key = self.key_clss_emb.weights
        h_clss_key = h_clss_key
        h_clss_key = torch.tanh( h_clss_key )
        h_clss_key = F.dropout( h_clss_key, p=self.drop_, training=self.training )
        
        co_weights = torch.bmm( h_key, h_query.transpose( 1, 2 ) )
        co_weights = F.leaky_relu( co_weights, negative_slope=self.negative_slope)
        
        weights = co_weights.sum(axis=2) / doc_sizes
        weights[bx_packed] = float('-inf') # Set the 2D-pad mask values to -inf  (=0 in softmax)
        
        weights = torch.softmax(weights, dim=1)
        weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)
        weights = weights.view( *weights.shape, 1 )
        
        h_value = self.value_emb( doc_tids )
        h_value = h_value + h_TFs + h_DFs
        h_value = F.dropout( h_value, p=self.drop_, training=self.training )
        
        docs_h = h_value * weights
        docs_h = docs_h.sum(axis=1)
        docs_h = F.dropout( docs_h, p=self.drop_, training=self.training )
        docs_h = self.fc(docs_h)
        return docs_h, weights, co_weights
    
    def init_weights(self):
        self.TF_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.DF_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.query_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.key_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.value_emb.weight.data.uniform_(-self.initrange, self.initrange)
        self.fc.weight.data.uniform_(-self.initrange, self.initrange)

In [12]:
nepochs = 1000
max_epochs = 30
drop=0.8
max_drop=.7 # default .8
device = torch.device('cuda:0')
batch_size = 16 # default 32
k = 512

In [13]:
def collate_train(param):
    X, y = zip(*param)
    doc_tids, TFs, DFs = tokenizer.transform(X, verbose=False)
    doc_tids = pad_sequence(list(map(torch.LongTensor, doc_tids)), batch_first=True, padding_value=0)
    
    TFs = pad_sequence(list(map(torch.tensor, TFs)), batch_first=True, padding_value=0)
    TFs = torch.LongTensor(torch.log2(TFs+1).round().long())
    
    DFs = pad_sequence(list(map(torch.tensor, DFs)), batch_first=True, padding_value=0)
    DFs = torch.LongTensor(torch.log2(DFs+1).round().long())
    
    return doc_tids, TFs, DFs, torch.LongTensor(y)

In [14]:
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f468814b070>

In [15]:
torch.device('cuda:0')

device(type='cuda', index=0)

In [16]:
#sc = SimpleClassifier(tokenizer.vocab_size, 300, tokenizer.n_class, dropout=drop).to( device )
ab = AttentionTFIDF_V1(tokenizer.vocab_size, 300, tokenizer.n_class, drop=drop).to( device )
#ab = AttentionBag(tokenizer.vocab_size, 300, tokenizer.n_class, drop=drop).to( device )
#ab = NotTooSimpleClassifier(tokenizer.vocab_size, 300, tokenizer.n_class, dropout1=drop, dropout2=drop).to( device )
tokenizer.k = k
optimizer = optim.AdamW( ab.parameters(), lr=5e-3, weight_decay=5e-3)
loss_func_cel = nn.CrossEntropyLoss().to( device )
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=.95,
                                                       patience=10, verbose=True)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=.98, verbose=True)

In [17]:
num_workers=32

In [18]:
best = 99999.
counter = 1
loss_val = 1.
eps = .9
dl_val = DataLoader(list(zip(fold.X_val, y_val)), batch_size=batch_size,
                         shuffle=False, collate_fn=collate_train, num_workers=num_workers)
for e in tqdm(range(nepochs), total=nepochs):
    dl_train = DataLoader(list(zip(fold.X_train, y_train)), batch_size=batch_size,
                             shuffle=True, collate_fn=collate_train, num_workers=num_workers)
    loss_train  = 0.
    with tqdm(total=len(y_train)+len(y_val), smoothing=0., desc=f"Epoch {e+1}") as pbar:
        total = 0
        correct  = 0
        ab.train()
        tokenizer.model = 'sample'
        tokenizer.k = k
        for i, (doc_tids, TFs, DFs, y) in enumerate(dl_train):
            doc_tids = doc_tids.to( device )
            TFs      = TFs.to( device )
            DFs      = DFs.to( device )
            y        = y.to( device )
            
            pred_docs,_,_ = ab( doc_tids, TFs, DFs )
            pred_docs     = torch.softmax(pred_docs, dim=1)
            loss          = loss_func_cel(pred_docs, y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            loss_train += loss.item()
            total      += len(y)
            y_pred      = pred_docs.argmax(axis=1)
            correct    += (y_pred == y).sum().item()
            #ab.drop_ =  np.power((correct/total),loss_val)
            #ab.drop_ =  np.power((correct/total),4)
            ab.drop_ =  (correct/total)*max_drop
            
            toprint  = f"Train loss: {loss_train/(i+1):.5}/{loss.item():.5} "
            toprint += f'Drop: {ab.drop_:.5} '
            toprint += f'ACC: {correct/total:.5} '
            
            print(toprint, end=f"{' '*100}\r")
            
            pbar.update( len(y) )
            del doc_tids, TFs
            del DFs, y, pred_docs
            del loss, y_pred
        loss_train = loss_train/(i+1)
        print()
        #print(ab.drop_)
        total = 0
        correct  = 0
        ab.eval()
        tokenizer.model = 'topk'
        tokenizer.k = 512
        with torch.no_grad():
            loss_val = 0.
            for i, (doc_tids, TFs, DFs, y) in enumerate(dl_val):
                doc_tids = doc_tids.to( device )
                TFs      = TFs.to( device )
                DFs      = DFs.to( device )
                y        = y.to( device )

                pred_docs,_,_ = ab( doc_tids, TFs, DFs )
                pred_docs     = torch.softmax(pred_docs, dim=1)
                loss          = loss_func_cel(pred_docs, y)

                loss_val   += loss.item()
                total      += len(y)
                y_pred      = pred_docs.argmax(axis=1)
                correct    += (y_pred == y).sum().item()
                
                print(f'Val loss: {loss_val/(i+1):.5} ACC: {correct/total:.5}', end=f"{' '*100}\r")
                pbar.update( len(y) )
                
                del doc_tids, TFs, DFs, y
                del pred_docs, loss
            print()
            loss_val   = (loss_val/(i+1))
            scheduler.step(loss_val)

            if best-loss_val > 0.0001 :
                best = loss_val
                counter = 1
                print(f'New Best Val loss: {best:.5}', end=f"{' '*100}\n")
                best_model = copy.deepcopy(ab).to('cpu')
            elif counter > max_epochs:
                print(f'Best Val loss: {best:.5}', end=f"{' '*100}\n")
                break
            else:
                counter += 1

  0%|          | 0/1000 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.6758/2.735 Drop: 0.30785 ACC: 0.43978                                                                                                        
Val loss: 2.3923 ACC: 0.71459                                                                                                    
New Best Val loss: 2.3923                                                                                                    


Epoch 2:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.312/2.2466 Drop: 0.55802 ACC: 0.79717                                                                                                      
Val loss: 2.2595 ACC: 0.84408                                                                                                    
New Best Val loss: 2.2595                                                                                                    


Epoch 3:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.23/2.1862 Drop: 0.60487 ACC: 0.8641                                                                                                        
Val loss: 2.2277 ACC: 0.86258                                                                                                    
New Best Val loss: 2.2277                                                                                                    


Epoch 4:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.2016/2.0793 Drop: 0.62085 ACC: 0.88693                                                                                                     
Val loss: 2.2239 ACC: 0.86734                                                                                                    
New Best Val loss: 2.2239                                                                                                    


Epoch 5:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.184/2.0793 Drop: 0.63187 ACC: 0.90267                                                                                                      
Val loss: 2.2129 ACC: 0.87685                                                                                                    
New Best Val loss: 2.2129                                                                                                    


Epoch 6:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1679/2.0846 Drop: 0.64488 ACC: 0.92126                                                                                                     
Val loss: 2.2048 ACC: 0.88425                                                                                                    
New Best Val loss: 2.2048                                                                                                    


Epoch 7:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.155/2.3243 Drop: 0.65264 ACC: 0.93235                                                                                                      
Val loss: 2.1992 ACC: 0.88953                                                                                                    
New Best Val loss: 2.1992                                                                                                    


Epoch 8:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1446/2.079 Drop: 0.65896 ACC: 0.94138                                                                                                      
Val loss: 2.1967 ACC: 0.88584                                                                                                    
New Best Val loss: 2.1967                                                                                                    


Epoch 9:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1368/2.094 Drop: 0.66338 ACC: 0.94768                                                                                                      
Val loss: 2.1947 ACC: 0.88953                                                                                                    
New Best Val loss: 2.1947                                                                                                    


Epoch 10:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1336/2.0782 Drop: 0.66514 ACC: 0.95021                                                                                                     
Val loss: 2.1956 ACC: 0.89218                                                                                                    


Epoch 11:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1312/2.1328 Drop: 0.66677 ACC: 0.95253                                                                                                     
Val loss: 2.1938 ACC: 0.89112                                                                                                    
New Best Val loss: 2.1938                                                                                                    


Epoch 12:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1274/2.0884 Drop: 0.66928 ACC: 0.95611                                                                                                     
Val loss: 2.189 ACC: 0.89429                                                                                                     
New Best Val loss: 2.189                                                                                                    


Epoch 13:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1232/2.2436 Drop: 0.67193 ACC: 0.9599                                                                                                      
Val loss: 2.1893 ACC: 0.89429                                                                                                    


Epoch 14:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1207/2.1161 Drop: 0.67304 ACC: 0.96149                                                                                                     
Val loss: 2.1901 ACC: 0.89271                                                                                                    


Epoch 15:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1211/2.098 Drop: 0.67244 ACC: 0.96063                                                                                                      
Val loss: 2.1858 ACC: 0.89746                                                                                                    
New Best Val loss: 2.1858                                                                                                    


Epoch 16:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1183/2.0782 Drop: 0.67449 ACC: 0.96355                                                                                                     
Val loss: 2.1884 ACC: 0.89482                                                                                                    


Epoch 17:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1167/2.0782 Drop: 0.67532 ACC: 0.96475                                                                                                     
Val loss: 2.1875 ACC: 0.89799                                                                                                    


Epoch 18:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1182/2.0782 Drop: 0.6737 ACC: 0.96242                                                                                                      
Val loss: 2.1887 ACC: 0.89376                                                                                                    


Epoch 19:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1151/2.0782 Drop: 0.67653 ACC: 0.96647                                                                                                     
Val loss: 2.1916 ACC: 0.89006                                                                                                    


Epoch 20:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1147/2.2206 Drop: 0.67639 ACC: 0.96627                                                                                                     
Val loss: 2.1871 ACC: 0.89641                                                                                                    


Epoch 21:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1142/2.0782 Drop: 0.67658 ACC: 0.96654                                                                                                     
Val loss: 2.1855 ACC: 0.89746                                                                                                    
New Best Val loss: 2.1855                                                                                                    


Epoch 22:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1152/2.3091 Drop: 0.67634 ACC: 0.96621                                                                                                     
Val loss: 2.189 ACC: 0.89271                                                                                                     


Epoch 23:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1126/2.0869 Drop: 0.67806 ACC: 0.96866                                                                                                     
Val loss: 2.1864 ACC: 0.89905                                                                                                    


Epoch 24:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.11/2.0782 Drop: 0.67955 ACC: 0.97079                                                                                                       
Val loss: 2.1846 ACC: 0.89746                                                                                                    
New Best Val loss: 2.1846                                                                                                    


Epoch 25:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1105/2.0782 Drop: 0.67885 ACC: 0.96979                                                                                                     
Val loss: 2.1855 ACC: 0.89852                                                                                                    


Epoch 26:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1107/2.2584 Drop: 0.67913 ACC: 0.97019                                                                                                                                                                                                         
Val loss: 2.1917 ACC: 0.89112                                                                                                    


Epoch 27:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1094/2.079 Drop: 0.68025 ACC: 0.97178                                                                                                      
Val loss: 2.1863 ACC: 0.89693                                                                                                    


Epoch 28:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1079/2.2447 Drop: 0.68127 ACC: 0.97324                                                                                                     
Val loss: 2.1888 ACC: 0.89218                                                                                                    


Epoch 29:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1075/2.0782 Drop: 0.68095 ACC: 0.97278                                                                                                     
Val loss: 2.189 ACC: 0.89641                                                                                                     


Epoch 30:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1073/2.0782 Drop: 0.68132 ACC: 0.97331                                                                                                     
Val loss: 2.1886 ACC: 0.89376                                                                                                    


Epoch 31:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1069/2.2448 Drop: 0.6815 ACC: 0.97358                                                                                                      
Val loss: 2.1838 ACC: 0.89958                                                                                                    
New Best Val loss: 2.1838                                                                                                    


Epoch 32:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1095/2.0783 Drop: 0.67946 ACC: 0.97065                                                                                                     
Val loss: 2.1847 ACC: 0.89641                                                                                                    


Epoch 33:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1065/2.0851 Drop: 0.68169 ACC: 0.97384                                                                                                                                                                                                         
Val loss: 2.184 ACC: 0.89905                                                                                                     


Epoch 34:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1063/2.0784 Drop: 0.68201 ACC: 0.97431                                                                                                                                                                                                         
Val loss: 2.1845 ACC: 0.89588                                                                                                    


Epoch 35:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.106/2.079 Drop: 0.68266 ACC: 0.97524                                                                                                       
Val loss: 2.1847 ACC: 0.89535                                                                                                    


Epoch 36:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1057/2.0783 Drop: 0.68299 ACC: 0.9757                                                                                                      
Val loss: 2.1861 ACC: 0.89905                                                                                                    


Epoch 37:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1061/2.0782 Drop: 0.68174 ACC: 0.97391                                                                                                     
Val loss: 2.1812 ACC: 0.90063                                                                                                    
New Best Val loss: 2.1812                                                                                                    


Epoch 38:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1047/2.211 Drop: 0.68322 ACC: 0.97603                                                                                                      
Val loss: 2.1864 ACC: 0.89641                                                                                                    


Epoch 39:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1049/2.0795 Drop: 0.68285 ACC: 0.9755                                                                                                      
Val loss: 2.1855 ACC: 0.90063                                                                                                    


Epoch 40:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1047/2.099 Drop: 0.68341 ACC: 0.9763                                                                                                       
Val loss: 2.1878 ACC: 0.90011                                                                                                    


Epoch 41:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1059/2.0954 Drop: 0.68225 ACC: 0.97464                                                                                                     
Val loss: 2.1872 ACC: 0.89641                                                                                                    


Epoch 42:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1038/2.0787 Drop: 0.68378 ACC: 0.97683                                                                                                     
Val loss: 2.1872 ACC: 0.89535                                                                                                    


Epoch 43:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1026/2.0783 Drop: 0.68457 ACC: 0.97796                                                                                                     
Val loss: 2.1881 ACC: 0.89429                                                                                                    


Epoch 44:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1041/2.0782 Drop: 0.6835 ACC: 0.97643                                                                                                      
Val loss: 2.1904 ACC: 0.89482                                                                                                    


Epoch 45:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.103/2.0782 Drop: 0.68429 ACC: 0.97756                                                                                                      
Val loss: 2.192 ACC: 0.88953                                                                                                     


Epoch 46:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1034/2.0782 Drop: 0.68378 ACC: 0.97683                                                                                                     
Val loss: 2.1896 ACC: 0.89482                                                                                                    


Epoch 47:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1029/2.0782 Drop: 0.68452 ACC: 0.97789                                                                                                     
Val loss: 2.1846 ACC: 0.90063                                                                                                    


Epoch 48:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.103/2.0782 Drop: 0.68406 ACC: 0.97723                                                                                                      
Val loss: 2.1862 ACC: 0.89799                                                                                                    
Epoch    48: reducing learning rate of group 0 to 4.7500e-03.


Epoch 49:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1021/2.0782 Drop: 0.68485 ACC: 0.97836                                                                                                     
Val loss: 2.1824 ACC: 0.90275                                                                                                                                                                                                        


Epoch 50:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1033/2.2445 Drop: 0.68392 ACC: 0.97703                                                                                                     
Val loss: 2.1844 ACC: 0.89852                                                                                                    


Epoch 51:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.103/2.0782 Drop: 0.68383 ACC: 0.9769                                                                                                       
Val loss: 2.1838 ACC: 0.90169                                                                                                    


Epoch 52:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1006/2.2453 Drop: 0.68555 ACC: 0.97935                                                                                                     
Val loss: 2.1817 ACC: 0.90539                                                                                                    


Epoch 53:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.101/2.1659 Drop: 0.68564 ACC: 0.97948                                                                                                      
Val loss: 2.1849 ACC: 0.89746                                                                                                    


Epoch 54:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1012/2.2416 Drop: 0.68545 ACC: 0.97922                                                                                                     
Val loss: 2.1841 ACC: 0.89958                                                                                                    


Epoch 55:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1015/2.0791 Drop: 0.68508 ACC: 0.97869                                                                                                                                                                                                         
Val loss: 2.1854 ACC: 0.89905                                                                                                    


Epoch 56:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1007/2.0782 Drop: 0.68587 ACC: 0.97982                                                                                                     
Val loss: 2.1835 ACC: 0.90169                                                                                                    


Epoch 57:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1013/2.4007 Drop: 0.68559 ACC: 0.97942                                                                                                     
Val loss: 2.1848 ACC: 0.90116                                                                                                    


Epoch 58:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1001/2.0783 Drop: 0.6861 ACC: 0.98015                                                                                                      
Val loss: 2.1845 ACC: 0.89958                                                                                                    


Epoch 59:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1003/2.0864 Drop: 0.68578 ACC: 0.97968                                                                                                     
Val loss: 2.189 ACC: 0.89482                                                                                                     
Epoch    59: reducing learning rate of group 0 to 4.5125e-03.


Epoch 60:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0993/2.0786 Drop: 0.68703 ACC: 0.98148                                                                                                     
Val loss: 2.1846 ACC: 0.89799                                                                                                    


Epoch 61:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0992/2.0782 Drop: 0.68648 ACC: 0.98068                                                                                                     
Val loss: 2.1873 ACC: 0.89799                                                                                                    


Epoch 62:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0989/2.0786 Drop: 0.68708 ACC: 0.98154                                                                                                     
Val loss: 2.1856 ACC: 0.89905                                                                                                    


Epoch 63:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1001/2.0782 Drop: 0.68634 ACC: 0.98048                                                                                                     
Val loss: 2.1864 ACC: 0.89852                                                                                                    


Epoch 64:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0995/2.2159 Drop: 0.68662 ACC: 0.98088                                                                                                                                                                                                                                                                                                             
Val loss: 2.1875 ACC: 0.89588                                                                                                    


Epoch 65:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.1001/2.0782 Drop: 0.68662 ACC: 0.98088                                                                                                                                                                                                         
Val loss: 2.1847 ACC: 0.90063                                                                                                    


Epoch 66:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0981/2.1074 Drop: 0.68731 ACC: 0.98187                                                                                                     
Val loss: 2.1867 ACC: 0.89588                                                                                                    


Epoch 67:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0993/2.1232 Drop: 0.68671 ACC: 0.98101                                                                                                     
Val loss: 2.1888 ACC: 0.89323                                                                                                    


Epoch 68:   0%|          | 0/16954 [00:00<?, ?it/s]

Train loss: 2.0999/2.1734 Drop: 0.68629 ACC: 0.98041                                                                                                     
Val loss: 2.1847 ACC: 0.89852                                                                                                    
Best Val loss: 2.1812                                                                                                    


In [19]:
device_test = 'cpu'
ab = copy.deepcopy(best_model).to(device_test)
ab.eval()
loss_total = 0
correct_t = 0
total_t = 0
dl_test = DataLoader(list(zip(fold.X_test, y_test)), batch_size=128,
                         shuffle=False, collate_fn=collate_train, num_workers=num_workers)
tokenizer.k = 256
for i, (docs_tids_t, TFs_t, DFs_t, y_t) in enumerate(dl_test):
    docs_tids_t = docs_tids_t.to( device_test )
    TFs_t       = TFs_t.to( device_test )
    DFs_t       = DFs_t.to( device_test )
    y_t         = y_t.to( device_test )

    pred_docs_t,weigths,coweights = ab( docs_tids_t, TFs_t, DFs_t )
    sofmax_docs_t = torch.softmax(pred_docs_t, dim=1)

    y_pred_t    = sofmax_docs_t.argmax(axis=1)
    correct_t  += (y_pred_t == y_t).sum().item()
    total_t    += len(y_t)
    loss_total += loss_func_cel(sofmax_docs_t, y_t)

    print(f'Test loss: {loss_total.item()/(i+1):.5} ACC: {correct_t/total_t:.5}', end=f"{' '*100}\r")

Test loss: 2.1734 ACC: 0.90539                                                                                                    

In [20]:
v1 = torch.FloatTensor([[1,0,0],[0,1,0],[0,0,1]])