In [1]:
import warnings
warnings.filterwarnings('ignore')

from utils import Dataset, GraphsizePretrained
from tqdm import tqdm
from tqdm.notebook import tqdm
from time import time
import numpy as np

Using backend: pytorch


In [2]:
dataset = Dataset('/home/mangaravite/Documentos/datasets/classification/datasets/reut/')
fold = next(dataset.get_fold_instances(10))
fold._fields

('X_train', 'y_train', 'X_test', 'y_test', 'X_val', 'y_val')

In [3]:
%%time
graph_builder = GraphsizePretrained(w=2, verbose=True,
                   pretrained_vec='/home/mangaravite/Documentos/pretrained_vectors/glove.6B.300d.txt')

400000it [00:23, 17314.28it/s]


CPU times: user 22.6 s, sys: 761 ms, total: 23.4 s
Wall time: 23.3 s


In [4]:
import torch
import dgl
import dgl.function as fn
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
from dgl.nn.pytorch.conv import GraphConv, GATConv
from dgl.nn.pytorch.glob import GlobalAttentionPooling

from sklearn.preprocessing import LabelEncoder

from itertools import repeat

import torch.optim as optim
from torch.utils.data import DataLoader

In [5]:
%%time
graph_builder.fit(fold.X_train, fold.y_train)

100%|██████████| 10627/10627 [00:05<00:00, 1847.31it/s]


CPU times: user 6.95 s, sys: 47.8 ms, total: 6.99 s
Wall time: 6.98 s


GraphsizePretrained(pretrained_vec='/home/mangaravite/Documentos/pretrained_vectors/glove.6B.300d.txt',
          stopwords='remove', verbose=None, w=2)

In [6]:
len(graph_builder.g.edges), len(graph_builder.g)

(164318, 22962)

In [7]:
list(map(lambda x: (x,graph_builder.g.degree()[x]), graph_builder.label_ids))

[(0, 239),
 (1, 77),
 (2, 454),
 (3, 2040),
 (4, 1129),
 (5, 626),
 (6, 2396),
 (7, 1077),
 (8, 1374),
 (9, 58),
 (10, 490),
 (11, 527),
 (12, 1469),
 (13, 1061),
 (14, 212),
 (15, 2121),
 (16, 5903),
 (17, 5511),
 (18, 3000),
 (19, 651),
 (20, 272),
 (21, 339),
 (22, 801),
 (23, 490),
 (24, 4236),
 (25, 616),
 (26, 2894),
 (27, 1716),
 (28, 762),
 (29, 1622),
 (30, 1784),
 (31, 6477),
 (32, 678),
 (33, 2005),
 (34, 402),
 (35, 108),
 (36, 399),
 (37, 621),
 (38, 113),
 (39, 10572),
 (40, 4550),
 (41, 9420),
 (42, 292),
 (43, 3248),
 (44, 2754),
 (45, 42),
 (46, 302),
 (47, 907),
 (48, 1138),
 (49, 494),
 (50, 170),
 (51, 610),
 (52, 5991),
 (53, 140),
 (54, 772),
 (55, 1598),
 (56, 498),
 (57, 1034),
 (58, 640),
 (59, 1353),
 (60, 1672),
 (61, 355),
 (62, 901),
 (63, 3315),
 (64, 513),
 (65, 372),
 (66, 379),
 (67, 2356),
 (68, 869),
 (69, 1307),
 (70, 460),
 (71, 3545),
 (72, 1663),
 (73, 1378),
 (74, 66),
 (75, 956),
 (76, 3284),
 (77, 1867),
 (78, 479),
 (79, 768),
 (80, 239),
 (81

In [8]:
class GenericGAT(nn.Module):
    def __init__(self, in_dim, hidden_dim,
                 n_heads=8, n_convs=2, drop=.5, first_hidden='emb', attn_drop=.5,
                 encoders={'term','label'}, device='cuda:0'):
        super(GenericGAT, self).__init__()
        self.device = torch.device(device)
        self.first_hidden = first_hidden
        
        self.encoders = nn.ModuleDict({
            k: nn.Linear(in_dim, hidden_dim).to(self.device) for k in encoders
        })
        
        self.layers = nn.ModuleList([
            GATConv(hidden_dim, hidden_dim, residual=True, num_heads=n_heads, activation=F.leaky_relu,
                    feat_drop=drop, attn_drop=attn_drop).to(self.device) for _ in range(n_convs)
        ])
        self.down_proj = [
            nn.Linear(n_heads*hidden_dim, hidden_dim).to(self.device) for _ in range(n_convs)
        ]
        
    def forward(self, G, **kwargs):
        h = G.ndata[self.first_hidden].float()
        for (k, mask) in kwargs.items():
            if k in self.encoders:
                if mask is not None:
                    h[ mask ] = self.encoders[k]( h[ mask ] )
                else:
                    h = self.encoders[k]( h )
        
        for l, conv in enumerate(self.layers):
            h = conv(G, h)
            h = h.view(h.shape[0], -1)
            h = self.down_proj[l]( h )
        
        return h
        

In [9]:
class TGA(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_class,
                  n_heads=8, drop=.5, attn_drop=.5,
                  device='cuda:0'):
        super(TGA, self).__init__()
        self.n_class = n_class
        self.device = torch.device(device)
        self.gat_global = GenericGAT(in_dim, hidden_dim, 
                                     encoders={'label'}, 
                                     n_heads=n_heads, drop=drop,
                                     attn_drop=attn_drop, device=self.device)
        self.gat_local  = GenericGAT(hidden_dim, hidden_dim, 
                                     encoders={'term'}, 
                                     n_heads=n_heads, drop=drop,
                                     first_hidden='emb',
                                     attn_drop=attn_drop, device=self.device)

        self.lin = nn.Linear( 2*hidden_dim, 1).to(self.device)
        # Depois tentar alguma ativação (ReLU, por exemplo, pode "desativar" alguns termos no softmax)
        self.pooling = GlobalAttentionPooling( self.lin ).to(self.device)

        # Fully Connected
        self.fc1 = nn.Linear( 2*hidden_dim, self.n_class).to(self.device)
        #self.fc2 = nn.Linear( hidden_dim, self.n_class).to(self.device)
        #self.fc3 = nn.Linear( hidden_dim, self.n_class).to(self.device)
    def forward(self, G, gs):
        h_global           = self.gat_global( G, label=G.ndata['label'].nonzero().flatten() )
        #gs.ndata['weight'] = h_global[ gs.ndata['idx'] ] # Tentar concatenando
        h_local            = self.gat_local(gs, term=None)
        h_local            = torch.cat((h_local, h_global[ gs.ndata['idx'] ]), 1)
        h_local            = self.pooling( gs, h_local )
        return self.fc1( h_local )
# torch.Size([3652, 300]) torch.Size([3652, 300]) torch.Size([128, 300])
        

In [10]:
in_dim=300
hidden_dim=300
n_heads=8
drop=0.3
attn_drop=0.5
batch_size=32
device='cuda:0'

In [11]:
model = TGA( in_dim, hidden_dim, graph_builder.n_class,
            n_heads=n_heads, drop=drop, attn_drop=attn_drop )
model

TGA(
  (gat_global): GenericGAT(
    (encoders): ModuleDict(
      (label): Linear(in_features=300, out_features=300, bias=True)
    )
    (layers): ModuleList(
      (0): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky_relu): LeakyReLU(negative_slope=0.2)
        (res_fc): Identity()
      )
      (1): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky_relu): LeakyReLU(negative_slope=0.2)
        (res_fc): Identity()
      )
    )
  )
  (gat_local): GenericGAT(
    (encoders): ModuleDict(
      (term): Linear(in_features=300, out_features=300, bias=True)
    )
    (layers): ModuleList(
      (0): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky

In [12]:
def collate(param):
    X, y = zip(*param)
    Gs_nx = graph_builder.transform(X)
    
    Gs_dgl = []
    for g in Gs_nx:
        g_dgl = dgl.DGLGraph()
        g_dgl.from_networkx(g, node_attrs=['emb', 'idx'] )
        Gs_dgl.append( g_dgl )
        
    big_graph_dgl = dgl.DGLGraph()
    big_graph_dgl.from_networkx(graph_builder.g, node_attrs=['emb', 'label', 'idx'] )
    
    return big_graph_dgl, dgl.batch(Gs_dgl), torch.tensor(y)

In [13]:
loss_func = nn.CrossEntropyLoss()

optimizer = optim.Adam( model.parameters(), lr=1e-3, weight_decay=1e-3)
#optimizer = optim.AdamW( model.parameters(), lr=1e-2, weight_decay=1e-3)

#optimizer = optim.RMSprop( model.parameters(), lr=1e-2, weight_decay=1e-4)
#optimizer = optim.RMSprop( model.parameters(), lr=0.0001 )

model.train()
torch.cuda.synchronize()

In [None]:
n_epochs = 3

for epoch in range(n_epochs):
    epoch_loss = 0
    data_loader = DataLoader(list(zip(fold.X_train, fold.y_train)), batch_size=batch_size,
                             shuffle=True, collate_fn=collate, num_workers=2)
    with tqdm(total=len(fold.y_train)) as pbar:
        total = 1
        correct = 1
        model.train()
        for G, gs, y in data_loader:
            G = G.to( torch.device('cuda:0') )
            gs = gs.to( torch.device('cuda:0') )
            y = y.to( torch.device('cuda:0') )
            
            #outputs = model( G, gs )
            #probs_Y = torch.softmax(outputs, 1)
            #sampled_Y = torch.argmax(probs_Y, 1).reshape(-1)
            
            #total += y.size(0)
            #correct += (sampled_Y == y).sum().item()
            
            # NN backprop phase
            #loss = loss_func(outputs, y)
            #optimizer.zero_grad()
            #loss.backward()
            #optimizer.step()
            #epoch_loss += loss.detach().item()
            
            pbar.update( len(y) )
            pbar.set_description_str(f'iter {epoch} Acc train: {correct/total:.3}')

HBox(children=(FloatProgress(value=0.0, max=10627.0), HTML(value='')))