In [1]:
import warnings
warnings.filterwarnings('ignore')

from utils import Dataset, GraphsizePretrained
from tqdm import tqdm
from tqdm.notebook import tqdm
from time import time
import numpy as np

Using backend: pytorch


In [2]:
dataset = Dataset('/home/mangaravite/Documentos/datasets/classification/datasets/acm/')
fold = next(dataset.get_fold_instances(10))
fold._fields

('X_train', 'y_train', 'X_test', 'y_test', 'X_val', 'y_val')

In [3]:
%%time
graph_builder = GraphsizePretrained(w=2, verbose=True,
                   pretrained_vec='/home/mangaravite/Documentos/pretrained_vectors/glove.6B.300d.txt')

400000it [00:23, 16679.57it/s]


CPU times: user 23.7 s, sys: 579 ms, total: 24.3 s
Wall time: 24.2 s


In [4]:
import torch
import dgl
import dgl.function as fn
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.checkpoint import checkpoint
import networkx as nx
from dgl.nn.pytorch.conv import GraphConv, GATConv
from dgl.nn.pytorch.glob import GlobalAttentionPooling

from sklearn.preprocessing import LabelEncoder

from itertools import repeat

import torch.optim as optim
from torch.utils.data import DataLoader

In [5]:
%%time
graph_builder.fit(fold.X_train, fold.y_train)

100%|██████████| 19907/19907 [00:05<00:00, 3693.91it/s]


CPU times: user 6.62 s, sys: 52.3 ms, total: 6.67 s
Wall time: 6.68 s


GraphsizePretrained(pretrained_vec='/home/mangaravite/Documentos/pretrained_vectors/glove.6B.300d.txt',
          stopwords='remove', verbose=None, w=2)

In [6]:
len(graph_builder.g.edges), len(graph_builder.g)

(126449, 34676)

In [7]:
list(map(lambda x: (x,graph_builder.g.degree()[x]), graph_builder.label_ids))

[(0, 2921),
 (1, 8317),
 (2, 10134),
 (3, 15852),
 (4, 1086),
 (5, 5801),
 (6, 5148),
 (7, 14561),
 (8, 13217),
 (9, 3465),
 (10, 11293)]

In [8]:
class GenericGAT(nn.Module):
    def __init__(self, in_dim, hidden_dim,
                 n_heads=8, n_convs=2, drop=.5, first_hidden='emb', attn_drop=.5,
                 encoders={'term','label'}, device='cuda:0'):
        super(GenericGAT, self).__init__()
        self.device = torch.device(device)
        self.first_hidden = first_hidden
        
        self.encoders = nn.ModuleDict({
            k: nn.Linear(in_dim, hidden_dim).to(self.device) for k in encoders
        })
        
        self.layers = nn.ModuleList([
            GATConv(hidden_dim, hidden_dim, residual=True, num_heads=n_heads, activation=F.leaky_relu,
                    feat_drop=drop, attn_drop=attn_drop).to(self.device) for _ in range(n_convs)
        ])
        self.down_proj = [
            nn.Linear(n_heads*hidden_dim, hidden_dim).to(self.device) for _ in range(n_convs)
        ]
        
    def forward(self, G, **kwargs):
        h = G.ndata[self.first_hidden].float()
        for (k, mask) in kwargs.items():
            if k in self.encoders:
                if mask is not None:
                    h[ mask ] = self.encoders[k]( h[ mask ] )
                else:
                    h = self.encoders[k]( h )
        
        for l, conv in enumerate(self.layers):
            h = conv(G, h)
            h = h.view(h.shape[0], -1)
            h = self.down_proj[l]( h )
        
        return h
        

In [9]:
class TGA(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_class,
                  n_heads=8, drop=.5, attn_drop=.5,
                  device='cuda:0'):
        super(TGA, self).__init__()
        self.n_class = n_class
        self.device = torch.device(device)
        self.gat_global = GenericGAT(in_dim, hidden_dim, 
                                     encoders={'label'}, 
                                     n_heads=n_heads, drop=drop,
                                     attn_drop=attn_drop, device=self.device)
        
        self.gat_local  = GenericGAT(hidden_dim, hidden_dim, 
                                     encoders={'term'}, 
                                     n_heads=n_heads, drop=drop,
                                     first_hidden='emb',
                                     attn_drop=attn_drop, device=self.device)

        self.lin = nn.Linear( hidden_dim, 1).to(self.device)
        # Depois tentar alguma ativação (ReLU, por exemplo, pode "desativar" alguns termos no softmax)
        self.pooling = GlobalAttentionPooling( self.lin ).to(self.device)

        # Fully Connected
        self.fc1 = nn.Linear( hidden_dim, hidden_dim//2).to(self.device)
        self.fc2 = nn.Linear( hidden_dim//2, hidden_dim//4).to(self.device)
        self.fc3 = nn.Linear( hidden_dim//4, self.n_class).to(self.device)
    def forward(self, G, gs):
        #h_global           = self.gat_global( G, label=G.ndata['label'].nonzero().flatten() )
        #gs.ndata['weight'] = h_global[ gs.ndata['idx'] ] # Tentar concatenando
        h_local            = self.gat_local(gs, term=None)
        #h_local            = torch.cat((h_local, h_global[ gs.ndata['idx'] ]), 1)
        h_local            = self.pooling( gs, h_local )
        h_local            = self.fc1( h_local )
        h_local            = self.fc2( h_local )
        h_local            = self.fc3( h_local )
        return h_local
# torch.Size([3652, 300]) torch.Size([3652, 300]) torch.Size([128, 300])
        

In [10]:
in_dim=300
hidden_dim=300
n_heads=8
drop=0.3
attn_drop=0.5
batch_size=128
device='cuda:0'

In [11]:
model = TGA( in_dim, hidden_dim, graph_builder.n_class,
            n_heads=n_heads, drop=drop, attn_drop=attn_drop )
model

TGA(
  (gat_global): GenericGAT(
    (encoders): ModuleDict(
      (label): Linear(in_features=300, out_features=300, bias=True)
    )
    (layers): ModuleList(
      (0): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky_relu): LeakyReLU(negative_slope=0.2)
        (res_fc): Identity()
      )
      (1): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky_relu): LeakyReLU(negative_slope=0.2)
        (res_fc): Identity()
      )
    )
  )
  (gat_local): GenericGAT(
    (encoders): ModuleDict(
      (term): Linear(in_features=300, out_features=300, bias=True)
    )
    (layers): ModuleList(
      (0): GATConv(
        (fc): Linear(in_features=300, out_features=2400, bias=False)
        (feat_drop): Dropout(p=0.3)
        (attn_drop): Dropout(p=0.5)
        (leaky

In [12]:
def collate(param):
    X, y = zip(*param)
    Gs_nx = graph_builder.transform(X)
    
    Gs_dgl_list = []
    idx_terms = { l for l in graph_builder.label_ids }
    for g in Gs_nx:
        g_dgl = dgl.DGLGraph()
        g_dgl.from_networkx(g, node_attrs=['emb', 'idx'] )
        Gs_dgl_list.append( g_dgl )
        
        idx_terms = idx_terms.union( set(nx.get_node_attributes(g,'idx').values()) )
    
    Gs_dgl = dgl.batch(Gs_dgl_list)
    big_graph_dgl = dgl.DGLGraph()
    
    big_graph_dgl.from_networkx(graph_builder.g, node_attrs=['emb', 'label', 'idx'] )
    
    #subgraph = graph_builder.g.subgraph(idx_terms)
    #big_graph_dgl.from_networkx(subgraph, node_attrs=['emb', 'label', 'idx'] )
    
    return big_graph_dgl, Gs_dgl, torch.tensor(y)

In [13]:
loss_func = nn.CrossEntropyLoss()

optimizer = optim.Adam( model.parameters(), lr=1e-3, weight_decay=1e-3)
#optimizer = optim.AdamW( model.parameters(), lr=1e-2, weight_decay=1e-3)

#optimizer = optim.RMSprop( model.parameters(), lr=1e-2, weight_decay=1e-4)
#optimizer = optim.RMSprop( model.parameters(), lr=0.0001 )

model.train()
torch.cuda.synchronize()

In [14]:
n_epochs = 10

for epoch in range(n_epochs):
    epoch_loss = 0
    data_loader = DataLoader(list(zip(fold.X_train, fold.y_train)), batch_size=batch_size,
                             shuffle=True, collate_fn=collate, num_workers=2)
    with tqdm(total=len(fold.y_train)) as pbar:
        total = 1
        correct = 1
        model.train()
        for G, gs, y in data_loader:
            G = G.to( torch.device('cuda:0') )
            gs = gs.to( torch.device('cuda:0') )
            y = y.to( torch.device('cuda:0') )
            
            optimizer.zero_grad()
            
            outputs = model( G, gs )
            probs_Y = torch.softmax(outputs, 1)
            sampled_Y = torch.argmax(probs_Y, 1).reshape(-1)
            
            total += y.size(0)
            correct += (sampled_Y == y).sum().item()
            
            del probs_Y, sampled_Y, G, gs
            
            # NN backprop phase
            loss = loss_func(outputs, y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
            
            pbar.update( len(y) )
            pbar.set_description_str(f'iter {epoch} Acc train: {correct/total:.3}')

HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=19907.0), HTML(value='')))




In [15]:
g = nx.Graph()
g.add_nodes_from( [ (0, {'idx': 0}), (1, {'idx': 1}), (2, {'idx': 2}) ] )

In [16]:
list(nx.get_node_attributes(g,'idx').values())

[0, 1, 2]

In [17]:
outputs

tensor([[ 8.5621e-01, -2.7914e+00, -5.0857e-01,  2.0302e+00, -3.2203e+00,
         -1.3610e+00, -1.6609e+00,  1.9490e+00,  1.4226e+00, -5.8304e-01,
          1.3319e+00],
        [-1.8599e+00, -1.6571e-01,  1.3800e+00,  3.0833e+00, -9.9206e-01,
         -4.4450e-01,  7.0561e-01, -1.2386e+00, -5.9356e-01, -3.3527e+00,
         -5.3752e-01],
        [ 3.1851e+00, -1.8506e+00, -1.1523e+00,  8.4189e-01, -2.2230e+00,
         -1.1341e+00, -2.2522e+00,  1.3125e+00,  6.6847e-01,  1.0213e+00,
          8.4633e-01],
        [-6.1414e-01, -3.2895e+00, -5.8638e-01,  2.3248e+00, -4.0843e+00,
         -2.6888e+00, -1.5182e+00,  1.4223e+00,  2.3233e+00, -4.1613e-01,
          3.2309e+00],
        [-6.5610e-02, -1.3356e-01, -5.1689e-01,  1.2052e+00, -4.5027e+00,
         -2.0272e+00, -3.9816e+00,  2.7224e+00,  2.2447e+00,  1.6045e+00,
          8.0511e-01],
        [-1.0919e+00, -9.6540e-01,  2.7508e+00,  2.1081e+00, -2.0723e+00,
         -1.1534e+00, -1.2076e+00,  2.0568e+00, -1.3149e-01, -2.4500e+0

In [18]:
labels_hiddens = torch.eye( 11 )
labels_hiddens

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

CosineEmbeddingLoss()

tensor(0.5356, grad_fn=<MeanBackward1>)

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])