# MARA - IMDB_mlh dataset tests - by Bartosz Trojan
The implementation will be based on the official MARA paper
Right now I don't have much to show, but this notebook will be updated

## Imports and data preprocessing

In [1]:
# os.environ['TORCH'] = torch.__version__
# print(torch.__version__)

# !pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

In [2]:
import os
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from utils.read_data_new import IMDB_mlh
from config import config

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Working on device: ", device)

imdb = IMDB_mlh().to(device)
imdb.info()

  from .autonotebook import tqdm as notebook_tqdm


Working on device:  cuda
IMDB movie type dataset:
 Number of nodes: 5614
 Number of edges: 14715
 Number of edges: layer1: 5443, layer2: 3658, cross_layer: 5614
 Number of features: 1000
 Number of classes: 3
 Number of nodes per class: tensor([ 640, 2438, 2536], device='cuda:0')


## Model architecture

In [None]:
class DropEdge(nn.Module):
    def __init__(self, simplification_type="l-b-l", p=0.2):
        super().__init__()
        self.simplification_type = simplification_type
        self.p = p

    def forward(self, edges, layers_lengths):
        if(self.simplification_type == "l-b-l"):
            intra_layers_length = torch.sum(layers_lengths[:-1])
            intra_mask = torch.rand(intra_layers_length) > self.p

            intra_layers = edges[:,:intra_layers_length]
            edges = torch.cat([intra_layers[:,intra_mask], edges[:,intra_layers_length:]], dim=1)

            new_layers_lenghts = []
            temp = 0
            for i in range(len(layers_lengths)-1):
                new_layers_lenghts.append(torch.sum(intra_mask[temp:temp + layers_lengths[i]]))
                temp += layers_lengths[i]
            new_layers_lenghts.append(layers_lengths[-1])

            return edges, torch.tensor(new_layers_lenghts)
        
        if(self.simplification_type == "multilayer"):
            mask = torch.rand(edges.shape[1]) > self.p
            edges = edges[:, mask]

            new_layers_lenghts = []
            temp = 0
            for i in range(len(layers_lengths)):
                new_layers_lenghts.append(torch.sum(mask[temp:temp + layers_lengths[i]]))
                temp += layers_lengths[i]

            return edges, torch.tensor(new_layers_lenghts)

In [None]:
# Dlaczego oni nie wspominają o żadnych funkcjach aktywacji w MARZE???

class MARA(nn.Module):
    def __init__(self, simplificaton_type=config["simplification_type"], simplification_stages=config["simplification_stages"], simplification_strategy=config["simplification_strategy"], DE_p=config["DE_p"], NS_k=config["NS_k"]):
        super().__init__()
        torch.manual_seed(1234)
        
        self.simplification_type = simplificaton_type
        self.simplification_stages = simplification_stages
        self.simplification_strategy = simplification_strategy
        self.DE_p = DE_p
        self.NS_k = NS_k
        
        self.conv1 = GCNConv(imdb.get_number_of_features(), 512)
        self.conv2 = GCNConv(512, 256)
        self.conv3 = GCNConv(256, 52)
        self.classifier = nn.Linear(52, imdb.get_number_of_classes())
        self.ReLU = torch.nn.ReLU6()

        self.dropout = torch.nn.Dropout(0) # ręcznie sprawdziłem dropout 0.1, 0.2 i 0.3 i żaden nie zwiększa wyniku

        self.dropedge = DropEdge(self.simplification_type, self.DE_p)

    def forward(self, x, edges, layers_lengths):
        if self.simplification_stages == "once":
            edges, layers_lengths = self.dropedge(edges, layers_lengths)
            h = self.conv1(x, edges)
            h = self.dropout(h)
            h = self.ReLU(h)
            h = self.conv2(h, edges)
            h = self.dropout(h)
            h = self.ReLU(h)
            h = self.conv3(h, edges)
            h = self.dropout(h)
            h = self.ReLU(h)

        if self.simplification_stages == "each":
            edges, layers_lengths = self.dropedge(edges, layers_lengths)
            h = self.conv1(x, edges)
            h = self.dropout(h)
            h = self.ReLU(h)
            edges, layers_lengths = self.dropedge(edges, layers_lengths)
            h = self.conv2(h, edges)
            h = self.dropout(h)
            h = self.ReLU(h)
            edges, layers_lengths = self.dropedge(edges, layers_lengths)
            h = self.conv3(h, edges)
            h = self.dropout(h)
            h = self.ReLU(h)

        out = torch.sigmoid(self.classifier(h))

        return out, h

model = MARA()
print(model)

MARA(
  (conv1): GCNConv(1000, 512)
  (conv2): GCNConv(512, 256)
  (conv3): GCNConv(256, 52)
  (classifier): Linear(in_features=52, out_features=3, bias=True)
  (ReLU): ReLU6()
  (dropout): Dropout(p=0, inplace=False)
  (dropedge): DropEdge()
)


## Simple model training

In [76]:
model = MARA(simplification_stages="once", simplification_strategy="multilayer", DE_p=0.2).to(device)

out, h = model(imdb.node_features, torch.cat([imdb.layer_1, imdb.layer_2, imdb.cross_edges], dim=0).t(), torch.tensor([imdb.layer_1.shape[0], imdb.layer_2.shape[0], imdb.cross_edges.shape[0]]))

print(out.shape)
print(h.shape)

# replace mask with masks

# test masks for sure
tr, val, test = imdb.get_training_mask()

print(torch.sum(tr), torch.sum(val), torch.sum(test))
print(torch.sum(tr[:2807]), torch.sum(val[:2807]), torch.sum(test[:2807]))

torch.Size([5614, 3])
torch.Size([5614, 52])
tensor(1400, device='cuda:0') tensor(1328, device='cuda:0') tensor(2886, device='cuda:0')
torch.Size([5614]) torch.Size([5614]) torch.Size([5614])
tensor(1400, device='cuda:0') tensor(1328, device='cuda:0') tensor(2886, device='cuda:0')
tensor(700, device='cuda:0') tensor(664, device='cuda:0') tensor(1443, device='cuda:0')


In [6]:
# class DropEdge(torch.autograd.Function):
#     def __init__(self, simplification_type="l-b-l", p=0.2):
#         self.simplification_type = simplification_type
#         self.p = p
        
#     @staticmethod
#     def forward(ctx, intra_edges, cross_edges):
#         if(ctx.simplification_type == "l-b-l"):
#             mask = torch.rand(intra_edges.shape) > ctx.p
#             ctx.save_for_backward(mask)

#             return intra_edges[mask], cross_edges
        
#         if(ctx.simplification_type == "multilayer"):
#             intra_mask = torch.rand(intra_edges.shape) > ctx.p
#             cross_mask = torch.rand(cross_edges.shape) > ctx.p
#             ctx.save_for_backward(intra_mask, cross_mask)

#             return intra_edges[intra_mask], cross_edges[cross_mask]

#     @staticmethod
#     def backward(ctx, grad_output):
#         if(ctx.simplification_type == "l-b-l"):
#             mask = ctx.saved_tensors
            
#             return intra_edges[mask], cross_edges
        
#         if(ctx.simplification_type == "multilayer"):
#             intra_mask = torch.rand(intra_edges.shape) > ctx.p
#             cross_mask = torch.rand(cross_edges.shape) > ctx.p
#             ctx.save_for_backward(intra_mask, cross_mask)

#             return intra_edges[intra_mask], cross_edges[cross_mask]

#         A = grad_output * D
#         return A / (1-p)

In [77]:
import torch
from sklearn.metrics import roc_auc_score

model = MARA().to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=0.0005)

train_mask, val_mask, test_mask = imdb.get_training_mask(train_mask_size=0.25, val_mask_size=0.25)

def roc_auc(preds, labels):
    return roc_auc_score(labels.detach().cpu(), torch.softmax(preds.detach().cpu(), 1), multi_class='ovr')

def train(data):
    model.train()
    optimizer.zero_grad()
    edges = torch.cat([data.layer_1, data.layer_2, data.cross_edges], dim=0).t()
    layers_lengths = torch.tensor([data.layer_1.shape[0], data.layer_2.shape[0], data.cross_edges.shape[0]], dtype=torch.int64)
    out, h = model(data.node_features, edges, layers_lengths)

    train_loss = criterion(out[train_mask], data.classes[train_mask])
    train_loss.backward()
    optimizer.step()

    train_score = roc_auc(out[train_mask], data.classes[train_mask])
    val_score = roc_auc(out[val_mask], data.classes[val_mask])

    return train_loss.item(), train_score, val_score

def evaluate(data, mask):
    model.eval()
    with torch.no_grad():
        edges = torch.cat([data.layer_1, data.layer_2, data.cross_edges], dim=0).t()
        layers_lengths = torch.tensor([data.layer_1.shape[0], data.layer_2.shape[0], data.cross_edges.shape[0]], dtype=torch.int64)
        out, h = model(data.node_features, edges, layers_lengths)
        score = roc_auc(out[mask], data.classes[mask])
    return score

# If you want to run this without early stopping, set patience to 
# bigger number than epoch number
best_val_score = 0
patience = 50
patience_counter = 0
best_weights = None

for epoch in range(201):
    train_loss, train_score, val_score = train(imdb)

    if val_score > best_val_score:
        best_val_score = val_score
        patience_counter = 0
        best_weights = model.state_dict()
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch+1}")
        break

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1} | Loss: {train_loss:.4f} | Train AUC: {train_score:.4f} | Val AUC: {val_score:.4f}")

model.load_state_dict(best_weights)

test_score = evaluate(imdb, test_mask)
whole_score = evaluate(imdb, slice(None))
print(f"Final Test AUC: {test_score:.4f} | Whole Dataset AUC: {whole_score:.4f}")


tensor(1388, device='cuda:0') tensor(1432, device='cuda:0') tensor(2794, device='cuda:0')
torch.Size([5614]) torch.Size([5614]) torch.Size([5614])
Epoch 5 | Loss: 1.0841 | Train AUC: 0.6932 | Val AUC: 0.6546
Epoch 10 | Loss: 1.0035 | Train AUC: 0.6410 | Val AUC: 0.5815
Epoch 15 | Loss: 0.9801 | Train AUC: 0.6812 | Val AUC: 0.6241
Epoch 20 | Loss: 0.9762 | Train AUC: 0.7846 | Val AUC: 0.7008
Epoch 25 | Loss: 0.9626 | Train AUC: 0.8295 | Val AUC: 0.7608
Epoch 30 | Loss: 0.9487 | Train AUC: 0.8534 | Val AUC: 0.7810
Epoch 35 | Loss: 0.9213 | Train AUC: 0.9342 | Val AUC: 0.8353
Epoch 40 | Loss: 0.8692 | Train AUC: 0.9607 | Val AUC: 0.8788
Epoch 45 | Loss: 0.7718 | Train AUC: 0.9645 | Val AUC: 0.8642
Epoch 50 | Loss: 0.6936 | Train AUC: 0.9755 | Val AUC: 0.8457
Epoch 55 | Loss: 0.6639 | Train AUC: 0.9841 | Val AUC: 0.8226
Epoch 60 | Loss: 0.6503 | Train AUC: 0.9883 | Val AUC: 0.8083
Epoch 65 | Loss: 0.6361 | Train AUC: 0.9912 | Val AUC: 0.8088
Epoch 70 | Loss: 0.6212 | Train AUC: 0.9954 | Va