# MARA - IMDB_mlh dataset tests - by Bartosz Trojan
The implementation will be based on the official MARA paper
Right now I don't have much to show, but this notebook will be updated

## Imports and data preprocessing

In [1]:
# os.environ['TORCH'] = torch.__version__
# print(torch.__version__)

# !pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# !pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

In [1]:
import os
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv
from utils.read_data_new import IMDB_mlh

imdb = IMDB_mlh()
imdb.info()

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([5614, 1000])
torch.Size([5614])
IMDB movie type dataset:
 Number of nodes: 5614
 Number of edges: 14715
 Number of edges: layer1: 5443, layer2: 3658, cross_layer: 5614
 Number of features: 1000
 Number of classes: 3
 Number of nodes per class: tensor([ 640, 2438, 2536])


## Model architecture

In [None]:
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(1234)
        self.conv1 = GCNConv(imdb.get_number_of_features(), 512)
        self.conv2 = GCNConv(512, 256)
        self.conv3 = GCNConv(256, 52)
        self.classifier = Linear(52, imdb.get_number_of_classes())

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = h.tanh()
        h = self.conv2(h, edge_index)
        h = h.tanh()
        h = self.conv3(h, edge_index)
        h = h.tanh()  # Final GNN embedding space.

        # Apply a final (linear) classifier.
        out = torch.sigmoid(self.classifier(h))

        return out, h

model = GCN()
print(model)

AttributeError: 'IMDB_mlh' object has no attribute 'num_features'

## Simple model training

In [None]:
model = GCN()

out, h = model(imdb.node_features, imdb.layer_1.t())

print(out.shape)
print(h.shape)

torch.Size([2807, 3])
torch.Size([2807, 52])


In [None]:
# tymczasowo dla przyśpieszenia testów

from config import config
import torch

class MARA():
    def __init__(self, simplificaton_type=config["simplification_type"], simplification_stages=config["simplification_stages"], simplification_strategy=config["simplification_strategy"], DE_p=config["DE_p"], NS_k=config["NS_k"]):
        self.simplification_type = simplificaton_type
        self.simplification_stages = simplification_stages
        self.simplification_strategy = simplification_strategy
        self.DE_p = DE_p
        self.NS_k = NS_k

    def simplify(self, nodes_for_each_layer, edges_for_each_layer, cross_layer_edges, node_classes):
        if(self.simplification_strategy == "DE"):
            if(self.simplification_type == "l-b-l"):
                simplified = []
                for layer in range(len(edges_for_each_layer)):
                    print(edges_for_each_layer[layer].shape)
                    mask = torch.rand(1, edges_for_each_layer[layer].shape[0]) > self.DE_p
                    simplified.append(edges_for_each_layer[layer][mask.squeeze()].clone())
                    print(simplified[layer].shape)
                return simplified


In [None]:
mara = MARA()

siplified_edges = mara.simplify(imdb.node_features, [imdb.layer_1, imdb.layer_2], [], imdb.classes)

torch.Size([5443, 2])
torch.Size([4366, 2])
torch.Size([3658, 2])
torch.Size([2901, 2])


In [None]:
model = GCN()
criterion = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model.parameters(), lr=0.01) 

def accuracy(preds, labels):
    predicted_labels = torch.argmax(preds, dim=1)
    accuracy = (predicted_labels == labels).float().mean()

    return accuracy

def train(data):
    optimizer.zero_grad()
    out, h = model(data.node_features, data.layer_1.t()) 
    train_mask = data.get_training_mask(mask_size=0.5)

    loss = criterion(out[train_mask], data.classes[train_mask])
    acc = accuracy(out[train_mask], data.classes[train_mask])

    loss.backward()
    optimizer.step()

    return loss, acc

for epoch in range(201):
    loss, acc = train(imdb)
    if (epoch+1)%10 == 0:
        print("======== ",epoch+1," ========")
        print(f"Loss: {loss}")
        print(f"Accuracy: {acc}")

Loss: 0.8080915808677673
Accuracy: 0.7692307829856873
Loss: 0.7790372371673584
Accuracy: 0.7760055661201477
Loss: 0.7441185116767883
Accuracy: 0.807666003704071
Loss: 0.7488386034965515
Accuracy: 0.8007042407989502
Loss: 0.7199681401252747
Accuracy: 0.8195804357528687
Loss: 0.7107707858085632
Accuracy: 0.8474830985069275
Loss: 0.7006860375404358
Accuracy: 0.8562091588973999
Loss: 0.6927056312561035
Accuracy: 0.8596127033233643
Loss: 0.690304696559906
Accuracy: 0.8615494966506958
Loss: 0.6872328519821167
Accuracy: 0.8568249344825745
Loss: 0.6731147766113281
Accuracy: 0.875
Loss: 0.685964047908783
Accuracy: 0.8640287518501282
Loss: 0.6952534317970276
Accuracy: 0.8538135886192322
Loss: 0.6806227564811707
Accuracy: 0.8711302876472473
Loss: 0.6810298562049866
Accuracy: 0.8672817349433899
Loss: 0.6812463402748108
Accuracy: 0.8690476417541504
Loss: 0.6749157309532166
Accuracy: 0.8720608353614807
Loss: 0.681624710559845
Accuracy: 0.8650619983673096
Loss: 0.682450532913208
Accuracy: 0.861950576