In [None]:
import torch
from torch import nn
from torch.nn.modules.module import Module
import numpy as np
import networkx as nx
from collections import defaultdict
import pandas as pd
from torch import optim
import seaborn as sns
import random
import networkx as nx
from itertools import permutations, combinations
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split, StratifiedKFold
from matplotlib import pyplot as plt
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier

# Prefix prepended (by plain string concatenation) to every dataset file path
# opened by the gen_* loaders; when non-empty it must end with a path separator.
base_data_path = ''

In [None]:
class MeanAggregator(Module):
    '''
    Samples a neighbourhood for every requested node and returns the mean of
    the transformed features over that neighbourhood (the node itself is
    always part of its own neighbourhood).

    Everything except the set of nodes to aggregate is stored on the instance;
    the nodes (batch, negative samples, etc.) are passed to forward.

    Parameters
    ----------
    features : callable
        Maps a list of node ids to a 2-D tensor with one row per node and
        `feature_dim` columns. It may be a tensor lookup or another encoder.
    feature_dim : int
        Width of the rows returned by `features`.
    emb_dim : int
        Output width of the linear layer applied before averaging.
    n_nbr_samples : int
        Number of neighbours sampled (without replacement) for nodes that have
        at least that many neighbours; smaller neighbourhoods are used whole.
    g : mapping
        Any dict-like {node: collection(nbrs)}.
    dropout : float
        Dropout probability applied after the Tanh.
    depth : int
        Stored on the instance; not used by this class.
    batchnorm : bool
        Whether a BatchNorm1d layer follows the linear layer.
    '''
    def __init__(
        self,
        features,
        feature_dim,
        emb_dim,
        n_nbr_samples,
        g,
        dropout=0.5,
        depth=1,
        batchnorm=True,
    ):
        super(MeanAggregator, self).__init__()
        self.feature_dim = feature_dim
        self.depth = depth
        # Same placement as before when a GPU is present; fall back to the CPU
        # instead of crashing on hosts without CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Layer order (and therefore the Sequential indices / state_dict keys)
        # is Linear, [BatchNorm1d], Tanh, Dropout.
        layers = [nn.Linear(feature_dim, emb_dim)]
        if batchnorm:
            layers.append(nn.BatchNorm1d(emb_dim))
        layers.extend([nn.Tanh(), nn.Dropout(dropout)])
        self.fc = nn.Sequential(*layers).to(device)
        self.features = features
        self.n_nbr_samples = n_nbr_samples
        self.g = g # can be any dict like: {node: collection(nbrs)}
        self.random_ = np.random.choice
        self.set_ = set

    def forward(
        self,
        node_list,
        randomize_features=False,
    ):
        '''
        Return one aggregated row per entry of node_list (same order).

        features is (unique_node_dim by feature_dim)
        mask is     (node_list by unique_node_dim)

        When randomize_features is True the rows of the averaging mask are
        permuted, so each output row is the neighbourhood mean of a randomly
        chosen entry of node_list.
        '''
        # samples and node_list are ordered the same; every sample ends with
        # the node itself, so no mask row is ever empty.
        samples = []
        for node in node_list:
            nbrs = list(self.g[node])
            if len(nbrs) >= self.n_nbr_samples:
                nbrs = list(self.random_(nbrs, self.n_nbr_samples, replace=False))
            samples.append(nbrs + [node])

        unique_nodes_list = list(set.union(*(self.set_(x) for x in samples)))
        # this helps us keep column indexes straight
        unique_nodes_dict = {node: idx for idx, node in enumerate(unique_nodes_list)}

        # rows are ordered in the same order as the batch
        row_idxs = []
        col_idxs = []
        for node_idx, node_nbrs in enumerate(samples):
            for alter in node_nbrs:
                row_idxs.append(node_idx)
                col_idxs.append(unique_nodes_dict[alter])

        # for all but the outermost call, self.features is a call to another
        # encoder, so only the 2nd hop nodes that are actually needed are encoded
        sampled_features = self.fc(self.features(unique_nodes_list))

        # rows: ordered by samples, cols: ordered by unique node idx.
        # Built on the device of the features so the matmul below never mixes
        # devices.
        mask = torch.zeros(
            len(samples),
            len(unique_nodes_list),
            device=sampled_features.device,
        )
        mask[row_idxs, col_idxs] = 1
        # normalise each row so the matmul computes a mean
        mask = mask.div(mask.sum(dim=1).unsqueeze(1))
        if randomize_features:
            mask = mask[torch.randperm(mask.size()[0])]
        return mask.mm(sampled_features)


class EncodingLayer(Module):
    '''
    Runs one pass of an aggregator over a batch of nodes and applies a
    Linear -> [BatchNorm1d] -> Tanh -> Dropout block to the result.

    Parameters
    ----------
    features : callable
        Stored on the instance; forward does not currently call it.
    feature_dim : int
        Input width of `fc0`.
    emb_input_dim : int
        Width of the aggregator output (input width of `fc`).
    emb_dim : int
        Output width of `fc`.
    g : mapping
        Stored on the instance; not used by this class.
    agg : Module
        Aggregator called as agg(node_list=..., randomize_features=...).
    base_model : Module or None
        When given, it is registered as a submodule so that its parameters are
        reached through this layer (e.g. by .parameters() and .apply()).
    dropout : float
        Dropout probability.
    depth : int
        Stored on the instance; not used by this class.
    batchnorm : bool
        Whether `fc` contains a BatchNorm1d layer (`fc0` always does).
    '''
    def __init__(
        self,
        features,
        feature_dim,
        emb_input_dim,
        emb_dim,
        g,
        agg,
        base_model=None,
        dropout=0.5,
        depth=1,
        batchnorm=True,
    ):
        super(EncodingLayer, self).__init__()
        self.features = features
        self.emb_dim = emb_dim
        self.g = g
        self.agg = agg
        self.depth = depth
        # Explicit None test: the attribute is only created when a base model
        # was actually supplied.
        if base_model is not None:
            self.base_model = base_model
        # Same placement as before when a GPU is present; fall back to the CPU
        # instead of crashing on hosts without CUDA.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # fc0 is kept so existing checkpoints / parameter lists stay unchanged,
        # although forward does not use it at the moment.
        self.fc0 = nn.Sequential(
            nn.Linear(feature_dim, emb_input_dim),
            nn.BatchNorm1d(emb_input_dim),
            nn.Tanh(),
            nn.Dropout(dropout),
        ).to(device)
        # Layer order (and Sequential indices) is Linear, [BatchNorm1d], Tanh, Dropout.
        layers = [nn.Linear(emb_input_dim, emb_dim)]
        if batchnorm:
            layers.append(nn.BatchNorm1d(emb_dim))
        layers.extend([nn.Tanh(), nn.Dropout(dropout)])
        self.fc = nn.Sequential(*layers).to(device)

    def forward(self, node_list, randomize_features=False):
        '''
        Aggregate over node_list and encode the result.

        Returns a tensor with one row per entry of node_list and emb_dim columns.
        '''
        aggregated = self.agg(
            node_list=node_list,
            randomize_features=randomize_features,
        )
        return self.fc(aggregated)


class MeanModel(Module):
    '''
    Two stacked mean-aggregation encoders.

    The first aggregator/encoder pair (agg1/enc1) reads rows of the raw
    `features` tensor; the second pair (agg2/enc2) reads the output of enc1.
    In training mode forward returns the output of the second encoder; in
    evaluation mode it returns the second and first encoder outputs
    concatenated along dim 1.
    '''
    def __init__(
        self,
        emb_dim,
        n_nbr_samples1,
        n_nbr_samples2,
        g,
        features,
        hidden_dim=64,
        dropout=0.5,
    ):
        super(MeanModel, self).__init__()
        raw_dim = features.size()[1]

        def raw_lookup(nodes):
            # rows of the raw feature tensor for the requested nodes
            return features[nodes]

        def first_hop(nodes):
            # enc1 is resolved at call time, after it has been assigned below
            return self.enc1(nodes)

        # keyword arguments shared by all four sub-modules
        common = dict(g=g, dropout=dropout, batchnorm=True)

        self.agg1 = MeanAggregator(
            features=raw_lookup,
            feature_dim=raw_dim,
            emb_dim=hidden_dim,
            n_nbr_samples=n_nbr_samples1,
            **common
        )
        self.enc1 = EncodingLayer(
            features=raw_lookup,
            feature_dim=raw_dim,
            emb_input_dim=hidden_dim,
            emb_dim=hidden_dim,
            agg=self.agg1,
            base_model=None,
            depth=2,
            **common
        )
        self.agg2 = MeanAggregator(
            features=first_hop,
            feature_dim=hidden_dim,
            emb_dim=hidden_dim,
            n_nbr_samples=n_nbr_samples2,
            **common
        )
        self.enc2 = EncodingLayer(
            features=first_hop,
            feature_dim=hidden_dim,
            emb_input_dim=hidden_dim,
            emb_dim=emb_dim,
            agg=self.agg2,
            base_model=self.enc1,
            depth=1,
            **common
        )
        # enc2 holds enc1 as a submodule, so apply() visits both encoders
        self.model = self.enc2.apply(init_weights)

    def forward(self, node_list, randomize_features=False):
        '''
        Embed node_list.

        Training mode: output of the outer encoder (randomize_features is
        forwarded). Evaluation mode: outer and inner encoder outputs
        concatenated column-wise, always without randomisation.
        '''
        if not self.model.training:
            outer = self.enc2(node_list, False)
            inner = self.enc1(node_list, False)
            return torch.cat((outer, inner), dim=1)
        return self.model(node_list, randomize_features)

    
def _best_accuracy_threshold(y_true, scores):
    '''Return the ROC-curve threshold th that maximises accuracy of `scores > th`.'''
    _, _, thresholds = roc_curve(y_true, scores)
    best_acc = 0
    best_threshold = 0
    for th in thresholds:
        acc = accuracy_score(y_true, scores > th)
        if acc > best_acc:
            best_acc = acc
            best_threshold = th
    return best_threshold


def _cross_validate_graph_embeddings(X, y):
    '''Run shuffled, stratified 10-fold CV of an L2 logistic regression; return (accs, aucs).'''
    fold_accs = []
    fold_aucs = []
    multiclass = len(set(y)) > 2
    kf = StratifiedKFold(n_splits=10, shuffle=True)
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        if multiclass:
            logit = OneVsRestClassifier(LogisticRegression(penalty='l2'))
            logit.fit(X_train, y_train)
            preds = logit.predict(X_test)
            # AUC is not computed for more than two classes; 0 is a placeholder
            fold_aucs.append(0)
            fold_accs.append(accuracy_score(y_test, preds))
        else:
            logit = LogisticRegression(penalty='l2')
            logit.fit(X_train, y_train)
            # the decision threshold is tuned on the training fold only
            train_scores = logit.predict_proba(X_train)[:, 1]
            threshold = _best_accuracy_threshold(y_train, train_scores)
            test_scores = logit.predict_proba(X_test)[:, 1]
            fold_aucs.append(roc_auc_score(y_test, test_scores))
            fold_accs.append(accuracy_score(y_test, test_scores > threshold))
    return fold_accs, fold_aucs


def run_model_within(
    model_class,
    emb_dim,
    n_nbr_samples1,
    n_nbr_samples2,
    n_pos_samples,
    n_neg_samples_rand,
    n_neg_samples_shuffle,
    g,
    features,
    graph_label_dict,
    lr=0.01,
    n_runs=20,
    n_epochs=20,
    dropout=0.5,
    batch_size=256,
):
    '''
    Repeatedly train a pair of node encoders and score graph classification.

    For each of `n_runs` runs two fresh `model_class` instances are built and
    optimised with Adam for `n_epochs` steps (one random batch of at most
    `batch_size` nodes per step) on `sigmoid_loss`. The first model then embeds
    every node in evaluation mode; node embeddings are summed per graph (nodes
    are grouped by their 'graph_idx' attribute) and the graph vectors are
    scored with stratified 10-fold logistic regression.

    Parameters
    ----------
    model_class : callable
        Called with emb_dim, n_nbr_samples1, n_nbr_samples2, g, features and
        dropout keyword arguments; must return a torch Module.
    n_pos_samples : int
        How many times each batch node is repeated as its own positive example.
    n_neg_samples_rand : int
        Negatives per batch node drawn uniformly from the other batch nodes.
    n_neg_samples_shuffle : int
        Negatives per batch node produced by encoding the node with
        randomize_features=True.
    g : networkx graph
        Every node needs a 'graph_idx' attribute.
    graph_label_dict : dict
        Maps graph index to class label.
    n_epochs : int
        Number of optimisation steps per run; with 0 the encoders are evaluated
        as initialised.

    Returns
    -------
    (accs, aucs) : two lists with one entry per run, each entry being the list
        of per-fold accuracies / AUCs (AUC is 0 for more than two classes).

    Raises
    ------
    ValueError
        If training is requested with both negative-sample counts equal to 0.
    '''
    accs = []
    aucs = []

    # `g.nodes` works on every networkx 2.x release; `g.node` was removed in 2.4.
    node_list = list(g.nodes)

    # Graph membership and labels do not depend on the run, so build them once.
    graph_nodes = defaultdict(list)
    for node in g.nodes:
        graph_nodes[g.nodes[node]['graph_idx']].append(node)
    y = np.array([graph_label_dict[g_idx] for g_idx in graph_nodes])

    for _ in range(n_runs):
        model1 = model_class(
            emb_dim=emb_dim,
            n_nbr_samples1=n_nbr_samples1,
            n_nbr_samples2=n_nbr_samples2,
            g=g,
            features=features,
            dropout=dropout,
        )
        optimizer1 = optim.Adam(
            model1.parameters(),
            lr=lr,
            weight_decay=1e-5,
        )
        model2 = model_class(
            emb_dim=emb_dim,
            n_nbr_samples1=n_nbr_samples1,
            n_nbr_samples2=n_nbr_samples2,
            g=g,
            features=features,
            dropout=dropout,
        )
        optimizer2 = optim.Adam(
            model2.parameters(),
            lr=lr,
            weight_decay=1e-5,
        )
        for _epoch in range(n_epochs):
            model1.train()
            model2.train()
            random.shuffle(node_list)
            batch = node_list[:batch_size]
            optimizer1.zero_grad()
            optimizer2.zero_grad()
            emb_u = model1(batch)

            # positives: the node itself, encoded by the second model
            pos_nodes = [node for node in batch for _ in range(n_pos_samples)]
            emb_v = model2(pos_nodes).view(n_pos_samples * len(batch), -1)

            # "shuffle" negatives: the node itself, later encoded with
            # randomize_features=True
            neg_nodes_shuffle = [
                node for node in batch for _ in range(n_neg_samples_shuffle)
            ]
            # "rand" negatives: other nodes of the same batch
            neg_nodes_rand = []
            batch_set = set(batch)
            for node in batch:
                batch_minus_ego = list(batch_set - {node})
                for _ in range(n_neg_samples_rand):
                    neg_nodes_rand.append(random.choice(batch_minus_ego))

            if neg_nodes_shuffle and neg_nodes_rand:
                emb_neg1 = model2(neg_nodes_shuffle, randomize_features=True)
                emb_neg2 = model2(neg_nodes_rand, randomize_features=False)
                total_neg_samples = n_neg_samples_rand + n_neg_samples_shuffle
                # NOTE(review): concatenating along dim=1 requires both blocks
                # to have the same number of rows, i.e.
                # n_neg_samples_rand == n_neg_samples_shuffle; the view then
                # interleaves the two kinds of negatives row by row.
                emb_neg = torch.cat((emb_neg1, emb_neg2), dim=1).view(
                    total_neg_samples * len(batch),
                    -1,
                )
            elif neg_nodes_shuffle:
                emb_neg = model2(neg_nodes_shuffle, randomize_features=True)
            elif neg_nodes_rand:
                emb_neg = model2(neg_nodes_rand, randomize_features=False)
            else:
                raise ValueError(
                    'at least one of n_neg_samples_rand and '
                    'n_neg_samples_shuffle must be positive to train'
                )
            pos_weight = emb_neg.numel() / emb_u.numel()
            loss = sigmoid_loss(emb_u, emb_v, emb_neg, pos_weight)
            loss.backward()
            optimizer1.step()
            optimizer2.step()

        model1.eval()
        # one row per graph: sum of the embeddings of its nodes
        with torch.no_grad():
            X = np.array([
                model1(nodes).detach().cpu().numpy().sum(axis=0)
                for nodes in graph_nodes.values()
            ])

        run_accs, run_aucs = _cross_validate_graph_embeddings(X, y)
        print(np.mean(run_accs))
        print(np.mean(run_aucs))
        accs.append(run_accs)
        aucs.append(run_aucs)
    return accs, aucs

In [None]:
def _tu_path(dataset, suffix):
    '''Path of the file `<dataset>/<dataset>_<suffix>.txt` below base_data_path.'''
    return base_data_path + '{0}/{0}_{1}.txt'.format(dataset, suffix)


def _read_tu_edges(dataset):
    '''Build an undirected graph from the `_A.txt` edge list (one "n1, n2" pair per line).'''
    g = nx.Graph()
    with open(_tu_path(dataset, 'A'), 'r') as f:
        for line in f:
            n1, n2 = [int(x) for x in line.strip().split(', ')]
            g.add_edge(n1, n2)
    return g


def _assign_graph_indices(g, dataset, on_missing='error'):
    '''
    Store each node's graph index (line k of `_graph_indicator.txt` belongs to
    node k, 1-based) in the node attribute 'graph_idx'.

    on_missing decides what happens to a node id that is not in g, i.e. that
    never appeared in the edge list:
      'error'  - the attribute assignment raises KeyError;
      'skip'   - the node is ignored;
      'attach' - the node is added and linked to a node drawn with
                 random.choice from all nodes currently in g.
                 NOTE(review): that pool includes the new node itself and nodes
                 of other graphs, and the draw is unseeded - confirm intended.
    '''
    if on_missing not in ('error', 'skip', 'attach'):
        raise ValueError('unknown on_missing mode: {!r}'.format(on_missing))
    with open(_tu_path(dataset, 'graph_indicator'), 'r') as f:
        for node_idx_one, line in enumerate(f, start=1):
            graph_idx = int(line.strip())
            if node_idx_one not in g:
                if on_missing == 'skip':
                    continue
                if on_missing == 'attach':
                    g.add_node(node_idx_one)
                    g.add_edge(node_idx_one, random.choice(list(g.nodes)))
            # `g.nodes` works on every networkx 2.x release; `g.node` was removed in 2.4.
            g.nodes[node_idx_one]['graph_idx'] = graph_idx


def _read_graph_labels(dataset, binarize):
    '''
    Read `_graph_labels.txt` into {1-based graph index: label}.

    With binarize=True a label equal to 1 stays 1 and every other value becomes 0;
    otherwise the integer from the file is kept.
    '''
    labels = {}
    with open(_tu_path(dataset, 'graph_labels'), 'r') as f:
        for graph_idx_one, line in enumerate(f, start=1):
            label = int(line.strip())
            if binarize:
                label = 1 if label == 1 else 0
            labels[graph_idx_one] = label
    return labels


def _structural_features(g, feature_dim):
    '''
    One row per node, in node iteration order: the node's degree followed by
    its neighbours' degrees after pad_features(..., feature_dim), sorted in
    descending order.
    '''
    rows = []
    for node in g.nodes:
        nbr_degrees = pad_features([g.degree(x) for x in g[node]], feature_dim)
        rows.append([g.degree(node)] + sorted(nbr_degrees, reverse=True))
    return torch.FloatTensor(rows)


def _load_structural_dataset(dataset, on_missing, binarize, feature_dim=30):
    '''Shared loader for datasets without node labels; returns (g, features, None, labels).'''
    g = _read_tu_edges(dataset)
    _assign_graph_indices(g, dataset, on_missing=on_missing)
    # relabel nodes to 0..n-1 so a node id doubles as its feature-row index
    g = nx.convert_node_labels_to_integers(g)
    graph_label_dict = _read_graph_labels(dataset, binarize=binarize)
    structural_features = _structural_features(g, feature_dim)
    return g, structural_features, None, graph_label_dict


def gen_imdb_b():
    '''Load IMDB-BINARY; returns (graph, structural features, None, {graph idx: 0/1}).'''
    return _load_structural_dataset('IMDB-BINARY', on_missing='error', binarize=True)


def gen_mutag():
    '''
    Load MUTAG; returns (graph, structural features, one-hot node-label
    features with 7 columns, {graph idx: 0/1}).

    The structural feature padding length is the maximum node degree.
    '''
    g = _read_tu_edges('MUTAG')
    _assign_graph_indices(g, 'MUTAG', on_missing='error')
    with open(_tu_path('MUTAG', 'node_labels'), 'r') as f:
        for node_idx_one, line in enumerate(f, start=1):
            node_label = [0., 0., 0., 0., 0., 0., 0.]
            node_label[int(line.strip())] = 1.0
            g.nodes[node_idx_one]['node_label'] = node_label
    # relabel nodes to 0..n-1 so a node id doubles as its feature-row index
    g = nx.convert_node_labels_to_integers(g)
    graph_label_dict = _read_graph_labels('MUTAG', binarize=True)
    feature_dim = max(dict(g.degree).values())
    structural_features = _structural_features(g, feature_dim)
    other_features = torch.FloatTensor(
        [g.nodes[node]['node_label'] for node in g.nodes]
    )
    return g, structural_features, other_features, graph_label_dict


def gen_reddit_b():
    '''
    Load REDDIT-BINARY; returns (graph, structural features, None, {graph idx: 0/1}).

    Node ids that never appear in the edge list are skipped.
    '''
    return _load_structural_dataset('REDDIT-BINARY', on_missing='skip', binarize=True)


def gen_imdb_m():
    '''Load IMDB-MULTI; returns (graph, structural features, None, {graph idx: int label}).'''
    return _load_structural_dataset('IMDB-MULTI', on_missing='error', binarize=False)


def gen_reddit_m5k():
    '''
    Load REDDIT-MULTI-5K; returns (graph, structural features, None, {graph idx: int label}).

    Node ids that never appear in the edge list are added and attached to a
    randomly chosen node (see _assign_graph_indices, mode 'attach').
    '''
    return _load_structural_dataset('REDDIT-MULTI-5K', on_missing='attach', binarize=False)


def gen_reddit_m12k():
    '''
    Load REDDIT-MULTI-12K; returns (graph, structural features, None, {graph idx: int label}).

    Node ids that never appear in the edge list are added and attached to a
    randomly chosen node (see _assign_graph_indices, mode 'attach').
    '''
    return _load_structural_dataset('REDDIT-MULTI-12K', on_missing='attach', binarize=False)

## Mutag

In [None]:
# Load MUTAG: graph, structural features, one-hot node-label features
# (bound to action_features) and the graph-level labels.
g, features, action_features, graph_label_dict = gen_mutag()

In [None]:
# MUTAG experiment: 30 runs, each with 20 optimisation steps (one batch of up
# to 256 nodes per step). The input features are the node-label features
# concatenated with the structural features, moved to the GPU.
mutag_within_accs, mutag_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=4,
    n_nbr_samples2=4,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=torch.cat((action_features, features), dim=1).cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.02,
    n_runs=30,
    n_epochs=20,
    batch_size=256,
)

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
mutag_means = [np.mean(x) for x in mutag_within_accs]
mutag_aucs = [np.mean(x) for x in mutag_within_aucs]

In [None]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(mutag_means), np.std(mutag_means))

In [None]:
# AUC: mean and standard deviation over the per-run means.
print(np.mean(mutag_aucs), np.std(mutag_aucs))

## IMDB-B

In [None]:
# Load IMDB-BINARY. gen_imdb_b returns None in the third position, so this
# rebinds action_features to None; g/features/graph_label_dict are overwritten.
g, features, action_features, graph_label_dict = gen_imdb_b()

In [None]:
# IMDB-BINARY experiment: 30 runs on the structural features.
# NOTE(review): n_epochs=0 means run_model_within performs no optimisation
# steps, so lr, n_pos_samples and the negative-sample counts have no effect
# and the embeddings come from freshly initialised encoders - confirm intended.
imdb_b_within_accs, imdb_b_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=4,
    n_nbr_samples2=4,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=features.cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.02,
    n_runs=30,
    n_epochs=0,
    batch_size=256,
)

In [None]:
# Grand mean over all runs and folds (accuracy, then AUC).
print(np.mean(imdb_b_within_accs))
print(np.mean(imdb_b_within_aucs))

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
imdb_b_means = [np.mean(x) for x in imdb_b_within_accs]
imdb_b_aucs = [np.mean(x) for x in imdb_b_within_aucs]

In [None]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(imdb_b_means), np.std(imdb_b_means))

In [None]:
# AUC: mean and standard deviation over the per-run means.
print(np.mean(imdb_b_aucs), np.std(imdb_b_aucs))

## Reddit-B

In [None]:
# Load REDDIT-BINARY; the third return value (None) is discarded.
g, features, _, graph_label_dict = gen_reddit_b()

In [None]:
# REDDIT-BINARY experiment: 20 runs on the structural features.
# NOTE(review): n_epochs=0 means run_model_within performs no optimisation
# steps, so lr, n_pos_samples and the negative-sample counts have no effect
# and the embeddings come from freshly initialised encoders - confirm intended.
reddit_b_within_accs, reddit_b_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=4,
    n_nbr_samples2=4,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=features.cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.02,
    n_runs=20,
    n_epochs=0,
    batch_size=256,
)

In [None]:
# Grand mean over all runs and folds (accuracy, then AUC).
print(np.mean(reddit_b_within_accs))
print(np.mean(reddit_b_within_aucs))

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
reddit_b_means = [np.mean(x) for x in reddit_b_within_accs]
reddit_b_aucs = [np.mean(x) for x in reddit_b_within_aucs]

In [None]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(reddit_b_means), np.std(reddit_b_means))

In [None]:
# AUC: mean and standard deviation over the per-run means.
print(np.mean(reddit_b_aucs), np.std(reddit_b_aucs))

## IMDB-M

In [None]:
# Load IMDB-MULTI; graph labels are kept as the integers read from the file.
g, features, _, graph_label_dict = gen_imdb_m()

In [None]:
# IMDB-MULTI experiment: 20 runs on the structural features, with larger
# neighbourhood samples (25 / 10) than the binary datasets.
# NOTE(review): n_epochs=0 means run_model_within performs no optimisation
# steps, so lr, n_pos_samples and the negative-sample counts have no effect
# and the embeddings come from freshly initialised encoders - confirm intended.
imdb_m_within_accs, imdb_m_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=features.cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.001,
    n_runs=20,
    n_epochs=0,
    batch_size=256,
)

In [None]:
# Grand mean over all runs and folds (accuracy, then AUC).
print(np.mean(imdb_m_within_accs))
print(np.mean(imdb_m_within_aucs))

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
imdb_m_means = [np.mean(x) for x in imdb_m_within_accs]
imdb_m_aucs = [np.mean(x) for x in imdb_m_within_aucs]

In [11]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(imdb_m_means), np.std(imdb_m_means))

0.506 0.0059479221395187945


In [12]:
# AUC summary. run_model_within records 0 as a placeholder AUC whenever there
# are more than two classes, so for a multi-class dataset this is not a
# measured AUC.
print(np.mean(imdb_m_aucs), np.std(imdb_m_aucs))

0.0 0.0


## Reddit-M5K

In [None]:
# Load REDDIT-MULTI-5K; graph labels are kept as the integers read from the file.
g, features, _, graph_label_dict = gen_reddit_m5k()

In [None]:
# REDDIT-MULTI-5K experiment: 20 runs on the structural features.
# NOTE(review): n_epochs=0 means run_model_within performs no optimisation
# steps, so lr, n_pos_samples and the negative-sample counts have no effect
# and the embeddings come from freshly initialised encoders - confirm intended.
reddit_m5k_within_accs, reddit_m5k_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=5,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=features.cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.02,
    n_runs=20,
    n_epochs=0,
    batch_size=256,
)

In [None]:
# Grand mean accuracy over all runs and folds (shown as the cell output).
np.mean(reddit_m5k_within_accs)

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
reddit_m5k_means = [np.mean(x) for x in reddit_m5k_within_accs]
reddit_m5k_aucs = [np.mean(x) for x in reddit_m5k_within_aucs]

In [None]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(reddit_m5k_means), np.std(reddit_m5k_means))

In [None]:
# AUC summary. run_model_within records 0 as a placeholder AUC whenever there
# are more than two classes, so for a multi-class dataset this is not a
# measured AUC.
print(np.mean(reddit_m5k_aucs), np.std(reddit_m5k_aucs))

## Reddit-M12K

In [None]:
# Load REDDIT-MULTI-12K; graph labels are kept as the integers read from the file.
g, features, _, graph_label_dict = gen_reddit_m12k()

In [None]:
# REDDIT-MULTI-12K experiment: 20 runs on the structural features.
# NOTE(review): n_epochs=0 means run_model_within performs no optimisation
# steps, so lr, n_pos_samples (4 here, 5 in the other sections) and the
# negative-sample counts have no effect and the embeddings come from freshly
# initialised encoders - confirm intended.
reddit_m12k_within_accs, reddit_m12k_within_aucs = run_model_within(
    MeanModel,
    emb_dim=64,
    n_nbr_samples1=25,
    n_nbr_samples2=10,
    n_pos_samples=4,
    n_neg_samples_rand=10,
    n_neg_samples_shuffle=10,
    g=g,
    features=features.cuda(),
    graph_label_dict=graph_label_dict,
    lr=0.02,
    n_runs=20,
    n_epochs=0,
    batch_size=256,
)

In [None]:
# Grand mean accuracy over all runs and folds (shown as the cell output).
np.mean(reddit_m12k_within_accs)

In [None]:
# Collapse the per-fold scores of every run into one mean per run.
reddit_m12k_means = [np.mean(x) for x in reddit_m12k_within_accs]
reddit_m12k_aucs = [np.mean(x) for x in reddit_m12k_within_aucs]

In [None]:
# Accuracy: mean and standard deviation over the per-run means.
print(np.mean(reddit_m12k_means), np.std(reddit_m12k_means))

In [None]:
# AUC summary. run_model_within records 0 as a placeholder AUC whenever there
# are more than two classes, so for a multi-class dataset this is not a
# measured AUC.
print(np.mean(reddit_m12k_aucs), np.std(reddit_m12k_aucs))