In [1]:
from TAS import TAS
from MAS import MAS

import pickle
import multiprocessing
import utils as U
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

In [2]:
# Location of the pickled dataset bundle, relative to the notebook's working
# directory. The loading cell unpacks it into the graph `g` and the
# train/val/test (X, Y) splits.
DATA_PATH = "./cora_nodes.pickle"

In [3]:
# Read the pickled dataset bundle from disk.
# SECURITY: pickle.load can execute arbitrary code while deserializing, so
# DATA_PATH must only ever point at a file from a trusted source.
with open(DATA_PATH, "rb") as f:
    dataset = pickle.load(f)

# The bundle is the graph followed by one two-element [X, Y] pair per split.
g, (train_X, train_Y), (val_X, val_Y), (test_X, test_Y) = dataset

In [4]:
# Hyperparams
# WORKERS and ITER are passed to both the TAS and the MAS constructor.
WORKERS = multiprocessing.cpu_count()  # forwarded as `workers=`; number of CPUs reported by the system
SPLIT_SEED = 1  # NOTE(review): not referenced by any cell visible in this notebook -- confirm it is still needed
ITER = 1  # forwarded as `iter=`

# MAS
# Values forwarded to the MAS constructor. There is no `mas_thresh`: the MAS
# call does not pass a `threshold` argument.
mas_k = 7  # forwarded as `k=`
mas_thresh_len = 15  # forwarded as `thresh_len=`
mas_win_size = 8  # forwarded as `window_size=`
mas_num_permutations = 8  # forwarded as `num_permutations=`
mas_dimensions = 128  # forwarded as `dimensions=`


# TAS
# Values forwarded to the TAS constructor.
tas_k = 6  # forwarded as `k=`
tas_thresh = 4  # forwarded as `threshold=`
tas_thresh_len = 10  # forwarded as `thresh_len=`
tas_win_size = 5  # forwarded as `window_size=`
tas_num_permutations = 8  # forwarded as `num_permutations=`
tas_dimensions = 128  # forwarded as `dimensions=`

### TAS

In [5]:
# Build the TAS model on graph `g` using the TAS hyperparameters from the
# config cell. Judging from the recorded output, construction prints one
# "PERMUTATION: i" line per permutation.
tas = TAS(
    g=g,
    # model-specific settings
    k=tas_k,
    threshold=tas_thresh,
    thresh_len=tas_thresh_len,
    window_size=tas_win_size,
    num_permutations=tas_num_permutations,
    dimensions=tas_dimensions,
    # settings shared with MAS
    iter=ITER,
    workers=WORKERS,
)

PERMUTATION: 0
PERMUTATION: 1
PERMUTATION: 2
PERMUTATION: 3
PERMUTATION: 4
PERMUTATION: 5
PERMUTATION: 6
PERMUTATION: 7


#### Get node embeddings

In [6]:
# Look up TAS embeddings for every split. The generator is consumed in order
# by the unpacking, so the calls run train -> val -> test as before.
train_node_embeds_tas, val_node_embeds_tas, test_node_embeds_tas = (
    U.get_node_embeds(split_nodes, tas.get_embedding)
    for split_nodes in (train_X, val_X, test_X)
)

#### Prediction and evaluation

In [7]:
# Fit a multinomial logistic-regression classifier on the TAS training
# embeddings, then score its predictions on the validation and test splits.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# check the installed version before upgrading.
node_clf_tas = LogisticRegression(multi_class="multinomial", max_iter=400)
node_clf_tas.fit(train_node_embeds_tas, train_Y)

# scikit-learn metrics take (y_true, y_pred) in that order. F1 itself is
# symmetric under the swap, but following the documented order keeps this cell
# correct if a non-symmetric metric (precision, recall, ...) is added later.
val_preds_tas = node_clf_tas.predict(val_node_embeds_tas)
val_f1_macro_tas = f1_score(val_Y, val_preds_tas, average="macro")
val_f1_micro_tas = f1_score(val_Y, val_preds_tas, average="micro")

test_preds_tas = node_clf_tas.predict(test_node_embeds_tas)
test_f1_macro_tas = f1_score(test_Y, test_preds_tas, average="macro")
test_f1_micro_tas = f1_score(test_Y, test_preds_tas, average="micro")

In [8]:
# Report the TAS F1 scores: one line for validation, one for test.
print(
    f"Validation macro f1: {val_f1_macro_tas}, Validation micro f1: {val_f1_micro_tas}",
    f"Test macro f1: {test_f1_macro_tas}, Test micro f1: {test_f1_micro_tas}",
    sep="\n",
)

Validation macro f1: 0.6812571271818774, Validation micro f1: 0.692
Test macro f1: 0.7114762149231728, Test micro f1: 0.715


### MAS

In [9]:
# Build the MAS model on graph `g` using the MAS hyperparameters from the
# config cell. Unlike the TAS call, no `threshold` argument is passed here
# (there is no `mas_thresh` in the config cell). Judging from the recorded
# output, construction prints one "PERMUTATION: i" line per permutation.
mas = MAS(
    g=g,
    # model-specific settings
    k=mas_k,
    thresh_len=mas_thresh_len,
    window_size=mas_win_size,
    num_permutations=mas_num_permutations,
    dimensions=mas_dimensions,
    # settings shared with TAS
    iter=ITER,
    workers=WORKERS,
)

PERMUTATION: 0


PERMUTATION: 1
PERMUTATION: 2
PERMUTATION: 3
PERMUTATION: 4
PERMUTATION: 5
PERMUTATION: 6
PERMUTATION: 7


#### Get node embeddings

In [10]:
# Look up MAS embeddings for every split. The generator is consumed in order
# by the unpacking, so the calls run train -> val -> test as before.
train_node_embeds_mas, val_node_embeds_mas, test_node_embeds_mas = (
    U.get_node_embeds(split_nodes, mas.get_embedding)
    for split_nodes in (train_X, val_X, test_X)
)

#### Prediction and evaluation

In [11]:
# Fit a multinomial logistic-regression classifier on the MAS training
# embeddings, then score its predictions on the validation and test splits.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# check the installed version before upgrading.
node_clf_mas = LogisticRegression(multi_class="multinomial", max_iter=400)
node_clf_mas.fit(train_node_embeds_mas, train_Y)

# scikit-learn metrics take (y_true, y_pred) in that order. F1 itself is
# symmetric under the swap, but following the documented order keeps this cell
# correct if a non-symmetric metric (precision, recall, ...) is added later.
val_preds_mas = node_clf_mas.predict(val_node_embeds_mas)
val_f1_macro_mas = f1_score(val_Y, val_preds_mas, average="macro")
val_f1_micro_mas = f1_score(val_Y, val_preds_mas, average="micro")

test_preds_mas = node_clf_mas.predict(test_node_embeds_mas)
test_f1_macro_mas = f1_score(test_Y, test_preds_mas, average="macro")
test_f1_micro_mas = f1_score(test_Y, test_preds_mas, average="micro")

In [12]:
# Report the MAS F1 scores: one line for validation, one for test.
print(
    f"Validation macro f1: {val_f1_macro_mas}, Validation micro f1: {val_f1_micro_mas}",
    f"Test macro f1: {test_f1_macro_mas}, Test micro f1: {test_f1_micro_mas}",
    sep="\n",
)

Validation macro f1: 0.6903738655716376, Validation micro f1: 0.696
Test macro f1: 0.7202994821961968, Test micro f1: 0.7250000000000001
