## Google Colab

In [None]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the project root assigned below lives under this mount point.
drive.mount("/content/drive")
# Project root on Drive; later cells build the requirements path and sys.path entry from it.
CURR_DIR = "/content/drive/MyDrive/FYP"

## Local

In [2]:
# Local runs: the project root is the parent of the current working directory.
# NOTE(review): if the Colab cell above was also executed, this assignment replaces its value.
CURR_DIR = ".."

## Imports

In [None]:
REQUIREMENTS_PATH = f"{CURR_DIR}/requirements/base.txt"

!pip install -r {REQUIREMENTS_PATH}

In [None]:
!pip install transformers==4.28.1

In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports modules
# before each cell executes so edits to project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [5]:
import sys

# Make the project root importable so the `src.*` and `notebooks.*` imports in this cell resolve.
# Guarded with a membership test so re-running the cell does not pile up duplicate sys.path entries.
if CURR_DIR not in sys.path:
    sys.path.append(CURR_DIR)
from src.models.graph.node_embeddings import generate_node2vec_embeddings
from src.models.fusion.fusion import train_cca, train_dcca, concat_embeddings, create_linear_combiner
from notebooks.entrypoint_utils import (
    run_generate_and_save_graph,
    run_generate_and_save_train_node_embeddings,
    run_generate_and_save_test_node_embeddings,
    run_evaluate,
    run_train_fusion_model,
    run_project_embeddings,
    run_fuse_embeddings,
    run_fuse_embeddings_with_projection
)
from notebooks.settings import N_VALS, K_VALS, ALPHA_VALS

## Citation Network

In [None]:
# Presumably builds and saves the citation graph for the "v10" dataset version — TODO confirm in entrypoint_utils.
run_generate_and_save_graph(
    CURR_DIR,
    "v10",
)

## Node2vec

In [None]:
# Presumably generates and saves the train-set node embeddings under the "node2vec" name,
# using generate_node2vec_embeddings as the generator — TODO confirm argument roles.
run_generate_and_save_train_node_embeddings(CURR_DIR, "v10", "node2vec", generate_node2vec_embeddings)

### Generate Test Node2vec Embeddings

#### TF-IDF

In [None]:
# Presumably generates test-set node2vec embeddings from the "tfidf" text model,
# once per value in N_VALS — TODO confirm argument roles.
run_generate_and_save_test_node_embeddings(
    CURR_DIR,
    "v10",
    "node2vec",
    "tfidf",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS, with "node2vec" as the model and "tfidf_<n>" as secondary_model.
for n in N_VALS:
    secondary_name = f"tfidf_{n}"
    run_evaluate(CURR_DIR, "v10", "node2vec", K_VALS, secondary_model=secondary_name)

#### SciBERT

In [None]:
# Presumably generates test-set node2vec embeddings from the "scibert" text model,
# once per value in N_VALS — TODO confirm argument roles.
run_generate_and_save_test_node_embeddings(
    CURR_DIR,
    "v10",
    "node2vec",
    "scibert",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS, with "node2vec" as the model and "scibert_<n>" as secondary_model.
for n in N_VALS:
    secondary_name = f"scibert_{n}"
    run_evaluate(CURR_DIR, "v10", "node2vec", K_VALS, secondary_model=secondary_name)

#### Doc2vec

In [None]:
# Presumably generates test-set node2vec embeddings from the "doc2vec" text model,
# once per value in N_VALS — TODO confirm argument roles.
run_generate_and_save_test_node_embeddings(
    CURR_DIR,
    "v10",
    "node2vec",
    "doc2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS, with "node2vec" as the model and "doc2vec_<n>" as secondary_model.
for n in N_VALS:
    secondary_name = f"doc2vec_{n}"
    run_evaluate(CURR_DIR, "v10", "node2vec", K_VALS, secondary_model=secondary_name)

#### SPECTER

In [None]:
# Presumably generates test-set node2vec embeddings from the "specter" text model,
# once per value in N_VALS — TODO confirm argument roles.
run_generate_and_save_test_node_embeddings(
    CURR_DIR,
    "v10",
    "node2vec",
    "specter",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS, with "node2vec" as the model and "specter_<n>" as secondary_model.
for n in N_VALS:
    secondary_name = f"specter_{n}"
    run_evaluate(CURR_DIR, "v10", "node2vec", K_VALS, secondary_model=secondary_name)

#### SPECTER2

In [None]:
# Presumably generates test-set node2vec embeddings from the "specter2" text model,
# once per value in N_VALS — TODO confirm argument roles.
run_generate_and_save_test_node_embeddings(
    CURR_DIR,
    "v10",
    "node2vec",
    "specter2",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS, with "node2vec" as the model and "specter2_<n>" as secondary_model.
for n in N_VALS:
    secondary_name = f"specter2_{n}"
    run_evaluate(CURR_DIR, "v10", "node2vec", K_VALS, secondary_model=secondary_name)

## Simple Concatenation

### With Node2vec

#### TF-IDF

In [None]:
# Presumably fuses "tfidf" and "node2vec" embeddings with concat_embeddings
# under the "concat" label — TODO confirm argument roles.
run_fuse_embeddings(
    CURR_DIR,
    "v10",
    "concat",
    concat_embeddings,
    "tfidf",
    "node2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS for the "concat" fusion of "tfidf_<n>" with node2vec.
for n in N_VALS:
    text_model = f"tfidf_{n}"
    run_evaluate(
        CURR_DIR,
        "v10",
        text_model,
        K_VALS,
        secondary_model="node2vec",
        fusion_model="concat",
    )

#### SciBERT

In [None]:
# Presumably fuses "scibert" and "node2vec" embeddings with concat_embeddings
# under the "concat" label — TODO confirm argument roles.
run_fuse_embeddings(
    CURR_DIR,
    "v10",
    "concat",
    concat_embeddings,
    "scibert",
    "node2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS for the "concat" fusion of "scibert_<n>" with node2vec.
for n in N_VALS:
    text_model = f"scibert_{n}"
    run_evaluate(
        CURR_DIR,
        "v10",
        text_model,
        K_VALS,
        secondary_model="node2vec",
        fusion_model="concat",
    )

#### Doc2vec

In [None]:
# Presumably fuses "doc2vec" and "node2vec" embeddings with concat_embeddings
# under the "concat" label — TODO confirm argument roles.
run_fuse_embeddings(
    CURR_DIR,
    "v10",
    "concat",
    concat_embeddings,
    "doc2vec",
    "node2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS for the "concat" fusion of "doc2vec_<n>" with node2vec.
for n in N_VALS:
    text_model = f"doc2vec_{n}"
    run_evaluate(
        CURR_DIR,
        "v10",
        text_model,
        K_VALS,
        secondary_model="node2vec",
        fusion_model="concat",
    )

#### SPECTER

In [None]:
# Presumably fuses "specter" and "node2vec" embeddings with concat_embeddings
# under the "concat" label — TODO confirm argument roles.
run_fuse_embeddings(
    CURR_DIR,
    "v10",
    "concat",
    concat_embeddings,
    "specter",
    "node2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS for the "concat" fusion of "specter_<n>" with node2vec.
for n in N_VALS:
    text_model = f"specter_{n}"
    run_evaluate(
        CURR_DIR,
        "v10",
        text_model,
        K_VALS,
        secondary_model="node2vec",
        fusion_model="concat",
    )

#### SPECTER2

In [None]:
# Presumably fuses "specter2" and "node2vec" embeddings with concat_embeddings
# under the "concat" label — TODO confirm argument roles.
run_fuse_embeddings(
    CURR_DIR,
    "v10",
    "concat",
    concat_embeddings,
    "specter2",
    "node2vec",
    N_VALS,
)

In [None]:
# One run_evaluate call per n in N_VALS for the "concat" fusion of "specter2_<n>" with node2vec.
for n in N_VALS:
    text_model = f"specter2_{n}"
    run_evaluate(
        CURR_DIR,
        "v10",
        text_model,
        K_VALS,
        secondary_model="node2vec",
        fusion_model="concat",
    )

## Canonical Correlation Analysis (CCA)

### With Node2vec

#### TF-IDF

In [None]:
# Presumably fits the "cca" fusion model (via train_cca) on "tfidf" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "cca",
    train_cca,
    "tfidf",
    "node2vec",
)

In [None]:
# Presumably projects "tfidf" and "node2vec" embeddings with the trained "cca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "cca",
    "tfidf",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"tfidf_{n}"
    for projection in ("cca_text_projected", "cca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "cca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "cca_concat",
    concat_embeddings,
    "tfidf",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "cca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"cca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "tfidf", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "cca_concat" first, then every "cca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"tfidf_{n}"
    fusion_names = ["cca_concat"] + [f"cca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SciBERT

In [None]:
# Presumably fits the "cca" fusion model (via train_cca) on "scibert" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "cca",
    train_cca,
    "scibert",
    "node2vec",
)

In [None]:
# Presumably projects "scibert" and "node2vec" embeddings with the trained "cca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "cca",
    "scibert",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"scibert_{n}"
    for projection in ("cca_text_projected", "cca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "cca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "cca_concat",
    concat_embeddings,
    "scibert",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "cca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"cca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "scibert", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "cca_concat" first, then every "cca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"scibert_{n}"
    fusion_names = ["cca_concat"] + [f"cca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### Doc2vec

In [None]:
# Presumably fits the "cca" fusion model (via train_cca) on "doc2vec" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "cca",
    train_cca,
    "doc2vec",
    "node2vec",
)

In [None]:
# Presumably projects "doc2vec" and "node2vec" embeddings with the trained "cca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "cca",
    "doc2vec",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"doc2vec_{n}"
    for projection in ("cca_text_projected", "cca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "cca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "cca_concat",
    concat_embeddings,
    "doc2vec",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "cca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"cca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "doc2vec", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "cca_concat" first, then every "cca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"doc2vec_{n}"
    fusion_names = ["cca_concat"] + [f"cca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SPECTER

In [None]:
# Presumably fits the "cca" fusion model (via train_cca) on "specter" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "cca",
    train_cca,
    "specter",
    "node2vec",
)

In [None]:
# Presumably projects "specter" and "node2vec" embeddings with the trained "cca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "cca",
    "specter",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"specter_{n}"
    for projection in ("cca_text_projected", "cca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "cca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "cca_concat",
    concat_embeddings,
    "specter",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "cca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"cca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "specter", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "cca_concat" first, then every "cca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"specter_{n}"
    fusion_names = ["cca_concat"] + [f"cca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SPECTER2

In [None]:
# Presumably fits the "cca" fusion model (via train_cca) on "specter2" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "cca",
    train_cca,
    "specter2",
    "node2vec",
)

In [None]:
# Presumably projects "specter2" and "node2vec" embeddings with the trained "cca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "cca",
    "specter2",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"specter2_{n}"
    for projection in ("cca_text_projected", "cca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "cca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "cca_concat",
    concat_embeddings,
    "specter2",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "cca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"cca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "specter2", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "cca_concat" first, then every "cca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"specter2_{n}"
    fusion_names = ["cca_concat"] + [f"cca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

## Deep CCA

### With Node2vec

#### TF-IDF

In [None]:
# Presumably fits the "dcca" fusion model (via train_dcca) on "tfidf" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "dcca",
    train_dcca,
    "tfidf",
    "node2vec",
)

In [None]:
# Presumably projects "tfidf" and "node2vec" embeddings with the trained "dcca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "dcca",
    "tfidf",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"tfidf_{n}"
    for projection in ("dcca_text_projected", "dcca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "dcca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "dcca_concat",
    concat_embeddings,
    "tfidf",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "dcca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"dcca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "tfidf", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "dcca_concat" first, then every "dcca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"tfidf_{n}"
    fusion_names = ["dcca_concat"] + [f"dcca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SciBERT

In [None]:
# Presumably fits the "dcca" fusion model (via train_dcca) on "scibert" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "dcca",
    train_dcca,
    "scibert",
    "node2vec",
)

In [None]:
# Presumably projects "scibert" and "node2vec" embeddings with the trained "dcca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "dcca",
    "scibert",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"scibert_{n}"
    for projection in ("dcca_text_projected", "dcca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "dcca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "dcca_concat",
    concat_embeddings,
    "scibert",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "dcca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"dcca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "scibert", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "dcca_concat" first, then every "dcca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"scibert_{n}"
    fusion_names = ["dcca_concat"] + [f"dcca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### Doc2vec

In [None]:
# Presumably fits the "dcca" fusion model (via train_dcca) on "doc2vec" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "dcca",
    train_dcca,
    "doc2vec",
    "node2vec",
)

In [None]:
# Presumably projects "doc2vec" and "node2vec" embeddings with the trained "dcca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "dcca",
    "doc2vec",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"doc2vec_{n}"
    for projection in ("dcca_text_projected", "dcca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "dcca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "dcca_concat",
    concat_embeddings,
    "doc2vec",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "dcca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"dcca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "doc2vec", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "dcca_concat" first, then every "dcca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"doc2vec_{n}"
    fusion_names = ["dcca_concat"] + [f"dcca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SPECTER

In [None]:
# Presumably fits the "dcca" fusion model (via train_dcca) on "specter" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "dcca",
    train_dcca,
    "specter",
    "node2vec",
)

In [None]:
# Presumably projects "specter" and "node2vec" embeddings with the trained "dcca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "dcca",
    "specter",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"specter_{n}"
    for projection in ("dcca_text_projected", "dcca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "dcca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "dcca_concat",
    concat_embeddings,
    "specter",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "dcca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"dcca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "specter", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "dcca_concat" first, then every "dcca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"specter_{n}"
    fusion_names = ["dcca_concat"] + [f"dcca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)

#### SPECTER2

In [None]:
# Presumably fits the "dcca" fusion model (via train_dcca) on "specter2" and "node2vec"
# embeddings — TODO confirm argument roles.
run_train_fusion_model(
    CURR_DIR,
    "v10",
    "dcca",
    train_dcca,
    "specter2",
    "node2vec",
)

In [None]:
# Presumably projects "specter2" and "node2vec" embeddings with the trained "dcca" model
# for each value in N_VALS — TODO confirm argument roles.
run_project_embeddings(
    CURR_DIR,
    "v10",
    "dcca",
    "specter2",
    "node2vec",
    N_VALS,
)

In [None]:
# For each n, evaluate both projection outputs in the original order:
# text-projected first, then node-projected.
for n in N_VALS:
    text_model = f"specter2_{n}"
    for projection in ("dcca_text_projected", "dcca_node_projected"):
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=projection)

In [None]:
# Fusion pass 1: concat_embeddings under the "dcca_concat" label.
run_fuse_embeddings_with_projection(
    CURR_DIR,
    "v10",
    "dcca_concat",
    concat_embeddings,
    "specter2",
    "node2vec",
    N_VALS,
)

# Fusion pass 2: one linear combiner per alpha, labelled "dcca_fused_<alpha>".
for alpha in ALPHA_VALS:
    fusion_name = f"dcca_fused_{alpha}"
    combiner = create_linear_combiner(alpha)
    run_fuse_embeddings_with_projection(CURR_DIR, "v10", fusion_name, combiner, "specter2", "node2vec", N_VALS)

In [None]:
# For each n: evaluate "dcca_concat" first, then every "dcca_fused_<alpha>" in ALPHA_VALS order.
for n in N_VALS:
    text_model = f"specter2_{n}"
    fusion_names = ["dcca_concat"] + [f"dcca_fused_{alpha}" for alpha in ALPHA_VALS]
    for fusion_name in fusion_names:
        run_evaluate(CURR_DIR, "v10", text_model, K_VALS, secondary_model="node2vec", fusion_model=fusion_name)