## Google Colab

In [None]:
# Colab only: mount Google Drive into the runtime, then point CURR_DIR at the
# project folder inside it. This cell and the "Local" cell both assign
# CURR_DIR, so run just one of them; whichever runs last wins.
from google.colab import drive

drive.mount("/content/drive")
# Root that the requirements, import-path and data paths below are built from.
CURR_DIR = "/content/drive/MyDrive/FYP"

## Local

In [None]:
# Local runs: the project root is taken to be the parent of the current working
# directory (assumes the notebook is launched from a sub-folder of the repo —
# confirm). Overrides the Colab value if both setup cells are executed.
CURR_DIR = ".."

## Imports

In [None]:
REQUIREMENTS_PATH = f"{CURR_DIR}/requirements/base.txt"

!pip install -r {REQUIREMENTS_PATH}

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each execution, so edits to the project's modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys

# Extend the import path with the project root before the src.* imports below
# (presumably the src package lives directly under CURR_DIR).
sys.path.append(CURR_DIR)

from src.models.graph import generate_and_save_graph
from src.models.node_embeddings import (
    generate_and_save_train_embeddings,
    generate_and_save_test_embeddings,
)
from src.evaluation.evaluate import evaluate

In [None]:
# Values handed to every evaluate() call in this notebook as its final argument
# (presumably the k cut-offs for top-k metrics — confirm in
# src.evaluation.evaluate).
K_VALS = [10, 15, 20]

## Citation Network

In [None]:
# Build the citation graph. Both paths are resolved against CURR_DIR and passed
# positionally in the order shown: graph pickle first, parsed training JSON
# second (judging by the file names, output then input — confirm against
# src.models.graph).
generate_and_save_graph(
    *[
        os.path.join(CURR_DIR, rel_path)
        for rel_path in (
            "data/embeddings/v10_train_graph.pkl",
            "data/parsed/v10_train.json",
        )
    ]
)

## Node2vec Embeddings

In [None]:
# Produce the node2vec artefacts for the training set. Paths are resolved
# against CURR_DIR and passed positionally in the order listed. Going by the
# file names, the first two are the embedding index and its id list, and the
# third is the graph pickle from the citation-network step — confirm the roles
# against src.models.node_embeddings.
generate_and_save_train_embeddings(
    *[
        os.path.join(CURR_DIR, rel_path)
        for rel_path in (
            "data/embeddings/v10_train_node2vec.faiss",
            "data/embeddings/v10_train_node2vec_ids.pkl",
            "data/embeddings/v10_train_graph.pkl",
        )
    ]
)

In [None]:
# Values of n swept by the loops below. Each n is passed as the last argument
# of generate_and_save_test_embeddings and is embedded in the per-run file
# names; what n controls is defined in src.models.node_embeddings.
N_VALS = [1, 3]

### TF-IDF Vectors

In [None]:
# One generate_and_save_test_embeddings call per n, using the TF-IDF files.
# The eight paths are resolved against CURR_DIR and passed positionally in the
# order listed, followed by n. Role notes are read off the file names only —
# confirm them against the function signature.
for n in N_VALS:
    generate_and_save_test_embeddings(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # The only path that varies with n.
                f"data/embeddings/v10_test_node2vec_tfidf_{n}.faiss",
                # Same path for every n; if the call writes it, each iteration
                # overwrites the previous file.
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Training-set node2vec index and ids.
                "data/embeddings/v10_train_node2vec.faiss",
                "data/embeddings/v10_train_node2vec_ids.pkl",
                # TF-IDF indexes (train, test) followed by their ids (train, test).
                "data/embeddings/v10_train_tfidf.faiss",
                "data/embeddings/v10_test_tfidf.faiss",
                "data/embeddings/v10_train_tfidf_ids.pkl",
                "data/embeddings/v10_test_tfidf_ids.pkl",
            )
        ],
        n,
    )

In [None]:
# One evaluate() call per n for the TF-IDF-based test embeddings. The six paths
# are resolved against CURR_DIR and passed positionally in the order listed,
# followed by K_VALS. Role notes are read off the file names only — confirm
# them against src.evaluation.evaluate.
for n in N_VALS:
    evaluate(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # Results CSV, one file per n.
                f"data/results/v10_node2vec_tfidf_{n}_results.csv",
                # Training-set node2vec index.
                "data/embeddings/v10_train_node2vec.faiss",
                # Test-set index matching this n.
                f"data/embeddings/v10_test_node2vec_tfidf_{n}.faiss",
                # Ids for the train and test indexes.
                "data/embeddings/v10_train_node2vec_ids.pkl",
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Parsed test set.
                "data/parsed/v10_test.json",
            )
        ],
        K_VALS,
    )

### SciBERT Embeddings

In [None]:
# One generate_and_save_test_embeddings call per n, using the SciBERT files.
# The eight paths are resolved against CURR_DIR and passed positionally in the
# order listed, followed by n. Role notes are read off the file names only —
# confirm them against the function signature.
for n in N_VALS:
    generate_and_save_test_embeddings(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # The only path that varies with n.
                f"data/embeddings/v10_test_node2vec_scibert_{n}.faiss",
                # Same path for every n; if the call writes it, each iteration
                # overwrites the previous file.
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Training-set node2vec index and ids.
                "data/embeddings/v10_train_node2vec.faiss",
                "data/embeddings/v10_train_node2vec_ids.pkl",
                # SciBERT indexes (train, test) followed by their ids (train, test).
                "data/embeddings/v10_train_scibert.faiss",
                "data/embeddings/v10_test_scibert.faiss",
                "data/embeddings/v10_train_scibert_ids.pkl",
                "data/embeddings/v10_test_scibert_ids.pkl",
            )
        ],
        n,
    )

In [None]:
# One evaluate() call per n for the SciBERT-based test embeddings. The six
# paths are resolved against CURR_DIR and passed positionally in the order
# listed, followed by K_VALS. Role notes are read off the file names only —
# confirm them against src.evaluation.evaluate.
for n in N_VALS:
    evaluate(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # Results CSV, one file per n.
                f"data/results/v10_node2vec_scibert_{n}_results.csv",
                # Training-set node2vec index.
                "data/embeddings/v10_train_node2vec.faiss",
                # Test-set index matching this n.
                f"data/embeddings/v10_test_node2vec_scibert_{n}.faiss",
                # Ids for the train and test indexes.
                "data/embeddings/v10_train_node2vec_ids.pkl",
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Parsed test set.
                "data/parsed/v10_test.json",
            )
        ],
        K_VALS,
    )

### SPECTER Embeddings

In [None]:
# One generate_and_save_test_embeddings call per n, using the SPECTER files.
# The eight paths are resolved against CURR_DIR and passed positionally in the
# order listed, followed by n. Role notes are read off the file names only —
# confirm them against the function signature.
for n in N_VALS:
    generate_and_save_test_embeddings(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # The only path that varies with n.
                f"data/embeddings/v10_test_node2vec_specter_{n}.faiss",
                # Same path for every n; if the call writes it, each iteration
                # overwrites the previous file.
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Training-set node2vec index and ids.
                "data/embeddings/v10_train_node2vec.faiss",
                "data/embeddings/v10_train_node2vec_ids.pkl",
                # SPECTER indexes (train, test) followed by their ids (train, test).
                "data/embeddings/v10_train_specter.faiss",
                "data/embeddings/v10_test_specter.faiss",
                "data/embeddings/v10_train_specter_ids.pkl",
                "data/embeddings/v10_test_specter_ids.pkl",
            )
        ],
        n,
    )

In [None]:
# One evaluate() call per n for the SPECTER-based test embeddings. The six
# paths are resolved against CURR_DIR and passed positionally in the order
# listed, followed by K_VALS. Role notes are read off the file names only —
# confirm them against src.evaluation.evaluate.
for n in N_VALS:
    evaluate(
        *[
            os.path.join(CURR_DIR, rel_path)
            for rel_path in (
                # Results CSV, one file per n.
                f"data/results/v10_node2vec_specter_{n}_results.csv",
                # Training-set node2vec index.
                "data/embeddings/v10_train_node2vec.faiss",
                # Test-set index matching this n.
                f"data/embeddings/v10_test_node2vec_specter_{n}.faiss",
                # Ids for the train and test indexes.
                "data/embeddings/v10_train_node2vec_ids.pkl",
                "data/embeddings/v10_test_node2vec_ids.pkl",
                # Parsed test set.
                "data/parsed/v10_test.json",
            )
        ],
        K_VALS,
    )