In [11]:
import json
from src.datasets.who_is_who import WhoIsWhoDataset

In [12]:
def print_latex_table(data, first_rows):
    test_data = []
    eval_data = []
    for i, row in enumerate(data):
        with open(f"./data/results/{row}/training_data.json") as f:
            results = json.load(f)["results"]
            # Find index of max. f1 score
            max_test_f1_index = results["test_F1"].index(max(results["test_F1"][1:]))
            max_test_precision = results["test_precision"][max_test_f1_index]
            max_test_recall = results["test_recall"][max_test_f1_index]
            max_test_f1 = results["test_F1"][max_test_f1_index]
            max_test_accuracy = results["test_accuracies"][max_test_f1_index]
            max_test_correct_pos = results["test_accuracies_correct_pos"][max_test_f1_index]
            max_test_correct_neg = results["test_accuracies_correct_neg"][max_test_f1_index]
            
            max_eval_f1_index = results["eval_F1"].index(max(results["eval_F1"][1:]))
            max_eval_precision = results["eval_precision"][max_eval_f1_index]
            max_eval_recall = results["eval_recall"][max_eval_f1_index]
            max_eval_f1 = results["eval_F1"][max_eval_f1_index]
            max_eval_accuracy = results["eval_accuracies"][max_eval_f1_index]
            max_eval_correct_pos = results["eval_accuracies_correct_pos"][max_eval_f1_index]
            max_eval_correct_neg = results["eval_accuracies_correct_neg"][max_eval_f1_index]
            
            row_test = [
                f"\\textbf{{{first_rows[i]}}}",
                f"{max_test_precision:.2f}",
                f"{max_test_recall:.2f}",
                f"{max_test_f1:.2f}",
                f"{max_test_accuracy:.2f}",
                f"{max_test_correct_pos:.2f}",
                f"{max_test_correct_neg:.2f}",
            ]
            test_data.append(row_test)
            row_eval = [
                f"\\textbf{{{first_rows[i]}}}",
                f"{max_eval_precision:.2f}",
                f"{max_eval_recall:.2f}",
                f"{max_eval_f1:.2f}",
                f"{max_eval_accuracy:.2f}",
                f"{max_eval_correct_pos:.2f}",
                f"{max_eval_correct_neg:.2f}",
            ]
            eval_data.append(row_eval)
    
    print("\nTest Data\n")
    for row in test_data:
        print(" & ".join(row) + " \\\\")
    
    print("\nEval Data\n")
    for row in eval_data:
        print(" & ".join(row) + " \\\\")

In [13]:
data = [
    "homogeneous (title) full_emb linear_layer dropout baseline",
    "homogeneous (abstract) full_emb linear_layer dropout",
    "homogeneous (similar co-authors) full_emb linear_layer dropout small_graph",
    "homogeneous (venue) full_emb linear_layer dropout",
    "homogeneous (org) full_emb linear_layer dropout",
]

first_rows = [
    "Title",
    "Abstract",
    "Co-Author",
    "Venue",
    "Organization",
]
    

In [14]:
print("Edge Type Comparison")
print_latex_table(data, first_rows)

Edge Type Comparison

Test Data

\textbf{Title} & 0.85 & 0.92 & 0.89 & 0.88 & 0.92 & 0.84 \\
\textbf{Abstract} & 0.76 & 0.86 & 0.81 & 0.80 & 0.86 & 0.74 \\
\textbf{Co-Author} & 0.66 & 0.89 & 0.76 & 0.72 & 0.89 & 0.54 \\
\textbf{Venue} & 0.82 & 0.84 & 0.83 & 0.83 & 0.84 & 0.82 \\
\textbf{Organization} & 0.87 & 0.87 & 0.87 & 0.87 & 0.87 & 0.87 \\

Eval Data

\textbf{Title} & 0.54 & 0.67 & 0.60 & 0.55 & 0.67 & 0.42 \\
\textbf{Abstract} & 0.96 & 0.77 & 0.85 & 0.87 & 0.77 & 0.97 \\
\textbf{Co-Author} & 0.54 & 0.82 & 0.65 & 0.56 & 0.82 & 0.30 \\
\textbf{Venue} & 0.59 & 0.77 & 0.66 & 0.61 & 0.77 & 0.46 \\
\textbf{Organization} & 0.85 & 0.75 & 0.80 & 0.81 & 0.75 & 0.87 \\


## Classifiers

In [15]:
data = [
    "classifier full_emb (abstract, org, sim_author edges) fixe_enc_weights",
    "classifier full_emb (abstract, org, sim_author edges) baseline",
    "classifier full_emb (abstract, org, sim_author, same_author edges)",
    "classifier full_emb (abstract, org edges) low dim",
    "classifier full_emb (abstract, org edges) low dim 2 layers",
    "classifier full_emb (abstract, org, sim_author, same_author edges) low dim 2 layers",
]

first_rows = [
    "Baseline",
    "Fixed Enc. Weights",
    "Same Author Edges",
    "Low Dim",
    "Baseline Low Dim",
    "Same Author Baseline Low Dim",
]

In [16]:
print_latex_table(data, first_rows)


Test Data

\textbf{Baseline} & 0.80 & 0.79 & 0.80 & 0.80 & 0.79 & 0.80 \\
\textbf{Fixed Enc. Weights} & 0.91 & 0.87 & 0.89 & 0.89 & 0.87 & 0.91 \\
\textbf{Same Author Edges} & 0.92 & 0.86 & 0.89 & 0.89 & 0.86 & 0.93 \\
\textbf{Low Dim} & 0.61 & 0.80 & 0.69 & 0.64 & 0.80 & 0.48 \\
\textbf{Baseline Low Dim} & 0.74 & 0.66 & 0.70 & 0.72 & 0.66 & 0.77 \\
\textbf{Same Author Baseline Low Dim} & 0.74 & 0.68 & 0.71 & 0.72 & 0.68 & 0.77 \\

Eval Data

\textbf{Baseline} & 0.65 & 0.46 & 0.54 & 0.60 & 0.46 & 0.75 \\
\textbf{Fixed Enc. Weights} & 0.48 & 0.43 & 0.46 & 0.49 & 0.43 & 0.54 \\
\textbf{Same Author Edges} & 0.49 & 0.49 & 0.49 & 0.49 & 0.49 & 0.49 \\
\textbf{Low Dim} & 0.52 & 0.92 & 0.66 & 0.53 & 0.92 & 0.14 \\
\textbf{Baseline Low Dim} & 0.81 & 0.61 & 0.69 & 0.73 & 0.61 & 0.86 \\
\textbf{Same Author Baseline Low Dim} & 0.80 & 0.67 & 0.73 & 0.75 & 0.67 & 0.83 \\


## Dropout rates

In [17]:
data = [
    "homogeneous (abstract) full_emb linear_layer dropout 0",
    "homogeneous (abstract) full_emb linear_layer dropout 3",
    "homogeneous (abstract) full_emb linear_layer dropout 5",
]

first_rows = [
    "Dropout 0.0",
    "Dropout 0.3",
    "Dropout 0.5",
]

print_latex_table(data, first_rows)


Test Data

\textbf{Dropout 0.0} & 0.75 & 0.94 & 0.83 & 0.81 & 0.94 & 0.69 \\
\textbf{Dropout 0.3} & 0.82 & 0.86 & 0.84 & 0.83 & 0.86 & 0.81 \\
\textbf{Dropout 0.5} & 0.79 & 0.84 & 0.81 & 0.81 & 0.84 & 0.77 \\

Eval Data

\textbf{Dropout 0.0} & 0.84 & 0.67 & 0.75 & 0.77 & 0.67 & 0.87 \\
\textbf{Dropout 0.3} & 0.86 & 0.75 & 0.80 & 0.81 & 0.75 & 0.88 \\
\textbf{Dropout 0.5} & 0.93 & 0.63 & 0.75 & 0.79 & 0.63 & 0.96 \\


## Dual vs single objective

In [18]:
data = [
    "homogeneous (abstract) full_emb linear_layer dropout 32h 8out",
    "homogeneous (abstract) full_emb linear_layer dropout baseline",
    "homogeneous (abstract) full_emb linear_layer dropout 128h 32out",
]

first_rows = [
    "32 hidden, 8 out",
    "64 hidden, 16 out",
    "128 hidden, 32 out",
]

print_latex_table(data, first_rows)


Test Data

\textbf{32 hidden, 8 out} & 0.76 & 0.90 & 0.82 & 0.81 & 0.90 & 0.71 \\
\textbf{64 hidden, 16 out} & 0.81 & 0.89 & 0.85 & 0.84 & 0.89 & 0.79 \\
\textbf{128 hidden, 32 out} & 0.79 & 0.90 & 0.85 & 0.84 & 0.90 & 0.77 \\

Eval Data

\textbf{32 hidden, 8 out} & 0.96 & 0.83 & 0.89 & 0.90 & 0.83 & 0.97 \\
\textbf{64 hidden, 16 out} & 0.92 & 0.78 & 0.84 & 0.85 & 0.78 & 0.93 \\
\textbf{128 hidden, 32 out} & 0.94 & 0.82 & 0.88 & 0.89 & 0.82 & 0.95 \\


## Model depth

In [19]:
data = [
    "homogeneous (abstract) full_emb linear_layer dropout baseline",
    "homogeneous (abstract) full_emb linear_layer dropout 1_conv_layer",
    "homogeneous (abstract) full_emb linear_layer dropout 1_conv_layer 2_linear",
]

first_rows = [
    "2 Conv Layers",
    "1 Conv Layer",
    "1 Conv 2 Linear Layers",
]

print_latex_table(data, first_rows)


Test Data

\textbf{2 Conv Layers} & 0.81 & 0.89 & 0.85 & 0.84 & 0.89 & 0.79 \\
\textbf{1 Conv Layer} & 0.81 & 0.91 & 0.86 & 0.85 & 0.91 & 0.78 \\
\textbf{1 Conv 2 Linear Layers} & 0.80 & 0.87 & 0.83 & 0.83 & 0.87 & 0.79 \\

Eval Data

\textbf{2 Conv Layers} & 0.92 & 0.78 & 0.84 & 0.85 & 0.78 & 0.93 \\
\textbf{1 Conv Layer} & 0.95 & 0.80 & 0.87 & 0.88 & 0.80 & 0.96 \\
\textbf{1 Conv 2 Linear Layers} & 0.96 & 0.82 & 0.89 & 0.90 & 0.82 & 0.97 \\


## Neighborhood size

In [20]:
data = [
    "homogeneous (abstract) full_emb linear_layer dropout neighborhood 1",
    "homogeneous (abstract) full_emb linear_layer dropout baseline",
    "homogeneous (abstract) full_emb linear_layer dropout neighbourhood 3",
]

first_rows = [
    "1 Hop",
    "2 Hops",
    "3 Hops",
]

print_latex_table(data, first_rows)


Test Data

\textbf{1 Hop} & 0.91 & 0.93 & 0.92 & 0.92 & 0.93 & 0.91 \\
\textbf{2 Hops} & 0.81 & 0.89 & 0.85 & 0.84 & 0.89 & 0.79 \\
\textbf{3 Hops} & 0.73 & 0.90 & 0.81 & 0.79 & 0.90 & 0.67 \\

Eval Data

\textbf{1 Hop} & 0.92 & 0.85 & 0.88 & 0.89 & 0.85 & 0.92 \\
\textbf{2 Hops} & 0.92 & 0.78 & 0.84 & 0.85 & 0.78 & 0.93 \\
\textbf{3 Hops} & 0.93 & 0.63 & 0.75 & 0.79 & 0.63 & 0.96 \\
