In [1]:
import os
import pandas as pd
import subprocess
import re
import math

# Ensure we are in the project root: the commands in this notebook use paths
# such as `src/run.py` and `models/<config>.pt`, so when the notebook is started
# from inside `src` we step up one level.
# The final path component is compared exactly; a plain suffix check would also
# match unrelated directories whose name merely ends in "src" (e.g. "my_src").
if os.path.basename(os.getcwd()) == "src":
    os.chdir("..")

print(f"Current working directory: {os.getcwd()}")

Current working directory: /Users/dkandboz/Documents/college/deep-learning-gnn-project


## Config

In [None]:
from dataclasses import dataclass


@dataclass
class Config:
    """Switches that decide which sections of this notebook actually execute.

    Each ``RUN_*`` flag is checked at the top of the matching section function;
    when the flag is False that function prints a "Skipping ..." message and
    returns an empty list.
    """
    # Section 4.1: gates gnn_ablation_study().
    RUN_4_1_GNN: bool = False
    # Section 4.2: gates mean_teacher().
    RUN_4_2_MEAN_TEACHER: bool = False
    # Section 4.3: gates ncps().
    RUN_4_3_NCPS: bool = False
    # Section 4.4: gates laplace().
    RUN_4_4_LAPLACE: bool = False
    # Section 4.5: gates graph_mix().
    RUN_4_5_GRAPHMIX: bool = False
    # When True, every experiment first launches `python src/run.py ...` before
    # its test command, and the "model file not found" skip is bypassed.
    SHOULD_TRAIN: bool = False
    
# Single shared instance read by the section functions in this notebook.
CONFIG = Config()

## Utils

In [3]:
def run_command(command):
    """Execute ``command`` through the shell and return its captured stdout.

    The command line is echoed before it runs.  A non-zero exit status is not
    raised as an exception: the failure and the captured stderr are printed,
    and whatever was written to stdout is still returned.

    Args:
        command: Shell command line as a single string.

    Returns:
        str: Text the command wrote to stdout.
    """
    print(f"Running: {command}")

    # Pipe both streams so nothing leaks into the cell output unfiltered.
    completed = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )

    succeeded = completed.returncode == 0
    if not succeeded:
        print(f"Error running command: {command}")
        print("STDERR:", completed.stderr)

    return completed.stdout

def parse_mse(output):
    """Extract the number reported after the ``FINAL_TEST_MSE:`` marker.

    Args:
        output: Text to search; this notebook passes the captured stdout of
            the ``src/test.py`` command.

    Returns:
        float | None: The first reported value, or None when the marker is
        absent or is not followed by a number.
    """
    # Accept plain decimals AND scientific notation.  Python prints small
    # floats as e.g. "3.2e-05"; a digits-and-dots-only pattern would stop at
    # the "e" and silently report 3.2.  Matching a well-formed number also
    # avoids float() raising on fragments such as "0.5." or ".".
    match = re.search(
        r"FINAL_TEST_MSE: ((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)",
        output,
    )
    if match is None:
        return None
    return float(match.group(1))

## 4.1 GNN Ablation Study

In [4]:
def gnn_ablation_study():
    """Run the section 4.1 GNN ablation and collect one result row per experiment.

    When ``CONFIG.RUN_4_1_GNN`` is False a notice is printed and an empty list
    is returned.  Otherwise, for every experiment the function optionally
    launches ``src/run.py`` (when ``CONFIG.SHOULD_TRAIN`` is set), then runs
    ``src/test.py`` against ``models/<config>.pt`` and parses the reported MSE.

    Returns:
        list[dict]: Rows with the keys ``ID``, ``Configuration``, ``Phase`` and
        ``Test MSE``.  ``Test MSE`` is None when the model file is missing
        (and training is disabled) or when no MSE could be parsed.
    """
    if not CONFIG.RUN_4_1_GNN:
        print("Skipping GNN Ablation Study as per configuration.")
        return []

    # Experiments grouped by phase; each entry is (id, label, config).
    phased_experiments = (
        (
            "Phase 1: Regularization & Optimization (GCN Backbone)",
            (
                ("1", "Baseline GCN (StepLR, No Reg.)", "ablation_gnn/01_gcn_baseline"),
                ("2", "+ Residual Connections", "ablation_gnn/02_gcn_residuals"),
                ("3a", "+ Batch Normalization", "ablation_gnn/03a_gcn_batchnorm"),
                ("3b", "+ Layer Normalization", "ablation_gnn/03b_gcn_layernorm"),
                ("3c", "+ Graph Normalization", "ablation_gnn/03c_gcn_graphnorm"),
                ("4", "+ Dropout (p=0.1)", "ablation_gnn/04_gcn_dropout"),
                ("5", "+ Cosine Annealing Scheduler", "ablation_gnn/05_gcn_cosine"),
            ),
        ),
        (
            "Phase 2: Model Selection (Fixed Hyperparams)",
            (
                ("6a", "GCN (Reference)", "ablation_gnn/06a_gcn"),
                ("6b", "GraphSAGE", "ablation_gnn/06b_sage"),
                ("6c", "GAT", "ablation_gnn/06c_gat"),
                ("6d", "GIN", "ablation_gnn/06d_gin"),
            ),
        ),
        (
            "Phase 3: Scaling & Refinement (GIN Backbone)",
            (
                ("7", "GIN + Jumping Knowledge (Concat)", "ablation_gnn/07_gin_jk"),
                ("8", "Deep GIN (12 Layers, 512 Hidden)", "ablation_gnn/08_deep_gin"),
            ),
        ),
    )

    def make_row(row_id, row_label, row_phase, value):
        # Single place that fixes the schema of a result row.
        return {
            "ID": row_id,
            "Configuration": row_label,
            "Phase": row_phase,
            "Test MSE": value,
        }

    collected = []

    for phase, experiments in phased_experiments:
        for exp_id, label, cfg in experiments:
            print(f"\n=== Processing {exp_id} - {label} ===")

            # Step 1 (optional): launch the training script for this config.
            if CONFIG.SHOULD_TRAIN:
                run_command(
                    f"python src/run.py +experiments={cfg} trainer.train.total_epochs=1 save_model=false logger.disable=true"
                )

            # Step 2: evaluate the model expected at models/<config>.pt,
            # e.g. models/ablation_gnn/01_gcn_baseline.pt
            model_path = f"models/{cfg}.pt"

            if not (os.path.exists(model_path) or CONFIG.SHOULD_TRAIN):
                print(f"Warning: Model file not found at {model_path}. Skipping evaluation.")
                collected.append(make_row(exp_id, label, phase, None))
                continue

            test_output = run_command(
                f"python src/test.py +experiments={cfg} model_path={model_path}"
            )
            mse = parse_mse(test_output)

            if mse is None:
                print("Could not parse MSE from output.")
            else:
                print(f"Parsed Test MSE: {mse}")
            # `mse` is None on the failure path, so one append covers both cases.
            collected.append(make_row(exp_id, label, phase, mse))

    return collected

In [5]:
# Run section 4.1; this is an empty list when CONFIG.RUN_4_1_GNN is False.
results = gnn_ablation_study()

Skipping GNN Ablation Study as per configuration.


In [6]:
def generate_latex_table(
    results,
    caption=(
        r"Ablation study of GNN architecture and regularization techniques on QM9. "
        r"We incrementally add features to a GCN baseline, compare architectures, "
        r"and scale the best performing model (GIN)."
    ),
    label="tab:gnn_ablation",
):
    """Print a booktabs LaTeX table for a list of experiment result rows.

    Rows are grouped by their ``Phase`` value, in order of first appearance.
    Each named phase gets an italic ``\\multicolumn`` header row, and groups
    are separated by ``\\midrule``.  Rows whose phase is empty or missing are
    kept and rendered as a group without a header row.  The lowest
    ``Test MSE`` in the table is set in bold; a missing MSE is shown as ``--``.

    Args:
        results: Sequence of dicts with the keys ``ID``, ``Configuration``,
            ``Phase`` and ``Test MSE``.  When it is empty a notice is printed
            and nothing else happens.
        caption: LaTeX source placed inside ``\\caption{...}``.  It is inserted
            verbatim (not escaped) so it may contain LaTeX markup.
        label: Name placed inside ``\\label{...}``, inserted verbatim.

    Returns:
        None.  The table is written to stdout.
    """
    if not results:
        print("No results to generate LaTeX table.")
        return

    # Going through a DataFrame normalises the records: a key that is absent
    # from some rows shows up as NaN instead of raising later on.
    records = pd.DataFrame(results).to_dict("records")

    def is_missing(value):
        # None and float NaN both mean "no value" after the DataFrame round-trip.
        return value is None or (isinstance(value, float) and math.isnan(value))

    # Characters that are special in LaTeX text mode and their escapes.
    special_chars = {
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
        '\\': r'\textbackslash{}',
    }
    # Compiled once per table rather than once per escaped cell.
    special_pattern = re.compile('|'.join(re.escape(k) for k in special_chars))

    def latex_escape(value):
        # One regex pass, so the braces and backslashes that an escape sequence
        # introduces are never escaped a second time.
        if is_missing(value):
            return ""
        return special_pattern.sub(lambda m: special_chars[m.group(0)], str(value))

    # Group rows by phase in order of first appearance.  A missing phase is
    # folded into the "" group so those rows are not silently dropped.
    rows_by_phase = {}
    for record in records:
        phase = record.get("Phase")
        phase_key = "" if is_missing(phase) else str(phase)
        rows_by_phase.setdefault(phase_key, []).append(record)

    # Best (lowest) MSE over every row that has one, used for bolding.
    valid_mses = [
        record.get("Test MSE")
        for record in records
        if not is_missing(record.get("Test MSE"))
    ]
    best_overall = min(valid_mses) if valid_mses else None

    lines = [
        r"\begin{table}[h!]",
        r"    \centering",
        r"    \caption{" + caption + "}",
        r"    \label{" + label + "}",
        # Resize box to fit column width
        r"    \resizebox{\columnwidth}{!}{%",
        r"    \begin{tabular}{llc}",
        r"        \toprule",
        r"        \textbf{ID} & \textbf{Configuration / Modification} & \textbf{Test MSE} \\",
        r"        \midrule",
    ]

    last_group = len(rows_by_phase) - 1
    for group_index, (phase_key, phase_rows) in enumerate(rows_by_phase.items()):
        # An unnamed phase gets no header row (it would be an empty \textit{}).
        if phase_key:
            lines.append(
                r"        \multicolumn{3}{l}{\textit{" + latex_escape(phase_key) + r"}} \\"
            )

        for record in phase_rows:
            id_escaped = latex_escape(record.get("ID"))
            cfg_escaped = latex_escape(record.get("Configuration"))
            mse = record.get("Test MSE")

            if is_missing(mse):
                mse_str = "--"
            else:
                mse_str = f"{mse:.4f}"
                # Bold if best
                if best_overall is not None and abs(mse - best_overall) < 1e-12:
                    mse_str = r"\textbf{" + mse_str + "}"

            # "\\\\" in the source is the two-backslash LaTeX row terminator.
            lines.append(f"        {id_escaped} & {cfg_escaped} & {mse_str} \\\\")

        # Add midrule only between groups, not after the last one (booktabs style).
        if group_index < last_group:
            lines.append(r"        \midrule")

    lines.append(r"        \bottomrule")
    lines.append(r"    \end{tabular}%")
    lines.append(r"    }")
    lines.append(r"\end{table}")

    latex_table = "\n".join(lines)
    print(latex_table)
    
# Print the table for the current `results`; a notice is printed instead when the list is empty.
generate_latex_table(results)

No results to generate LaTeX table.


## 4.2 Mean Teacher

In [7]:
def mean_teacher():
    """Run the section 4.2 (Mean Teacher) evaluation and collect result rows.

    When ``CONFIG.RUN_4_2_MEAN_TEACHER`` is False a notice is printed and an
    empty list is returned.  Otherwise each listed experiment is optionally
    trained via ``src/run.py`` (when ``CONFIG.SHOULD_TRAIN`` is set) and then
    evaluated via ``src/test.py`` against ``models/<config>.pt``.

    Returns:
        list[dict]: Rows with the keys ``ID``, ``Configuration``, ``Phase`` and
        ``Test MSE``.  ``Test MSE`` is None when the model file is missing
        (and training is disabled) or when no MSE could be parsed.
    """
    if not CONFIG.RUN_4_2_MEAN_TEACHER:
        print("Skipping Mean Teacher as per configuration.")
        return []

    # Each entry is (id, label, config, phase).
    # NOTE(review): this single entry is identical to the 4.1 baseline GCN
    # experiment - looks like a placeholder; confirm the intended configs.
    experiment_specs = (
        (
            "1",
            "Baseline GCN (StepLR, No Reg.)",
            "ablation_gnn/01_gcn_baseline",
            "Phase 1: Regularization & Optimization (GCN Backbone)",
        ),
    )

    collected = []

    for exp_id, label, cfg, phase in experiment_specs:
        print(f"\n=== Processing {exp_id} - {label} ===")

        # Step 1 (optional): launch the training script for this config.
        if CONFIG.SHOULD_TRAIN:
            run_command(
                f"python src/run.py +experiments={cfg} trainer.train.total_epochs=1 save_model=false logger.disable=true"
            )

        # Step 2: evaluate the model expected at models/<config>.pt
        model_path = f"models/{cfg}.pt"
        row = {"ID": exp_id, "Configuration": label, "Phase": phase, "Test MSE": None}

        if not (os.path.exists(model_path) or CONFIG.SHOULD_TRAIN):
            print(f"Warning: Model file not found at {model_path}. Skipping evaluation.")
            collected.append(row)
            continue

        test_output = run_command(
            f"python src/test.py +experiments={cfg} model_path={model_path}"
        )
        mse = parse_mse(test_output)

        if mse is None:
            print("Could not parse MSE from output.")
        else:
            print(f"Parsed Test MSE: {mse}")
            row["Test MSE"] = mse
        collected.append(row)

    return collected

In [8]:
# Run section 4.2. Note that this rebinds the notebook-wide `results` name.
results = mean_teacher()

Skipping Mean Teacher as per configuration.


## 4.3 NCPS

In [9]:
def ncps():
    """Run the section 4.3 (NCPS) evaluation and collect result rows.

    When ``CONFIG.RUN_4_3_NCPS`` is False a notice is printed and an empty
    list is returned.  Otherwise each listed experiment is optionally trained
    via ``src/run.py`` (when ``CONFIG.SHOULD_TRAIN`` is set) and then evaluated
    via ``src/test.py`` against ``models/<config>.pt``.

    Returns:
        list[dict]: Rows with the keys ``ID``, ``Configuration``, ``Phase`` and
        ``Test MSE``.  ``Test MSE`` is None when the model file is missing
        (and training is disabled) or when no MSE could be parsed.
    """
    if not CONFIG.RUN_4_3_NCPS:
        print("Skipping NCPS as per configuration.")
        return []

    # Each entry is (id, label, config, phase).
    # NOTE(review): this single entry is identical to the 4.1 baseline GCN
    # experiment - looks like a placeholder; confirm the intended configs.
    experiment_specs = (
        (
            "1",
            "Baseline GCN (StepLR, No Reg.)",
            "ablation_gnn/01_gcn_baseline",
            "Phase 1: Regularization & Optimization (GCN Backbone)",
        ),
    )

    collected = []

    for exp_id, label, cfg, phase in experiment_specs:
        print(f"\n=== Processing {exp_id} - {label} ===")

        # Step 1 (optional): launch the training script for this config.
        if CONFIG.SHOULD_TRAIN:
            run_command(
                f"python src/run.py +experiments={cfg} trainer.train.total_epochs=1 save_model=false logger.disable=true"
            )

        # Step 2: evaluate the model expected at models/<config>.pt
        model_path = f"models/{cfg}.pt"
        row = {"ID": exp_id, "Configuration": label, "Phase": phase, "Test MSE": None}

        if not (os.path.exists(model_path) or CONFIG.SHOULD_TRAIN):
            print(f"Warning: Model file not found at {model_path}. Skipping evaluation.")
            collected.append(row)
            continue

        test_output = run_command(
            f"python src/test.py +experiments={cfg} model_path={model_path}"
        )
        mse = parse_mse(test_output)

        if mse is None:
            print("Could not parse MSE from output.")
        else:
            print(f"Parsed Test MSE: {mse}")
            row["Test MSE"] = mse
        collected.append(row)

    return collected
            

In [10]:
# Run section 4.3. Note that this rebinds the notebook-wide `results` name.
results = ncps()

Skipping NCPS as per configuration.


## 4.4 Laplace

In [11]:
def laplace():
    """Run the section 4.4 (Laplace) evaluation and collect result rows.

    When ``CONFIG.RUN_4_4_LAPLACE`` is False a notice is printed and an empty
    list is returned.  Otherwise each listed experiment is optionally trained
    via ``src/run.py`` (when ``CONFIG.SHOULD_TRAIN`` is set) and then evaluated
    via ``src/test.py`` against ``models/<config>.pt``.

    Returns:
        list[dict]: Rows with the keys ``ID``, ``Configuration``, ``Phase`` and
        ``Test MSE``.  ``Test MSE`` is None when the model file is missing
        (and training is disabled) or when no MSE could be parsed.
    """
    if not CONFIG.RUN_4_4_LAPLACE:
        print("Skipping Laplace as per configuration.")
        return []

    # Each entry is (id, label, config, phase).
    # NOTE(review): this single entry is identical to the 4.1 baseline GCN
    # experiment - looks like a placeholder; confirm the intended configs.
    experiment_specs = (
        (
            "1",
            "Baseline GCN (StepLR, No Reg.)",
            "ablation_gnn/01_gcn_baseline",
            "Phase 1: Regularization & Optimization (GCN Backbone)",
        ),
    )

    collected = []

    for exp_id, label, cfg, phase in experiment_specs:
        print(f"\n=== Processing {exp_id} - {label} ===")

        # Step 1 (optional): launch the training script for this config.
        if CONFIG.SHOULD_TRAIN:
            run_command(
                f"python src/run.py +experiments={cfg} trainer.train.total_epochs=1 save_model=false logger.disable=true"
            )

        # Step 2: evaluate the model expected at models/<config>.pt
        model_path = f"models/{cfg}.pt"
        row = {"ID": exp_id, "Configuration": label, "Phase": phase, "Test MSE": None}

        if not (os.path.exists(model_path) or CONFIG.SHOULD_TRAIN):
            print(f"Warning: Model file not found at {model_path}. Skipping evaluation.")
            collected.append(row)
            continue

        test_output = run_command(
            f"python src/test.py +experiments={cfg} model_path={model_path}"
        )
        mse = parse_mse(test_output)

        if mse is None:
            print("Could not parse MSE from output.")
        else:
            print(f"Parsed Test MSE: {mse}")
            row["Test MSE"] = mse
        collected.append(row)

    return collected
            

In [12]:
# Run section 4.4. Note that this rebinds the notebook-wide `results` name.
results = laplace()

Skipping Laplace as per configuration.


## 4.5 GraphMix

In [13]:
def graph_mix():
    """Run the section 4.5 (GraphMix) evaluation and collect result rows.

    When ``CONFIG.RUN_4_5_GRAPHMIX`` is False a notice is printed and an empty
    list is returned.  Otherwise each listed experiment is optionally trained
    via ``src/run.py`` (when ``CONFIG.SHOULD_TRAIN`` is set) and then evaluated
    via ``src/test.py`` against ``models/<config>.pt``.

    Returns:
        list[dict]: Rows with the keys ``ID``, ``Configuration``, ``Phase`` and
        ``Test MSE``.  ``Phase`` is always the empty string here.
        ``Test MSE`` is None when the model file is missing (and training is
        disabled) or when no MSE could be parsed.
    """
    if not CONFIG.RUN_4_5_GRAPHMIX:
        print("Skipping GraphMix as per configuration.")
        return []

    # Each entry is (id, label, config); none of them belongs to a named phase.
    phase = ""
    experiment_specs = (
        ("1", "GraphMix 10% Labelled Data", "graph_mixup/gin_best_mixup"),
        ("2", "GraphMix 20% Labelled Data", "graph_mixup/gin_best_mixup_20"),
        ("3", "GraphMix 5% Labelled Data", "graph_mixup/gin_best_mixup_5"),
        ("4", "GraphMix 1% Labelled Data", "graph_mixup/gin_best_mixup_1"),
    )

    collected = []

    for exp_id, label, cfg in experiment_specs:
        print(f"\n=== Processing {exp_id} - {label} ===")

        # Step 1 (optional): launch the training script for this config.
        if CONFIG.SHOULD_TRAIN:
            run_command(
                f"python src/run.py +experiments={cfg} trainer.train.total_epochs=1 save_model=false logger.disable=true"
            )

        # Step 2: evaluate the model expected at models/<config>.pt
        model_path = f"models/{cfg}.pt"
        row = {"ID": exp_id, "Configuration": label, "Phase": phase, "Test MSE": None}

        if not (os.path.exists(model_path) or CONFIG.SHOULD_TRAIN):
            print(f"Warning: Model file not found at {model_path}. Skipping evaluation.")
            collected.append(row)
            continue

        test_output = run_command(
            f"python src/test.py +experiments={cfg} model_path={model_path}"
        )
        mse = parse_mse(test_output)

        if mse is None:
            print("Could not parse MSE from output.")
        else:
            print(f"Parsed Test MSE: {mse}")
            row["Test MSE"] = mse
        collected.append(row)

    return collected

In [14]:
# Run section 4.5 (rebinds the notebook-wide `results` name) and show the raw rows.
results = graph_mix()
display(results)


=== Processing 1 - GraphMix 10% Labelled Data ===
Running: python src/test.py +experiments=graph_mixup/gin_best_mixup model_path=models/graph_mixup/gin_best_mixup.pt
Parsed Test MSE: 0.026007591613701413

=== Processing 2 - GraphMix 20% Labelled Data ===
Running: python src/test.py +experiments=graph_mixup/gin_best_mixup_20 model_path=models/graph_mixup/gin_best_mixup_20.pt
Parsed Test MSE: 0.019765003451279232

=== Processing 3 - GraphMix 5% Labelled Data ===
Running: python src/test.py +experiments=graph_mixup/gin_best_mixup_5 model_path=models/graph_mixup/gin_best_mixup_5.pt
Parsed Test MSE: 0.03805058130196163

=== Processing 4 - GraphMix 1% Labelled Data ===
Running: python src/test.py +experiments=graph_mixup/gin_best_mixup_1 model_path=models/graph_mixup/gin_best_mixup_1.pt
Parsed Test MSE: 0.07902580499649048


[{'ID': '1',
  'Configuration': 'GraphMix 10% Labelled Data',
  'Phase': '',
  'Test MSE': 0.026007591613701413},
 {'ID': '2',
  'Configuration': 'GraphMix 20% Labelled Data',
  'Phase': '',
  'Test MSE': 0.019765003451279232},
 {'ID': '3',
  'Configuration': 'GraphMix 5% Labelled Data',
  'Phase': '',
  'Test MSE': 0.03805058130196163},
 {'ID': '4',
  'Configuration': 'GraphMix 1% Labelled Data',
  'Phase': '',
  'Test MSE': 0.07902580499649048}]

In [15]:
# Generate a small LaTeX table from the existing `results` list and print it.
# This cell uses variables and imports already present in the notebook.

def _latex_escape(s):
    if s is None:
        return ""
    s = str(s)
    chars = {
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
        '\\': r'\textbackslash{}',
    }
    pattern = re.compile('|'.join(re.escape(k) for k in chars.keys()))
    return pattern.sub(lambda m: chars[m.group(0)], s)

# Build and print the GraphMix table from whatever `results` currently holds.
if results:
    # Lowest MSE among the rows that have one; that row is set in bold.
    valid_mses = [
        entry["Test MSE"] for entry in results if entry.get("Test MSE") is not None
    ]
    best_mse = min(valid_mses) if valid_mses else None

    # Table preamble.
    lines = [
        r"\begin{table}[h]",
        r"  \centering",
        r"  \caption{GraphMix evaluation results}",
        r"  \begin{tabular}{llc}",
        r"    \toprule",
        r"    ID & Configuration & Test MSE \\",
        r"    \midrule",
    ]

    # One table row per result.
    for row in results:
        id_ = _latex_escape(row.get("ID", ""))
        cfg = _latex_escape(row.get("Configuration", ""))
        mse = row.get("Test MSE")
        if mse is not None and not (isinstance(mse, float) and math.isnan(mse)):
            mse_fmt = f"{mse:.4f}"
            if best_mse is not None and abs(mse - best_mse) < 1e-12:
                mse_str = r"\textbf{" + mse_fmt + "}"
            else:
                mse_str = mse_fmt
        else:
            # No usable value for this row.
            mse_str = "--"
        lines.append(f"    {id_} & {cfg} & {mse_str} \\\\")

    # Table epilogue.
    lines += [
        r"    \bottomrule",
        r"  \end{tabular}",
        r"\end{table}",
    ]

    latex_table = "\n".join(lines)
    print(latex_table)
else:
    print("No results available to build LaTeX table.")

\begin{table}[h]
  \centering
  \caption{GraphMix evaluation results}
  \begin{tabular}{llc}
    \toprule
    ID & Configuration & Test MSE \\
    \midrule
    1 & GraphMix 10\% Labelled Data & 0.0260 \\
    2 & GraphMix 20\% Labelled Data & \textbf{0.0198} \\
    3 & GraphMix 5\% Labelled Data & 0.0381 \\
    4 & GraphMix 1\% Labelled Data & 0.0790 \\
    \bottomrule
  \end{tabular}
\end{table}
