# Test Modules

In [1]:
# Set project root directory and add `src` to path
import os
import sys
from pathlib import Path

# Default checkout location. Set the WORD2GM_PROJECT_ROOT environment variable
# to point the notebook at a different checkout without editing this cell;
# an unset or empty variable falls back to the default below.
PROJECT_ROOT = os.environ.get('WORD2GM_PROJECT_ROOT') or '/scratch/edk202/word2gm-fast'
project_root = Path(PROJECT_ROOT)
src_path = project_root / 'src'

# Guarded so that re-running this cell does not insert duplicate entries.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

# Import the notebook setup utilities.
# Kept below the sys.path update: presumably `word2gm_fast` is resolved through
# the `src` entry added above (unless it is installed separately).
from word2gm_fast.utils.notebook_setup import setup_testing_notebook, enable_autoreload, run_silent_subprocess

# Enable autoreload for development
enable_autoreload()

# Set up environment; the returned object is indexed by name below.
env = setup_testing_notebook(project_root=PROJECT_ROOT)

# Extract commonly used modules for convenience
tf = env['tensorflow']
np = env['numpy']
pd = env['pandas']
print_resource_summary = env['print_resource_summary']

Autoreload enabled
Project root: /scratch/edk202/word2gm-fast
TensorFlow version: 2.19.0
Device mode: GPU-enabled
Testing environment ready!
Project root: /scratch/edk202/word2gm-fast
TensorFlow version: 2.19.0
Device mode: GPU-enabled
Testing environment ready!


In [None]:
# Run each pytest module in a subprocess and render a compact per-test summary.
import datetime
import html
from IPython.display import display, Markdown
from collections import OrderedDict

# (test file, group label) pairs; files are run in the order listed here.
modules_to_test = [
    ('tests/test_corpus_to_dataset.py', 'Corpus processing'),
    ('tests/test_dataset_to_triplets.py', 'Triplet generation'),
    ('tests/test_index_vocab.py', 'Vocabulary indexing'), 
    ('tests/test_tfrecord_io.py', 'TFRecord I/O'),
    ('tests/test_word2gm_model.py', 'Word2GM model')
]

# Group tests by group label, preserving order
grouped = OrderedDict()
for test_file, group in modules_to_test:
    grouped.setdefault(group, []).append(test_file)


def _as_pre_block(text):
    """Return `text` wrapped in a <pre> element with &, < and > escaped.

    The result is rendered through Markdown/HTML, so unescaped subprocess
    output containing markup characters (e.g. ``<module>`` in a traceback)
    would otherwise be interpreted as tags instead of being shown literally.
    """
    return f"<pre>{html.escape(text, quote=False)}</pre>"


# (group label, subprocess result) for every test file with a non-zero exit code.
failures = []

for idx, (group, test_files) in enumerate(grouped.items()):
    for test_file in test_files:
        # `-s` turns off pytest's output capturing so the tests' own
        # "[TEST ..." lines reach stdout, where they are picked up below.
        cmd_result = run_silent_subprocess(
            ['pytest', test_file, '--maxfail=1', '--disable-warnings', '-q', '-s'],
            capture_output=True, text=True
        )
        # The status is per file: every summary line from this file is tagged
        # with the same PASS/FAIL label derived from the process exit code.
        status = "PASS" if cmd_result.returncode == 0 else "FAIL"
        summary_lines = []
        if cmd_result.stdout:
            for line in cmd_result.stdout.splitlines():
                line = line.strip()
                if line.startswith("[TEST"):
                    summary_lines.append(f"[{status}] {line}")
        if summary_lines:
            for summary in summary_lines:
                display(Markdown(_as_pre_block(summary)))
        else:
            # No recognisable summary lines: fall back to showing the raw output.
            display(Markdown(f"[{status}] No [TEST ...] summary lines found for {test_file}. Raw output:"))
            if cmd_result.stdout:
                display(Markdown(_as_pre_block(cmd_result.stdout)))
            if cmd_result.stderr:
                display(Markdown(_as_pre_block(cmd_result.stderr)))
        if cmd_result.returncode != 0:
            failures.append((group, cmd_result))
    # Visual spacer between groups (not after the last one).
    if idx < len(grouped) - 1:
        display(Markdown("&nbsp;"))

# Plain-text dump of the full output for anything that failed.
if failures:
    print("\nDetailed output for failed tests:")
    for group, cmd_result in failures:
        print(f"\n--- {group} ---")
        if cmd_result.stdout:
            print("stdout:\n" + cmd_result.stdout)
        if cmd_result.stderr:
            print("stderr:\n" + cmd_result.stderr)

<pre>[PASS] [TEST -- corpus_to_dataset] test_corpus_to_dataset: Only valid 5-grams kept (center word and at least one context word not UNK); correct summary stats</pre>

&nbsp;

<pre>[PASS] [TEST -- dataset_to_triplets] test_center_word_extraction: Center words match expected values</pre>

<pre>[PASS] [TEST -- dataset_to_triplets] test_context_word_extraction: Positive context tokens are valid</pre>

<pre>[PASS] [TEST -- dataset_to_triplets] test_multiple_triplets_per_line: Multiple triplets are generated per line, skipping UNK contexts</pre>

<pre>[PASS] [TEST -- dataset_to_triplets] test_negative_sampling_range: Negative samples are in the correct range and not UNK</pre>

<pre>[PASS] [TEST -- dataset_to_triplets] test_no_triplets_with_unk_context: No triplet has UNK as positive context word</pre>

&nbsp;

<pre>[PASS] [TEST -- index_vocab] test_vocab_table_contents: Table contains all expected tokens and handles OOV as UNK</pre>

&nbsp;

<pre>[PASS] [TEST -- tfrecord_io] test_write_and_load_triplets_compressed: Roundtrip TFRecord I/O for triplets (compressed)</pre>

<pre>[PASS] [TEST -- tfrecord_io] test_parse_triplet_example: Correct parsing of serialized triplet examples</pre>

<pre>[PASS] [TEST -- tfrecord_io] test_write_and_load_vocab_uncompressed: Roundtrip TFRecord I/O for vocabulary (uncompressed)</pre>

<pre>[PASS] [TEST -- tfrecord_io] test_write_and_load_vocab_compressed: Roundtrip TFRecord I/O for vocabulary (compressed)</pre>

<pre>[PASS] [TEST -- tfrecord_io] test_parse_vocab_example: Correct parsing of serialized vocabulary examples</pre>

<pre>[PASS] [TEST -- tfrecord_io] test_save_and_load_pipeline_artifacts: Saving and loading of all pipeline artifacts (vocab and triplets) as TFRecords</pre>

&nbsp;

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=True, num_mixtures=1</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=True, num_mixtures=1</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=False, num_mixtures=1</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=False, num_mixtures=1</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=True, num_mixtures=2</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=True, num_mixtures=2</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=False, num_mixtures=2</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=False, num_mixtures=2</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=True, num_mixtures=3</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=True, num_mixtures=3</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=True, wout=False, num_mixtures=3</pre>

<pre>[PASS] [TEST -- word2gm_model] test_model_configurations: Model runs with spherical=False, wout=False, num_mixtures=3</pre>