In [1]:
# %% Setup
import sys
from pathlib import Path

from IPython.display import Markdown

# Repository root, derived from the kernel's working directory.
REPO_ROOT = Path.cwd().parent  # assumes notebooks/ is directly under repo root

# Make the repository root importable so the `src.*` imports below resolve.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if str(REPO_ROOT) not in sys.path:
    sys.path.append(str(REPO_ROOT))

# URI resolver
def resolve_uri(uri: str) -> Path:
    """Resolve a logical URI like 'data:aiml_glossary.json' to a filesystem path.

    A logical URI has the form '<prefix>:<name>'. The prefix selects a
    directory of the same name directly under REPO_ROOT, and the name is the
    path inside that directory.

    Args:
        uri: Logical URI whose prefix is 'data', 'output' or 'visualizations'.
            Only the first ':' separates prefix from name, so the name may
            itself contain ':'.

    Returns:
        REPO_ROOT / <prefix> / <name>. The path is not checked for existence.

    Raises:
        ValueError: If the URI contains no ':' separator, or if the prefix is
            not one of the known prefixes.
    """
    prefix, separator, name = uri.partition(":")
    if not separator:
        # Without this check a bare filename would surface as an opaque
        # tuple-unpacking error instead of pointing at the malformed URI.
        raise ValueError(f"Malformed URI {uri!r}: expected '<prefix>:<name>'")
    if prefix not in ("data", "output", "visualizations"):
        raise ValueError(f"Unknown URI prefix: {prefix}")
    # Each known prefix maps to the directory of the same name under the repo root.
    return REPO_ROOT / prefix / name

from src.generate_outputs import generate
from src.cluster_analysis import run_clustering
from src.semantic_clustering import run_semantic_clustering
from src.evaluate_clusters import evaluate_clusters
from src.enrich_glossary import enrich_glossary
from src.link_dictionary import build_link_dictionary
from src.coverage_report import generate_report


In [2]:
# %% Step 1: Generate outputs
# Passes the glossary as the logical URI "data:aiml_glossary.json". Per the
# recorded output of this cell, the run writes output/terms.csv and
# output/glossary_copy.json.
# NOTE(review): the second argument is a bare "output", not a "prefix:name"
# URI like the other steps use — presumably an output location; confirm
# against src.generate_outputs.generate.
generate("data:aiml_glossary.json", "output")

Terms written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/terms.csv
Glossary copy written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/glossary_copy.json


In [3]:
# %% Step 2: Build link dictionary
# Per the recorded output of this cell, the run writes
# output/link_dictionary.json, which Steps 3 and 4 pass in as
# "output:link_dictionary.json" — so this cell must run before them.
build_link_dictionary("data:aiml_glossary.json")

Link dictionary written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/link_dictionary.json


In [4]:
# %% Step 3: Enrich glossary
# Takes the glossary URI and the link dictionary URI produced by Step 2.
# Per the recorded output of this cell, the run writes
# output/enriched_glossary.json.
enrich_glossary("data:aiml_glossary.json", "output:link_dictionary.json")

Enriched glossary written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/enriched_glossary.json


In [5]:
# %% Step 4: Graph clustering
# Takes the glossary URI and the link dictionary URI produced by Step 2.
# Per the recorded output of this cell, the run writes
# output/cluster_assignments.csv, output/graph_stats.json and
# visualizations/glossary_clusters.png.
run_clustering("data:aiml_glossary.json", "output:link_dictionary.json")

Cluster assignments written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/cluster_assignments.csv
Graph stats written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/graph_stats.json
Visualization written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/visualizations/glossary_clusters.png


In [6]:
# %% Step 5: Semantic clustering
# Per the recorded output of this cell, the run writes
# output/semantic_cluster_assignments.csv and
# visualizations/semantic_clusters.png.
# NOTE(review): num_clusters=8 is a hard-coded tuning value with no stated
# rationale; consider promoting it to a named constant in the Setup cell.
run_semantic_clustering("data:aiml_glossary.json", num_clusters=8)

Semantic cluster assignments written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/semantic_cluster_assignments.csv
Visualization written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/visualizations/semantic_clusters.png


In [7]:
# %% Step 6: Evaluate clusters
# Per the recorded output of this cell, the run writes output/ari_metrics.json
# and returns a dict with 'num_terms_compared' and 'ari' keys.
# NOTE(review): presumably compares the graph (Step 4) and semantic (Step 5)
# cluster assignments — confirm in src.evaluate_clusters. The recorded ARI of
# ~0.06 is close to 0, i.e. the two labelings agree little more than chance.
summary = evaluate_clusters()
print(summary)

ARI metrics written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/ari_metrics.json
{'num_terms_compared': 96, 'ari': 0.062173416724279305}


In [8]:
# %% Step 7: Coverage report
# Per the recorded output of this cell, the run prints one line per expected
# artifact and writes output/coverage_report.json. The return value (a dict
# mapping each artifact URI to a bool) is shown as the cell result because
# the call is the last expression in the cell.
generate_report()

Coverage Report:
✅ data:aiml_glossary.json (True)
✅ output:terms.csv (True)
✅ output:glossary_copy.json (True)
✅ output:link_dictionary.json (True)
✅ output:enriched_glossary.json (True)
✅ output:cluster_assignments.csv (True)
✅ output:semantic_cluster_assignments.csv (True)
✅ output:graph_stats.json (True)
✅ output:ari_metrics.json (True)
✅ output:coverage_report.json (True)
✅ visualizations:glossary_clusters.png (True)
✅ visualizations:semantic_clusters.png (True)

Coverage report written to /mnt/c/Users/ircal/OneDrive/AIML/aiml-glossary/output/coverage_report.json


{'data:aiml_glossary.json': True,
 'output:terms.csv': True,
 'output:glossary_copy.json': True,
 'output:link_dictionary.json': True,
 'output:enriched_glossary.json': True,
 'output:cluster_assignments.csv': True,
 'output:semantic_cluster_assignments.csv': True,
 'output:graph_stats.json': True,
 'output:ari_metrics.json': True,
 'output:coverage_report.json': True,
 'visualizations:glossary_clusters.png': True,
 'visualizations:semantic_clusters.png': True}

In [None]:
# %% Status Badge
# Re-runs the coverage check and condenses it into one pass/fail badge.
# Relies on the imports made in the Setup cell (generate_report, Markdown).
report = generate_report()

# Determine overall status. The workflow passes only when the report is
# non-empty and every entry is exactly True; all() of an empty iterable is
# True, so an empty report must be rejected explicitly rather than shown as
# a pass.
workflow_passed = bool(report) and all(v is True for v in report.values())

# Emoji inside the shields.io URL are percent-encoded UTF-8
# (U+2705 check mark = %E2%9C%85, U+274C cross mark = %E2%9D%8C) so the link
# stays valid regardless of how the notebook file is encoded or rendered.
if workflow_passed:
    badge = "![Workflow Status](https://img.shields.io/badge/Workflow-%E2%9C%85%20Passed-brightgreen)"
    print("Workflow ✅ Passed")
else:
    badge = "![Workflow Status](https://img.shields.io/badge/Workflow-%E2%9D%8C%20Issues-red)"
    print("Workflow ❌ Issues detected")

# Display badge inline in notebook: the last expression of the cell is rendered.
Markdown(badge)


## ✅ Expected Artifact Checklist

After running this runbook end-to-end, the following artifacts should exist:

### 📂 Output directory (`output/`)
- [ ] `terms.csv` → glossary terms and definitions in CSV format  
- [ ] `glossary_copy.json` → copy of the glossary JSON  
- [ ] `link_dictionary.json` → generated term-to-term link dictionary  
- [ ] `enriched_glossary.json` → glossary entries enriched with metadata (length, characters, link counts)  
- [ ] `cluster_assignments.csv` → graph clustering assignments  
- [ ] `semantic_cluster_assignments.csv` → semantic clustering assignments  
- [ ] `graph_stats.json` → node/edge counts and cluster stats from graph clustering  
- [ ] `ari_metrics.json` → evaluation metric (Adjusted Rand Index)  
- [ ] `coverage_report.json` → summary of which artifacts exist (✅/❌)  

### 📂 Visualizations directory (`visualizations/`)
- [ ] `glossary_clusters.png` → graph clustering visualization  
- [ ] `semantic_clusters.png` → bar chart of semantic cluster sizes  

### 📊 Diagnostics
- Console output should show:
  - Graph stats (nodes, edges, clusters)  
  - Semantic cluster sizes  
  - ARI metric summary  
  - Coverage report with ✅/❌ markers
