In [3]:
# Identifier of the LLM to use.
# NOTE(review): no visible cell reads LLM_MODEL — confirm it is still needed
# or pass it explicitly to whichever component should use it.
LLM_MODEL = "gemini-2.0-flash"  # e.g., "gemini-1.5-pro"

import os
import re
import json
import uuid
import asyncio
import docx
import xml.etree.ElementTree as ET
import numpy as np
from tqdm.auto import tqdm
from PyPDF2 import PdfReader
from typing import List, Iterable, Dict, Any, Optional
from pydantic import BaseModel, Field, ValidationError
from src.requirement_builder import HealthcareStoryExtractor
from google.cloud import bigquery
from langchain_google_vertexai import VertexAIEmbeddings, VertexAI
# Auto-detect project from your auth context
bq_client = bigquery.Client()
PROJECT_ID = bq_client.project
TEST = True   # set this to False for full run

# Configs (override via env)
FILE_PATH = os.environ.get("INPUT_FILE", "data/srs.pdf")
OUTPUT_JSON = os.environ.get("OUTPUT_JSON", "generated_user_stories.json")
DEDUPE = os.environ.get("DEDUPE", "true").lower() in {"1", "true", "yes"}
DUP_THRESHOLD = float(os.environ.get("DUP_THRESHOLD", "0.99"))
EXPORT = os.environ.get("EXPORT_TO_BQ", "false").lower() in {"1", "true", "yes"}
BATCH_LLM_SIZE = int(os.environ.get("BATCH_LLM_SIZE", "20"))
LLM_INNER_BATCH = int(os.environ.get("LLM_INNER_BATCH", "5"))

extractor = HealthcareStoryExtractor(project_id=PROJECT_ID)

# Run extraction
stories = await  extractor.extract_from_file(
        FILE_PATH,
        dedupe=DEDUPE,
        dup_threshold=DUP_THRESHOLD,
        batch_llm_size=BATCH_LLM_SIZE,
        llm_inner_batch=LLM_INNER_BATCH,
    TEST=TEST
    
    )
# Save test cases to CSV
# export_testcases_csv(stories, extractor._last_requirements, out_csv="testcases.csv")


# Save to JSON
with open(OUTPUT_JSON, "w", encoding="utf-8") as f:
    json.dump(stories, f, indent=2, ensure_ascii=False)
print(f"✅ Extracted user stories saved to {OUTPUT_JSON}")

import json

# Save segmented requirements
with open("requirements.json", "w", encoding="utf-8") as f:
    json.dump(extractor._last_requirements, f, indent=2, ensure_ascii=False)

# Save generated user stories
with open("stories.json", "w", encoding="utf-8") as f:
    json.dump(stories, f, indent=2, ensure_ascii=False)

print("✅ Saved requirements.json and stories.json")

# Optional: export to BigQuery
# if EXPORT:
#     extractor.export_to_bq(stories)


  from .autonotebook import tqdm as notebook_tqdm


ValidationError: 1 validation error for VertexAIEmbeddings
  Value error, Unable to find your project. Please provide a project ID by:
- Passing a constructor argument
- Using vertexai.init()
- Setting project using 'gcloud config set project my-project'
- Setting a GCP environment variable
- To create a Google Cloud project, please follow guidance at https://developers.google.com/workspace/guides/create-project [type=value_error, input_value={'project': '', 'location... 'default_metadata': ()}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/value_error

In [4]:
# Load previously generated user stories from disk.
stories_file = "outputs/stories.json"
if not os.path.exists(stories_file):
    # The extraction cell writes stories.json to the working directory, not to
    # outputs/, so a fresh run has no outputs/ copy; fall back to that file
    # instead of failing with FileNotFoundError.
    stories_file = "stories.json"

with open(stories_file, "r", encoding="utf-8") as f:
    stories = json.load(f)


In [5]:
from src.testcase_generator import TestCaseGenerator

# Keyword options for the generator, collected in one place so they are easy
# to adjust between runs.
generation_options = dict(
    feature_dir="features",
    steps_dir="steps",
    framework="pytest-bdd",  # or "behave"
    feature_per_epic=True,
    traceability_csv="traceability.csv",
)

tcgen = TestCaseGenerator()
result = tcgen.generate(stories, **generation_options)

🧪 Wrote 4 Gherkin feature file(s) to features
🧩 Wrote step stubs for pytest-bdd to steps/test_steps_bdd.py
📊 Wrote RTM to traceability.csv (12 rows)
✅ All requirements have at least one scenario.


In [32]:
from toolchain_connector import ToolChainConnector
# Instantiate the connector used for both exports
connector = ToolChainConnector()

# Jira export settings
jira_options = {
    "path": "jira_testcases.csv",
    "project_key": "",
    "default_labels": ["auto-generated", "vertex-ai", "traceable"],
    "test_type": "Manual",
}
connector.export_to_jira_csv(stories, **jira_options)

# Azure DevOps export settings
ado_options = {
    "path": "ado_testcases.csv",
    "area_path": "Healthcare\\DayHealth",
    "iteration_path": "Release 1",
}
# Kept as the cell's final bare expression so its return value is still displayed.
connector.export_to_ado_csv(stories, **ado_options)

🗂️  Wrote Jira-friendly CSV to jira_testcases.csv
🗂️  Wrote ADO-friendly CSV to ado_testcases.csv


PosixPath('ado_testcases.csv')

In [33]:
from coverage_analyser import CoverageAnalyzer

# Input artefacts consumed by the analyzer
coverage_inputs = {
    "requirements_path": "requirements.json",
    "stories_path": "stories.json",
    "testcases_path": "testcases.csv",
}
analyzer = CoverageAnalyzer(**coverage_inputs)

# Run the full analysis
analyzer.run_analysis()

✔️ Data loaded successfully.
✅ Requirement-level coverage: coverage_matrix.csv
✅ Epic-level rollup: epic_coverage.csv


In [34]:
from compliance_validator import build_compliance_report

# Build the compliance report from the saved stories and test cases.
# NOTE: the recorded run of this cell failed with FileNotFoundError for
# 'testcases.csv', so make sure that file exists before running.
compliance_report = build_compliance_report(
    stories_path="stories.json",
    testcases_path="testcases.csv",
    out_csv="compliance_evidence.csv",
    out_xlsx="compliance_evidence.xlsx",
    project_id=PROJECT_ID,
    use_embeddings=True,
)


FileNotFoundError: [Errno 2] No such file or directory: 'testcases.csv'

In [28]:
# Preview the first rows of the compliance report built in the previous cell.
compliance_report.head()

Unnamed: 0,Requirement ID,Story Id,Epic,Priority,User Story,Pages (Citations),Alignment Score,Needs Review,Matched Clauses,Clause Scores,Expected Controls,Detected Controls,Missing Controls,Evidence (Story + Steps)
0,AUTO-1,cde5b225-2357-4adb-93ec-da487789af32,,Must,"As a Doctor, I want to access patient informat...",13;1,0.286,False,FDA 21 CFR Part 11 11.10(e); ISO 13485 4.2.5; ...,0.546; 0.544; 0.537; 0.503,audit_trail; data_integrity; e_signature; rbac...,,audit_trail; data_integrity; e_signature; rbac...,"As a Doctor, I want to access patient informat..."
1,1.1,e38976ef-ecde-4dad-be96-946e70ee23c5,Digitize patient records and automate tracking.,Must,"As a Nurse, I want to access digitized patient...",4;4;6,0.449,False,FDA 21 CFR Part 11 11.10(e); FDA 21 CFR Part 1...,0.649; 0.618; 0.609; 0.574,audit_trail; data_integrity; e_signature; rbac...,,audit_trail; data_integrity; e_signature; rbac...,"As a Nurse, I want to access digitized patient..."
2,1.4,84a207e5-df27-4209-b80b-2fceaac9c3f0,Replace Trillium's current system with a new s...,Must,"As a Doctor, I want to view patient data store...",5;6,0.166,False,FDA 21 CFR Part 11 11.10(e); ISO 13485 4.2.5; ...,0.633; 0.602; 0.6; 0.568,audit_trail; data_integrity; e_signature; rbac...,,audit_trail; data_integrity; e_signature; rbac...,"As a Doctor, I want to view patient data store..."
3,2,fbaab98c-274f-47c7-82cc-759b1def04a7,,Must,"As a Doctor, I want to view audit history repo...",8;8,0.0,True,FDA 21 CFR Part 11 11.10(e); ISO 13485 4.2.5; ...,0.711; 0.646; 0.601; 0.593,audit_trail; data_integrity; e_signature; rbac...,,audit_trail; data_integrity; e_signature; rbac...,"As a Doctor, I want to view audit history repo..."
4,1,ab2d2e4b-4c15-4556-8c51-fe1916d29d7a,,Must,"As a Day Health staff member, I want to digiti...",4;4;4,0.024,True,ISO 13485 4.2.5; FDA 21 CFR Part 11 11.10(e); ...,0.557; 0.552; 0.525; 0.519,audit_trail; data_integrity; e_signature; rbac...,,audit_trail; data_integrity; e_signature; rbac...,"As a Day Health staff member, I want to digiti..."
