In [4]:
from pathlib import Path
from typing import Dict, Any

import pandas as pd
from dotenv import load_dotenv


In [5]:
# Locate the project root: the first of the working directory, its parent, or
# its grandparent that contains both a data/ and a src/ folder.
cwd = Path.cwd()
candidates = [cwd, cwd.parent, cwd.parent.parent]

project_root = next(
    (
        folder
        for folder in candidates
        if (folder / "data").exists() and (folder / "src").exists()
    ),
    cwd.parent,  # nothing matched: fall back to the parent of the working directory
)

print("Detected project root:", project_root)

# Load environment variables from the root-level .env file when one is present.
env_path = project_root / ".env"
if env_path.exists():
    load_dotenv(env_path)

# Data locations derived from the detected root.
DATA_DIR = project_root / "data"
PROCESSED_DIR = DATA_DIR / "processed"


Detected project root: C:\Users\Admin\OneDrive\Desktop\Capstone-MAT496


In [55]:
# Guard cell: the cells below call `qgen_app`, which this notebook does not
# define itself. Stop early with an actionable message if the name is missing.
try:
    qgen_app
except NameError:
    missing_app_message = (
        "qgen_app is not defined in this kernel. "
        "Run Notebook 4 in the same session (or refactor the graph into src/ for import)."
    )
    raise RuntimeError(missing_app_message)
else:
    print("qgen_app is already available from Notebook 4.")


qgen_app is already available from Notebook 4.


In [56]:
from typing import TypedDict, List
from typing_extensions import Annotated
from operator import add


In [57]:
class QGenState(TypedDict):
    """Typed schema of the state dictionary handed to ``qgen_app.invoke``.

    Two fields are declared as ``Annotated[..., add]``, i.e. they carry
    ``operator.add`` as annotation metadata. Presumably the graph uses that
    as a reducer so list updates are concatenated instead of replaced --
    TODO confirm against the graph definition in Notebook 4.
    """

    topic: str  # topic the questions are generated for
    mode: str  # run_qbank_pipeline always sets this to "mcq"
    n_target: int  # requested question count (presumably a target, not a hard cap -- verify)
    retrieved_context: List[Dict[str, Any]]  # starts empty; presumably filled by a retrieval step -- confirm
    last_prompt: str  # starts as ""; presumably the most recent prompt sent to the model -- confirm
    last_raw_output: str  # starts as ""; presumably the most recent raw model response -- confirm
    questions_mcq: Annotated[List[Dict[str, Any]], add]  # generated MCQ records, one dict per question
    errors: Annotated[List[str], add]  # error messages collected during a run
    done: bool  # starts as False; completion flag maintained by the graph


In [58]:
def run_qbank_pipeline(topic: str, n_target: int = 3) -> Dict[str, Any]:
    """
    Run the MCQ generation graph once for a single topic.

    A fresh starting state is built on every call (mode fixed to "mcq",
    empty context / prompt / output / question / error fields, ``done``
    set to False) and passed to ``qgen_app.invoke``.

    Args:
        topic: Topic to generate questions for.
        n_target: Value stored under "n_target" in the starting state.

    Returns:
        Whatever ``qgen_app.invoke`` returns for that starting state.
    """
    # Keyword order mirrors the field order of QGenState.
    start_state: QGenState = dict(
        topic=topic,
        mode="mcq",
        n_target=n_target,
        retrieved_context=[],
        last_prompt="",
        last_raw_output="",
        questions_mcq=[],
        errors=[],
        done=False,
    )
    return qgen_app.invoke(start_state)


In [59]:
# Topics used for the end-to-end run.
topics_to_test = [
    "probability distribution",
    "linear regression",
]

# One record per generated question, tagged with the topic of the run that produced it.
all_questions = []

for run_topic in topics_to_test:
    print(f"\n=== Running pipeline for topic: {run_topic} ===")
    state = run_qbank_pipeline(run_topic, n_target=3)

    # Short per-run summary.
    print("Done flag:", state["done"])
    print("Errors:", state["errors"])
    mcqs = state["questions_mcq"]
    print("Number of MCQs:", len(mcqs))

    # Copy each question so tagging it does not modify the pipeline's own state.
    all_questions.extend({**mcq, "run_topic": run_topic} for mcq in mcqs)

qbank_df = pd.DataFrame(all_questions)
display(qbank_df.head(10))



=== Running pipeline for topic: probability distribution ===
Done flag: False
Errors: []
Number of MCQs: 4

=== Running pipeline for topic: linear regression ===
Done flag: False
Errors: []
Number of MCQs: 4


Unnamed: 0,id,stem,options,correct_option_index,difficulty,topic,source_excerpt,source_doc_id,run_topic
0,probability distribution_mcq_1,Dummy question about probability distribution (1),"[Option A, Option B, Option C, Option D]",0,easy,probability distribution,Dummy excerpt 1.,dummy:page=0:chunk=0,probability distribution
1,probability distribution_mcq_2,Dummy question about probability distribution (2),"[Option A, Option B, Option C, Option D]",1,medium,probability distribution,Dummy excerpt 2.,dummy:page=0:chunk=1,probability distribution
2,probability distribution_mcq_1,Dummy question about probability distribution (1),"[Option A, Option B, Option C, Option D]",0,easy,probability distribution,Dummy excerpt 1.,dummy:page=0:chunk=0,probability distribution
3,probability distribution_mcq_2,Dummy question about probability distribution (2),"[Option A, Option B, Option C, Option D]",1,medium,probability distribution,Dummy excerpt 2.,dummy:page=0:chunk=1,probability distribution
4,linear regression_mcq_1,Dummy question about linear regression (1),"[Option A, Option B, Option C, Option D]",0,easy,linear regression,Dummy excerpt 1.,dummy:page=0:chunk=0,linear regression
5,linear regression_mcq_2,Dummy question about linear regression (2),"[Option A, Option B, Option C, Option D]",1,medium,linear regression,Dummy excerpt 2.,dummy:page=0:chunk=1,linear regression
6,linear regression_mcq_1,Dummy question about linear regression (1),"[Option A, Option B, Option C, Option D]",0,easy,linear regression,Dummy excerpt 1.,dummy:page=0:chunk=0,linear regression
7,linear regression_mcq_2,Dummy question about linear regression (2),"[Option A, Option B, Option C, Option D]",1,medium,linear regression,Dummy excerpt 2.,dummy:page=0:chunk=1,linear regression


In [60]:
# Persist the collected questions as a CSV file under the processed-data folder.
out_path = PROCESSED_DIR / "end_to_end_mcqs_dummy.csv"
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target folder exists first (e.g. on a fresh checkout without data/processed).
out_path.parent.mkdir(parents=True, exist_ok=True)
qbank_df.to_csv(out_path, index=False, encoding="utf-8")
print("Saved end-to-end MCQs to:", out_path)


Saved end-to-end MCQs to: C:\Users\Admin\OneDrive\Desktop\Capstone-MAT496\data\processed\end_to_end_mcqs_dummy.csv
