In [6]:
import json
import yaml
from agents.question_model import QAgent

# Load topics
with open("assets/topics.json") as f:
    topics = json.load(f)

# Load ICL samples
inc_samples = QuestioningAgent.load_icl_samples("assets/topics_example.json")

# Create agent
agent_base = QAgent(use_lora=False)

# Load generation config
with open("qgen.yaml") as f:
    gen_kwargs = yaml.safe_load(f)

# Generate 20 questions
questions, tls, gts = agent_base.generate_batches(
    num_questions=20,
    topics=topics,
    batch_size=5,
    wadvsys=True,
    wicl=True,
    inc_samples=inc_samples,
    **gen_kwargs
)

print("Generated:", len(questions))


NameError: name 'QuestioningAgent' is not defined

In [4]:
filtered = agent.filter_questions(questions)

print("Total generated:", len(questions))
print("Valid questions:", len(filtered))
print("Validity rate:", len(filtered) / len(questions))

Total generated: 20
Valid questions: 19
Validity rate: 0.95


In [5]:
from agents.answer_agent import AnsweringAgent

a_agent = AnsweringAgent()

with open("agen.yaml") as f:
    agen_kwargs = yaml.safe_load(f)

answers, tls, gts = a_agent.answer_batches(
    questions=filtered,
    batch_size=5,
    **agen_kwargs
)

filtered_answers = a_agent.filter_answers(answers)

correct = 0
for q, a in zip(filtered, filtered_answers):
    if a is not None and q["answer"][0] == a["answer"]:
        correct += 1

accuracy = correct / len(filtered)
print("A-Agent Accuracy:", accuracy)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

STEPS: : 5batch [00:14,  2.89s/batch]                  

A-Agent Accuracy: 0.2631578947368421





In [6]:
N = len(filtered)
num_correct = sum(
    1 for q, a in zip(filtered, filtered_answers)
    if a is not None and q["answer"][0] == a["answer"]
)

a_score = num_correct * 100 / (N + 1e-9)
q_score = (N - num_correct) * 100 / (N + 1e-9)

print("Number of questions:", N)
print("Correct answers:", num_correct)
print("A-Agent Score:", a_score)
print("Q-Agent Score:", q_score)


Number of questions: 19
Correct answers: 5
A-Agent Score: 26.31578947229917
Q-Agent Score: 73.68421052243767


In [7]:
from agents.answer_agent import AnsweringAgent
a = AnsweringAgent()
print("Loaded successfully")


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loaded successfully


In [14]:
from agents.question_agent import QuestioningAgent
import json

agent = QuestioningAgent()

with open("outputs/questions_50.json") as f:
    questions = json.load(f)

filtered = agent.filter_questions(questions)

print("Generated:", len(questions))
print("Valid:", len(filtered))

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Generated: 50
Valid: 50


In [15]:
syll = [q for q in filtered if "Syllogism" in q["topic"]]
series = [q for q in filtered if "Series" in q["topic"]]
blood = [q for q in filtered if "Blood" in q["topic"]]

for q in series[:10]:
    print(q)

for q in syll[:10]:
    print(q)

for q in blood[:10]:
    print(q)


{'topic': 'Series and Patterns/Mixed Series (Alphanumeric)', 'question': '_8_, _9_, AB10, CD11, EF12, GH13, __', 'choices': ['A) IA14', 'B) IK14', 'C) IH14', 'D) IG14'], 'answer': 'B', 'explanation': "The sequence alternates between single-letter increments followed by double-letter increments while numbers increment sequentially starting at 8. Thus, after 'GH', it should continue with 'IJ' then 'IK'. Correct continuation is IK14."}
{'topic': 'Series and Patterns/Mixed Series (Alphanumeric)', 'question': '_8_, _9_, AB10, CD11, EF12, GH13, __', 'choices': ['A) IA14', 'B) IK14', 'C) II14', 'D) IH14'], 'answer': 'C', 'explanation': "The sequence alternates between single-letter groups increasing by one letter at every step and double-letter groups where both letters increment sequentially while numbers also rise consecutively starting from 8. Thus, after 'GH', comes 'II' followed by 14."}
{'topic': 'Series and Patterns/Mixed Series (Alphanumeric)', 'question': '_8_, _9_, AB10, CD11, EF12,

In [9]:
all_runs = []

for i in range(1):
    questions, tls, gts = agent.generate_batches(
        num_questions=5,
        topics=topics,
        batch_size=5,
        wadvsys=True,
        wicl=True,
        inc_samples=inc_samples,
        **gen_kwargs
    )
    
    filtered = agent.filter_questions(questions)
    
    print(f"\nRUN {i+1}")
    print(f"Generated: {len(questions)} | Valid: {len(filtered)}")
    
    all_runs.append(filtered)


STEPS: 100%|██████████| 1/1 [00:06<00:00,  6.95s/it]

Skipping invalid JSON at index 4: {
  "topic": "Logical Reasoning/Syllogisms",
  "question": "Statement I: Every circle is a square
Statement II: Each triangle intersects at least two squares
Statement III: Not all triangles intersect circles
Conclusion I: At least one non-circle exists that intersects every square
Conclusion II: It's impossible for any square to fully contain another.",
  "choices": [
    "A) A) If only Conclusion I follows",
    "B) B) If only Conclusion II follows",
    "C) C) If Conclusions I and II both follow",
    "D) D) Neither Conclusion I nor Conclusion II follows"
  ],
  "answer": "B",
  "explanation": "Every square contains itself; thus no other shape can intersect each square uniquely. Statement II does not imply containment rules between shapes."
}

RUN 1
Generated: 5 | Valid: 4





In [1]:
!python -m agents.question_agent \
--output_file outputs/questions_50.json \
--num_questions 30 \
--batch_size 5 \
--verbose

`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|██████████████████| 8/8 [00:18<00:00,  2.30s/it]
STEPS:   0%|                                              | 0/6 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
STEPS: 100%|██████████████████████████████████████| 6/6 [00:57<00:00,  9.57s/it]
Generated 30 questions!
{
  "topic": "Logical Reasoning/Syllogisms",
  "question": "Statement I: Some birds are mammals\nStatement II: All mammals are reptiles\nStatement III: Some reptiles are amphibians\nStatement IV: All amphibians are fish\nConclusions:\nI: Some fish are mammals\nII: Some reptiles are birds",
  "choices": [
    "A) Only Conclusion I follows.",
    "B) Only Conclusion II follows.",
    "C) Both Conclusions I and II follow.",
    "D) Neither Conclusion I nor Conclusion II follows."
  ],
  "answer": "A",
  "explanation": "From State

In [None]:
from agents.question_agent import QuestioningAgent
import json

agent = QuestioningAgent()

with open("outputs/questions_50.json") as f:
    questions = json.load(f)

filtered = agent.filter_questions(questions)

print("Generated:", len(questions))
print("Valid:", len(filtered))

In [2]:
from agents.question_model import QAgent


In [5]:
# agent = QAgent(
#     use_lora=True,
#     lora_path="/workspace/AAIPL/qwen_lora_output/checkpoint-100"
# )
agent_base = QAgent(use_lora=False)

print(type(agent.model))


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

<class 'peft.peft_model.PeftModelForCausalLM'>


In [4]:
for name, param in agent.model.named_parameters():
    if "lora" in name.lower():
        print("LoRA parameter found:", name)
        break

LoRA parameter found: base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight


In [4]:
import json
import yaml
from agents.question_agent import QuestioningAgent

# Load topics
with open("assets/topics.json") as f:
    topics = json.load(f)

# Load ICL samples
inc_samples = QuestioningAgent.load_icl_samples("assets/topics_example.json")

# Load generation config
with open("qgen.yaml") as f:
    gen_kwargs = yaml.safe_load(f)


In [5]:
from agents.question_agent import QuestioningAgent

agent_base = QuestioningAgent(use_lora=False)

questions_base, tls_base, gts_base = agent_base.generate_batches(
    num_questions=50,
    topics=topics,
    batch_size=5,
    wadvsys=True,
    wicl=True,
    inc_samples=inc_samples,
    **gen_kwargs
)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

STEPS:   0%|          | 0/10 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
STEPS: 100%|██████████| 10/10 [00:49<00:00,  4.94s/it]


In [None]:
from agents.question_agent import QuestioningAgent
agent_lora = QuestioningAgent(
    use_lora=True,
    lora_path="/workspace/AAIPL/qwen_lora_output/checkpoint-100"
)

questions_lora, tls_lora, gts_lora = agent_lora.generate_batches(
    num_questions=50,
    topics=topics,
    batch_size=5,
    wadvsys=True,
    wicl=True,
    inc_samples=inc_samples,
    **gen_kwargs
)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Loading LoRA from: /workspace/AAIPL/qwen_lora_output/checkpoint-100


STEPS:  50%|█████     | 5/10 [01:39<01:59, 23.81s/it]

In [1]:
filtered_base = agent_base.filter_questions(questions_base)
filtered_lora = agent_lora.filter_questions(questions_lora)

print("Base Valid:", len(filtered_base))
print("LoRA Valid:", len(filtered_lora))


NameError: name 'agent_base' is not defined