In [None]:
import json
import pandas as pd

# Load the question bank and the answer key.
# The encoding is pinned explicitly: without it, open() falls back to the
# platform's locale encoding, so the same file could decode differently (or fail)
# on another machine. Assumes both files are stored as UTF-8 - TODO confirm.
with open('../questions.json', 'r', encoding='utf-8') as f:
    questions = json.load(f)

with open('../answers.json', 'r', encoding='utf-8') as f:
    answers = json.load(f)

# Create dictionaries for easier lookup, keyed by question number.
# A dict keeps only the last record for a repeated key, so duplicated question
# numbers would otherwise disappear silently and skew the checks that rely on
# these indexes. Report them first; nothing is printed when there are none.
for source_name, records in (("questions.json", questions), ("answers.json", answers)):
    seen_numbers = set()
    duplicate_numbers = []
    for record in records:
        number = record["question_number"]
        if number in seen_numbers and number not in duplicate_numbers:
            duplicate_numbers.append(number)
        seen_numbers.add(number)
    if duplicate_numbers:
        print(f"Warning: duplicate question numbers in {source_name}: {duplicate_numbers}")

questions_dict = {q["question_number"]: q for q in questions}
answers_dict = {a["question_number"]: a for a in answers}

# Collect every question whose correct answer differs between the two files
mismatches = []
for question_number, question_data in questions_dict.items():
    # Questions that have no entry in answers_dict are not compared here
    if question_number not in answers_dict:
        continue

    answer_data = answers_dict[question_number]
    from_questions = question_data["correct_answer"]
    from_answers = answer_data["correct_answer"]

    # Nothing to record when both files agree
    if not (from_questions != from_answers):
        continue

    mismatch_record = {
        "question_number": question_number,
        "chapter": question_data["chapter"],
        "question": question_data["question"],
        "questions_correct_answer": from_questions,
        "answers_correct_answer": from_answers,
    }
    mismatches.append(mismatch_record)

# Report the outcome of the comparison: a confirmation line when nothing differs,
# otherwise a count followed by a table of the differing questions
if not mismatches:
    print("All answers match correctly between questions.json and answers.json")
else:
    mismatch_total = len(mismatches)
    print(f"Found {mismatch_total} mismatches between questions.json and answers.json:")
    mismatches_df = pd.DataFrame(mismatches)
    display(mismatches_df)

# Coverage checks: each file should reference the same set of question numbers
print("\nChecking for missing questions or answers...")

# Question numbers listed in questions but absent from the answers index
numbers_in_questions = (q["question_number"] for q in questions)
questions_without_answers = [
    number for number in numbers_in_questions if number not in answers_dict
]
if not questions_without_answers:
    print("All questions have corresponding answers.")
else:
    print(f"Found {len(questions_without_answers)} questions without answers: {questions_without_answers}")

# Question numbers listed in answers but absent from the questions index
numbers_in_answers = (a["question_number"] for a in answers)
answers_without_questions = [
    number for number in numbers_in_answers if number not in questions_dict
]
if not answers_without_questions:
    print("All answers have corresponding questions.")
else:
    print(f"Found {len(answers_without_questions)} answers without questions: {answers_without_questions}")

# Headline totals: number of records loaded from each file
total_questions, total_answers = len(questions), len(answers)
print(f"\nTotal questions: {total_questions}")
print(f"Total answers: {total_answers}")

# Distribution of correct answers
# The three expected options are pre-seeded so each is always reported, even at zero.
correct_answer_counts = {"a": 0, "b": 0, "c": 0}
for q in questions:
    option_key = q["correct_answer"]
    # .get() tallies an unexpected option value (and so surfaces it in the report)
    # instead of aborting the whole cell with a KeyError.
    correct_answer_counts[option_key] = correct_answer_counts.get(option_key, 0) + 1

print("\nDistribution of correct answers:")
for option, count in correct_answer_counts.items():
    # With no questions loaded the share is reported as 0.0% rather than
    # raising ZeroDivisionError.
    percentage = (count / len(questions)) * 100 if questions else 0.0
    print(f"Option {option}: {count} ({percentage:.1f}%)")

# Distribution by chapter
chapter_counts = {}
for q in questions:
    chapter = q["chapter"]
    chapter_counts[chapter] = chapter_counts.get(chapter, 0) + 1


def _chapter_sort_key(item):
    """Sort key for (chapter, count) pairs.

    Chapters that int() can convert are ordered by that integer value, exactly
    as a plain int() key would order them. Any other label is placed after the
    numeric chapters and ordered by its string form, so one non-numeric chapter
    no longer aborts the report.
    """
    label = item[0]
    try:
        return (0, int(label), "")
    except (TypeError, ValueError, OverflowError):
        return (1, 0, str(label))


print("\nDistribution by chapter:")
for chapter, count in sorted(chapter_counts.items(), key=_chapter_sort_key):
    # chapter_counts is non-empty only when questions is, so the division is safe
    percentage = (count / len(questions)) * 100
    print(f"Chapter {chapter}: {count} questions ({percentage:.1f}%)")

All answers match correctly between questions.json and answers.json

Checking for missing questions or answers...
All questions have corresponding answers.
All answers have corresponding questions.

Total questions: 150
Total answers: 150

Distribution of correct answers:
Option a: 44 (29.3%)
Option b: 48 (32.0%)
Option c: 58 (38.7%)

Distribution by chapter:
Chapter 1: 21 questions (14.0%)
Chapter 2: 21 questions (14.0%)
Chapter 3: 22 questions (14.7%)
Chapter 4: 21 questions (14.0%)
Chapter 5: 21 questions (14.0%)
Chapter 6: 22 questions (14.7%)
Chapter 7: 22 questions (14.7%)
