In [9]:
import requests
from bs4 import BeautifulSoup
import json

# Scrape a single web page and dump its headings, paragraphs and list items
# to books.json for use by later cells.

# URL to scrape
url = "https://www.kseebsolutions.com/kseeb-solutions-for-class-6-social-science-chapter-2-part-1/"

# Send an HTTP request to the website.
# requests applies no timeout by default, so an unresponsive server would hang
# this cell forever; pass an explicit timeout (seconds) to fail fast instead.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise on 4xx/5xx instead of parsing an error page

# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')

# Extract content (headings, paragraphs, and lists).
# Each value is a list of whitespace-stripped text strings, one per matching tag,
# in document order.
data = {
    # H2 headings
    "headings": [h.get_text(strip=True) for h in soup.find_all('h2')],
    # Paragraph text
    "paragraphs": [p.get_text(strip=True) for p in soup.find_all('p')],
    # List items
    "list_items": [li.get_text(strip=True) for li in soup.find_all('li')]
}

# Save extracted data to a JSON file.
# ensure_ascii=False keeps non-ASCII characters readable in the file rather
# than writing them as \uXXXX escapes.
with open('books.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=8, ensure_ascii=False)

print("Created JSON File: books.json")

Created JSON File: books.json


In [10]:
import json
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
import getpass
import os

# Read the scraped paragraphs from books.json, ask the LLM to extract
# question/answer pairs from them, and write the parsed pairs to
# questions_and_answers.json.

# Set up the Google API key (prompt only if it is not already in the environment)
if 'GOOGLE_API_KEY' not in os.environ:
    os.environ['GOOGLE_API_KEY'] = getpass.getpass(
        'Provide your Google API Key: ')


def _parse_qa_pairs(llm_text):
    """Split the LLM's numbered "Question: ... Answer: ..." text into records.

    Args:
        llm_text: Raw text returned by the LLM.

    Returns:
        A list of {"question": str, "answer": str} dicts, one per segment
        that contains an "Answer:" marker. The question text includes any
        "Options:" lines, since those sit between the two markers.
    """
    pairs = []
    for segment in llm_text.split("Question:"):
        if "Answer:" not in segment:
            # Text before the first question (e.g. the leading "1. ").
            continue
        # maxsplit=1: a segment that mentions "Answer:" more than once must
        # still yield exactly two parts, otherwise unpacking raises ValueError.
        question, answer = segment.split("Answer:", 1)
        answer_lines = answer.strip().split("\n")
        last_line = answer_lines[-1].strip()
        # Splitting on "Question:" leaves the next item's number ("2.") as the
        # final line of this answer; drop it when it is a bare number marker.
        if (len(answer_lines) > 1 and last_line.endswith(".")
                and last_line[:-1].isdigit()):
            answer_lines.pop()
        pairs.append({
            "question": question.strip(),
            "answer": "\n".join(answer_lines).strip(),
        })
    return pairs


try:
    # Step 1: Load JSON Content
    with open('books.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Step 2: Combine Content for Input
    # Safely fetch 'paragraphs' (a missing key yields an empty string)
    content = "\n".join(data.get('paragraphs', []))

    if not content:
        raise ValueError(
            "The JSON file does not contain 'paragraphs' or it's empty.")

    # Step 3: Define the Prompt Template
    qa_prompt = PromptTemplate(
        input_variables=["text"],
        template=(
            "Given the following educational content, which has a set of multiple choice questions and answers, extract key questions and their answers options:\n\n"
            "{text}\n\n"
            "Format the output as:\n"
            "1. Question: <Your question>\n   Options:<Your Options> \n Answer: <Your answer>\n"
            "2. Question: <Your question>\n   Options:<Your Options> \n Answer: <Your answer>\n"
        )
    )

    # Step 4: Set Up the LLM
    llm = ChatGoogleGenerativeAI(model='gemini-pro', temperature=0.3)
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)

    # Step 5: Generate Questions and Answers
    response = qa_chain.run(text=content)

    if not response.strip():
        raise ValueError(
            "The LLM returned an empty response. Check the input or model settings.")

    # Step 6: Save the Output
    output_data = {"questions_and_answers": _parse_qa_pairs(response)}

    with open('questions_and_answers.json', 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=4, ensure_ascii=False)

    print("Generated Q&A JSON File: questions_and_answers.json")

# json.JSONDecodeError subclasses ValueError, so it must be caught first.
except json.JSONDecodeError:
    print("Error: Failed to decode the JSON file. Please check the file format.")
except ValueError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected Error: {e}")

Generated Q&A JSON File: questions_and_answers.json
