In [6]:
import json
import os
import re

import requests

# --- Notebook configuration --------------------------------------------------
# Groq API key, read from the environment (None when the variable is unset).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# JSON file whose first entry supplies the "content" and "url" used below.
# NOTE(review): the path starts with ".crawling-siteweb" (no slash after the
# dot) -- confirm this is a hidden directory and not a typo for "./crawling-siteweb".
INPUT_FILE = ".crawling-siteweb/out/result.json"

# JSONL file that receives one generated Q&A record per line.
OUTPUT_FILE = "./out/qa_dataset.jsonl"

# Model identifier sent in the chat-completion request.
# NOTE(review): confirm this model ID is still served by the provider.
MODEL_NAME = "llama3-8b-8192"

# Number of Question-Response pairs requested from the model.
# Asking for too many pairs may exceed the token limit -> keep it small (e.g. <= 10).
NUM_QNR = 10


In [7]:
# Load the JSON input and use its first entry as the source document.
with open(INPUT_FILE, mode="r", encoding="utf-8") as source_file:
    data = json.load(source_file)

first_entry = data[0]
content, url = first_entry["content"], first_entry["url"]
print(f"\n URL: {url}\n")

# Prompt asking the model for NUM_QNR pairs in a fixed "Question:/Response:"
# layout; the parsing cell further down relies on exactly these two labels.
prompt = f"""
Here is technical context:

{content}

Generate {NUM_QNR} pairs of Question-Response that are clear and helpful based on this content.
Format:
Question: ...
Response: ...
"""

# HTTP headers: bearer-token authentication plus a JSON body declaration.
headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}

# Chat-completion request body: a single user message carrying the prompt.
user_message = {"role": "user", "content": prompt}
payload = {
    "model": MODEL_NAME,
    "messages": [user_message],
    "temperature": 0.7,  # sufficiently creative: 0.5 < t < 1.0
    "max_tokens": 1024,
}




 URL: https://python.langchain.com/docs/introduction/



In [8]:
GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
REQUEST_TIMEOUT_S = 60  # without a timeout, requests can block forever on a stalled connection

# Matches a "Question"/"Response" label at the start of a (stripped) line while
# tolerating decorations chat models commonly add: list markers ("-", "*", "1."),
# markdown emphasis ("**Question:**") and numbering ("Question 3:").
# Groups: 1 = leading decoration, 2 = label, 3 = closing asterisks, 4 = text after ":".
_QA_LABEL_RE = re.compile(
    r"^((?:[-*#>]\s*|\d+[.)]\s*)*)(question|response)\s*\d*\s*(\**)\s*:(.*)$",
    re.IGNORECASE,
)


def parse_qa_pairs(text):
    """Extract Question/Response pairs from the model's plain-text answer.

    Each pair is a "Question: ..." line followed (not necessarily directly) by a
    "Response: ..." line. Only the text on the label's own line is captured.
    A response with no pending non-empty question, or with empty text, is ignored.

    Args:
        text: Raw answer text returned by the model.

    Returns:
        A list of dicts with keys "instruction" (the question), "input"
        (always an empty string) and "output" (the response), in order of
        appearance.
    """
    pairs = []
    pending_question = None
    for raw_line in text.strip().split("\n"):
        match = _QA_LABEL_RE.match(raw_line.strip())
        if match is None:
            continue  # preamble, blank line or any other unlabelled text
        decoration, label, closing_stars, value = match.groups()
        value = value.strip()
        # Only peel markdown asterisks when the label itself was decorated, so a
        # plain "Question: **bold** text" keeps its content untouched.
        if "*" in decoration or closing_stars:
            value = value.strip("*").strip()
        if label.lower() == "question":
            pending_question = value
        elif pending_question and value:
            pairs.append({
                "instruction": pending_question,
                "input": "",
                "output": value
            })
            pending_question = None
    return pairs


# Fail early with a readable message instead of sending "Bearer None" and
# getting an authentication error back from the API.
if headers.get("Authorization", "") in ("", "Bearer ", "Bearer None"):
    raise RuntimeError("Groq API key is missing: set the GROQ_API_KEY environment variable and rebuild the headers.")

response = requests.post(GROQ_CHAT_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_S)
if not response.ok:
    # Surface the API's own error body (truncated) rather than a later KeyError.
    raise RuntimeError(f"Groq API request failed ({response.status_code}): {response.text[:500]}")

result = response.json()
try:
    first_choice = result["choices"][0]
    answer_text = first_choice["message"]["content"]
except (KeyError, IndexError, TypeError) as exc:
    raise RuntimeError(f"Unexpected response from Groq API: {result!r}") from exc

# A "length" finish reason means the answer was cut off at max_tokens, so the
# last pair may be incomplete and fewer pairs than requested may be present.
if first_choice.get("finish_reason") == "length":
    print("Warning: the model output was truncated by max_tokens; the last pair may be incomplete.")

# parsing response from groq
all_qas = parse_qa_pairs(answer_text)
for qa in all_qas:
    print(f"\n Q: {qa['instruction']}\n A: {qa['output']}\n")



 Q: What is LangChain?
 A: LangChain is a framework for developing applications powered by large language models (LLMs).


 Q: How do I build a simple LLM application?
 A: Check out the "Build a Simple LLM Application" tutorial in the LangChain tutorials section.


 Q: What are the key components of LangChain?
 A: LangChain consists of multiple open-source libraries, including langchain-core, langchain, langchain-community, and langgraph.


 Q: How do I integrate LangChain with a specific provider?
 A: Check out the integrations page for a list of providers and their corresponding integration packages.


 Q: What is LangGraph, and how is it related to LangChain?
 A: LangGraph is an orchestration framework that combines LangChain components into production-ready applications with persistence, streaming, and other key features.


 Q: How do I evaluate and optimize my LangChain application?
 A: Use LangSmith to inspect, monitor, and evaluate your applications, and continuously optimize a

In [9]:

# save result
# Create the destination folder first: open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:  # empty when OUTPUT_FILE is a bare filename in the current directory
    os.makedirs(output_dir, exist_ok=True)

# One JSON object per line (JSONL); ensure_ascii=False keeps non-ASCII text readable.
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    for qa in all_qas:
        f.write(json.dumps(qa, ensure_ascii=False) + "\n")

print(f"\n{len(all_qas)} Q-R pairs saved to {OUTPUT_FILE}")



10 Q-R pairs saved to ./out/qa_dataset.jsonl
