In [None]:
import requests
import json
import pandas as pd

# Ollama REST endpoints; all four live on the same local host and port.
_ollama_base = "http://localhost:11434"
ollama_url_inf = f"{_ollama_base}/api/show"
ollama_url_emb = f"{_ollama_base}/api/embeddings"
ollama_url_gen = f"{_ollama_base}/api/generate"
ollama_url_cht = f"{_ollama_base}/api/chat"

# Model tag that generate_qa_pairs uses when the caller does not pass one.
ollama_model_name = "llama3.2:latest"


def _extract_json_array(text):
    """Return the first JSON array of objects embedded in ``text``.

    The model may surround the array with prose or a code fence, so every
    "[" is tried as a candidate start until one decodes to a list whose items
    are all dicts (this skips bracketed fragments such as ``[25]``).

    Raises:
        ValueError: If no such array can be decoded from ``text``.
    """
    decoder = json.JSONDecoder()
    start = text.find("[")
    while start != -1:
        try:
            value, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            value = None
        if isinstance(value, list) and all(isinstance(item, dict) for item in value):
            return value
        start = text.find("[", start + 1)
    raise ValueError(f"No JSON array found in model output: {text[:200]!r}")


def generate_qa_pairs(chunk, model=ollama_model_name, timeout=600):
    """Ask the Ollama generate endpoint for question-answer pairs about ``chunk``.

    Args:
        chunk: Text to generate pairs for; interpolated into the prompt.
        model: Ollama model tag to use (defaults to ``ollama_model_name``).
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        The JSON array decoded from the model's reply. The prompt asks for a
        list of ``{"question": ..., "answer": ...}`` dicts.

    Raises:
        Exception: If Ollama answers with a non-200 status.
        ValueError: If the reply contains no decodable JSON array.
    """
    prompt = f"""
You are an expert assistant skilled at generating question-answer pairs for text comprehension. 
Given a text chunk, generate diverse, relevant, and accurate question-answer pairs that cover the key information in the chunk. 
Your output should be a list of pairs in JSON format, where each pair includes a "question" and its corresponding "answer."

Example:
Chunk: "The Eiffel Tower is a wrought-iron lattice tower located on the Champ de Mars in Paris, France. It was designed by Gustave Eiffel's engineering company and completed in 1889."

Output:
[
  {{"question": "Where is the Eiffel Tower located?", "answer": "The Eiffel Tower is located on the Champ de Mars in Paris, France."}},
  {{"question": "Who designed the Eiffel Tower?", "answer": "The Eiffel Tower was designed by Gustave Eiffel's engineering company."}},
  {{"question": "When was the Eiffel Tower completed?", "answer": "The Eiffel Tower was completed in 1889."}}
]

Now, process the following chunk:
Chunk: {chunk}
"""

    payload = {"model": model, "prompt": prompt, "stream": False}
    # `json=` serializes the payload and sets the JSON Content-Type header.
    response = requests.post(ollama_url_gen, json=payload, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Error from Ollama: {response.text}")

    # The generate endpoint returns the generated text under "response".
    # Reading "content" always fell back to the default and yielded [].
    reply = response.json().get("response", "[]")
    return _extract_json_array(reply)

# Single-row demo input: one chunk holding a pipe-delimited table of wars
# (header: List | War | Death range | Date | Combatants | Location), keyed by chunk_id.
data = {"chunk_id": [1], "chunk_text": ["List | War | Death range | Date | Combatants | Location ||| crusades | 1 – 9 million [ 25 ] [ 26 ] | 1095 – 1291 | originally byzantine empire vs. seljuk empire, but evolved into christians vs. muslims | europe and the middle east || Reconquista | 7 million[27] | 718–1492 | Spanish and Portuguese Christians vs. Spanish and Portuguese Muslims | Iberian Peninsula || French Revolutionary and Napoleonic Wars | 5–7 million[28] | 1792–1815 | French Republic, later French Empire, vs. Coalition forces | Europe || Conquests of Menelik II | 6 million[29] | 1878–1904 | Ethiopian Empire vs. Emirate of Harar, Kingdom of Kaffa, Kingdom of Wolaita, and allies | Horn of Africa || Second Congo War | 3–5.4 million[30][31][32] | 1998–2003 | Multiple sides | Democratic Republic of the Congo || Spanish conquest of New Granada | 5.25 million[33][34] | 1525–1540 | Spanish Empire and Klein-Venedig vs. Muisca Confederation and other civilizations | Colombia || Deccan wars | 4.6–5 million[35] | 1680–1707 | Mughal Empire vs. Maratha Confederacy | Indian subcontinent || Nigerian Civil War | 3.04–4.1 million[36][37] | 1967–1970 | Nigeria vs. Biafra | Nigeria || Deluge | 3–4 milion[38] | 1648–1666 | Poland–Lithuania vs Swedish Empire and Russia | Eastern Europe || French Wars of Religion | 2–4 million[39] | 1562–1598 | French catholics vs Huguenots | France || Korean War | 2.5–3.5 million[40][22] | 1950–1953 | North Korea and allies vs. South Korea and allies | Korean Peninsula || Vietnam War | 1.1–3.4 million[41][42] | 1955–1975 | North Vietnam and allies vs. South Vietnam and allies | Indochina ||"]}
# Wrap the dict in a DataFrame so the loop below can iterate chunk rows.
df_chunks = pd.DataFrame(data)

# Generate QA pairs for every chunk and flatten them into one record per pair.
qa_columns = ["chunk_id", "question", "answer"]
qa_data = []
# Only these two columns are needed, so iterate them directly instead of
# materialising a Series per row with iterrows().
for chunk_id, chunk_text in zip(df_chunks["chunk_id"], df_chunks["chunk_text"]):
    try:
        qa_pairs = generate_qa_pairs(chunk_text)
        for pair in qa_pairs:
            qa_data.append({
                "chunk_id": chunk_id,
                "question": pair["question"],
                "answer": pair["answer"]
            })
    except Exception as e:
        # Best effort: report the failing chunk and carry on with the rest.
        print(f"Error processing chunk {chunk_id}: {e}")

# Pin the columns so that an empty result still produces a well-formed frame
# and a CSV with a header row, rather than a column-less frame and blank file.
df_qa = pd.DataFrame(qa_data, columns=qa_columns)
df_qa.to_csv("qa_pairs.csv", index=False)
print("QA pairs saved to qa_pairs.csv")

df_qa

In [10]:
def ask(query, model=ollama_model_name, timeout=600):
    """Send ``query`` to the Ollama generate endpoint and print the reply.

    Args:
        query: Prompt text sent to the model as-is.
        model: Ollama model tag; defaults to ``ollama_model_name``.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Prints the generated text, or ``"No response available"`` when the reply
    carries no ``"response"`` field. On a non-200 status the server's error
    body is printed instead. Always returns ``None``.
    """
    payload = {"model": model, "prompt": query, "stream": False}
    # `json=` serializes the payload and sets the JSON Content-Type header.
    response = requests.post(ollama_url_gen, json=payload, timeout=timeout)
    if response.status_code != 200:
        # Surface the server's message rather than masking it as "no response".
        print(f"Error from Ollama: {response.text}")
        return
    print(response.json().get("response", "No response available"))

In [15]:
# This is a plain (non-f) string, so the JSON example uses single braces;
# doubled braces are only an f-string escape and would reach the model literally.
ask('''You are an expert assistant skilled at generating question-answer pairs for text comprehension. 
Given a text chunk, generate diverse, relevant, and accurate question-answer pairs that cover the key information in the chunk. 
Your output should be a list of pairs in JSON format, where each pair includes a "question" and its corresponding "answer."

Example:
Chunk: "The Eiffel Tower is a wrought-iron lattice tower located on the Champ de Mars in Paris, France. It was designed by Gustave Eiffel's engineering company and completed in 1889."

Output:
[
  {"question": "Where is the Eiffel Tower located?", "answer": "The Eiffel Tower is located on the Champ de Mars in Paris, France."},
  {"question": "Who designed the Eiffel Tower?", "answer": "The Eiffel Tower was designed by Gustave Eiffel's engineering company."},
  {"question": "When was the Eiffel Tower completed?", "answer": "The Eiffel Tower was completed in 1889."}
]

Now, process the following chunk:
Chunk: List | War | Death range | Date | Combatants | Location ||| crusades | 1 – 9 million [ 25 ] [ 26 ] | 1095 – 1291 | originally byzantine empire vs. seljuk empire, but evolved into christians vs. muslims | europe and the middle east || Reconquista | 7 million[27] | 718–1492 | Spanish and Portuguese Christians vs. Spanish and Portuguese Muslims | Iberian Peninsula || French Revolutionary and Napoleonic Wars | 5–7 million[28] | 1792–1815 | French Republic, later French Empire, vs. Coalition forces | Europe || Conquests of Menelik II | 6 million[29] | 1878–1904 | Ethiopian Empire vs. Emirate of Harar, Kingdom of Kaffa, Kingdom of Wolaita, and allies | Horn of Africa || Second Congo War | 3–5.4 million[30][31][32] | 1998–2003 | Multiple sides | Democratic Republic of the Congo || Spanish conquest of New Granada | 5.25 million[33][34] | 1525–1540 | Spanish Empire and Klein-Venedig vs. Muisca Confederation and other civilizations | Colombia || Deccan wars | 4.6–5 million[35] | 1680–1707 | Mughal Empire vs. Maratha Confederacy | Indian subcontinent || Nigerian Civil War | 3.04–4.1 million[36][37] | 1967–1970 | Nigeria vs. Biafra | Nigeria || Deluge | 3–4 milion[38] | 1648–1666 | Poland–Lithuania vs Swedish Empire and Russia | Eastern Europe || French Wars of Religion | 2–4 million[39] | 1562–1598 | French catholics vs Huguenots | France || Korean War | 2.5–3.5 million[40][22] | 1950–1953 | North Korea and allies vs. South Korea and allies | Korean Peninsula || Vietnam War | 1.1–3.4 million[41][42] | 1955–1975 | North Vietnam and allies vs. South Vietnam and allies | Indochina ||''')

Here is the list of question-answer pairs in JSON format:

[
    {"question": "What was the approximate number of deaths during the Crusades?", "answer": "The Crusades resulted in a death range of 1–9 million."},
    {"question": "When did the Crusades take place?", "answer": "The Crusades took place from 1095 to 1291."},
    {"question": "Which empires were involved in the Crusades?", "answer": "The Crusades involved the Byzantine Empire vs. the Seljuk Empire, which later evolved into Christians vs. Muslims."},
    {"question": "Where did the Crusades occur?", "answer": "The Crusades took place in Europe and the Middle East."},
    {"question": "How many deaths occurred during the Reconquista?", "answer": "The Reconquista resulted in approximately 7 million deaths."},
    {"question": "When did the Reconquista take place?", "answer": "The Reconquista took place from 718 to 1492."},
    {"question": "Which sides were involved in the Reconquista?", "answer": "The Reconquista was fought 