# Create sample Q/A dataset from knowledge base stored in vector DB 

In [9]:
import ollama
import json

In [20]:
# Prompt to generate Question-Answer pairs from the provided text.
# The model's reply is parsed with json.loads, so the prompt asks explicitly
# for a JSON list rather than a Python list. The context text is appended to
# the end of this prompt.

prompt = """
Write a FAQ page for a college called TBC College which is based in Toronto, Ontario.

Write 3 questions and answers from the provided context.
Organize the questions and answers in a JSON list with each question and answer stored in a JSON object, and use clear and concise language.
Use the following format for each object,
{"question":,
"answer":}

Only provide the JSON list as output. Do not output any other text.

Context:

"""

In [11]:
import psycopg2

# Open the PostgreSQL connection used to read the stored text
_pg_settings = {
    "database": "test_db",
    "user": "postgres",
    "host": 'localhost',
    "password": "deep",
    "port": 5432,
}
conn = psycopg2.connect(**_pg_settings)

# Autocommit mode: statements take effect without an explicit commit
conn.autocommit = True

In [12]:
# Load the run parameters from the JSON parameters file
parameters_file = "pg_params.json"

with open(parameters_file, 'r') as params_fh:
    params = json.load(params_fh)

In [13]:
# Fetch all data from the table
try:
    # The cursor is closed automatically once the rows have been read
    with conn.cursor() as cur:
        cur.execute('SELECT * FROM college_text;')
        rows = cur.fetchall()
finally:
    # Release the connection even if the query fails
    conn.close()

In [14]:
# Number of rows fetched from the table
len(rows)

82

In [15]:
# Keep only the second column of every fetched row
data = [row[1] for row in rows]

In [16]:
# Preview the first two entries
data[:2]

["'Student to Student Support Peer support recognizes that students naturally turn to each other for support and connection. Our Student Ambassadors utilize their lived experience as international students to provide friendly, respectful support to help students build a healthy and successful college experience. Click here(https://tbcollege0.sharepoint.com/:b:/s/ExternShare/EWPHCe-qNINHgqTW486g3-YBD-0vkdNy2Ke0sKixC01BLQ?e=cAGXGg) to know more. '",
 "' STUDENT SUCCESS: Student to Student Support Peer support recognizes that students naturally turn to each other for support and connection. Our Student Ambassadors utilize their lived experience as international students to provide friendly, respectful support to help students build a healthy and successful college experience. Click here(https://tbcollege0.sharepoint.com/:b:/s/ExternShare/EWPHCe-qNINHgqTW486g3-YBD-0vkdNy2Ke0sKixC01BLQ?e=cAGXGg) to know more. '"]

In [None]:
from tqdm.auto import tqdm

# Create questions and answers for the provided data using Language Model
def _extract_qa_pairs(raw_response):
    """Parse a model reply into a list of usable question/answer dicts.

    The reply must be a JSON list. Entries that are not dicts, or that lack
    a non-empty "question" or "answer", are dropped individually so that one
    malformed entry does not discard the rest of the reply.

    Raises:
        ValueError: if the reply is not valid JSON (json.JSONDecodeError is
            a ValueError subclass) or is not a JSON list.
        TypeError: if raw_response is not a str/bytes value.
    """
    parsed = json.loads(raw_response)
    if not isinstance(parsed, list):
        raise ValueError("expected a JSON list of question/answer objects")
    return [
        item for item in parsed
        if isinstance(item, dict) and item.get('question') and item.get('answer')
    ]


qa = []
skipped_records = 0

for record in tqdm(data):
    result = ollama.generate(model=params["lm_name"], prompt=prompt+record)

    print(result)
    try:
        qa.extend(_extract_qa_pairs(result['response']))
    except (ValueError, TypeError, KeyError) as err:
        # Best effort: an unparseable reply is skipped, but reported rather
        # than silently swallowed. Other errors (e.g. KeyboardInterrupt)
        # propagate.
        skipped_records += 1
        print(f"Skipping record, could not parse model response: {err!r}")
    

  from .autonotebook import tqdm as notebook_tqdm


In [23]:
# Write the collected QA pairs to the JSON file named in the parameters
with open(params["qa_filename"], 'w') as out_fh:
    json.dump(qa, out_fh, indent=4)