In [None]:
# importing libraries
import json
import os
import sqlite3

import langchain
import openai
import pinecone
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

In [None]:
# Pull the variables defined in the .env file into the process environment.
load_dotenv()

# Pinecone settings, looked up under lowercase variable names.
# os.getenv returns None for any name that is not set.
api_key, environment, index_name = (
    os.getenv(name) for name in ("api_key", "environment", "index_name")
)

In [None]:
# Initialise the Pinecone client with the settings read from the environment,
# then open a handle to the configured index.
pinecone.init(environment=environment, api_key=api_key)
index = pinecone.Index(index_name)

In [None]:
# Create the embeddings object.
# It is handed to Pinecone.from_existing_index when the `docsearch` vector store is built.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [None]:
# Wrap the existing Pinecone index as a LangChain vector store.
# get_answer calls docsearch.similarity_search(query) on it to fetch the documents for a question.
docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [None]:
# Completion model that writes the answer, wrapped in a "stuff"-type question-answering chain.
# get_answer runs `chain` on the retrieved documents together with the question.
model_name = "text-davinci-003"
llm = OpenAI(model=model_name)
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [None]:
# SQLite database setup.
# `conn` is the connection to the local pinecone_data.db file and `c` is a cursor on it;
# both are module-level and are used by get_answer to log results.
conn = sqlite3.connect('pinecone_data.db')
c = conn.cursor()

In [None]:
# Create the `data` table on first run; IF NOT EXISTS makes re-running this cell a no-op.
# get_answer inserts one row per retrieved document; every column is declared TEXT.
c.execute('''CREATE TABLE IF NOT EXISTS data
             (input_document TEXT, document_id TEXT, vector_id TEXT, search_result TEXT, update_timestamp TEXT)''')

In [None]:
def get_answer(query):
    """Answer ``query`` from the indexed documents and log the result to SQLite.

    The documents most similar to ``query`` are fetched from ``docsearch`` and
    passed, together with the question, to the QA ``chain``. One row per
    retrieved document is then inserted into the ``data`` table.

    Args:
        query: The question to answer.

    Returns:
        The answer produced by the chain, or the fixed string
        "I don't know the information" when no similar document is found
        (nothing is written to the database in that case).

    Note:
        Uses the module-level ``docsearch``, ``chain``, ``c`` and ``conn``;
        the SQLite connection must still be open when this is called.
    """
    similar_docs = docsearch.similarity_search(query)
    if not similar_docs:
        return "I don't know the information"
    answer = chain.run(input_documents=similar_docs, question=query)

    # Store data in SQLite.
    # A LangChain Document only guarantees `page_content` and `metadata`, so the
    # identifiers are read from an attribute when present, otherwise from the
    # metadata dict, and stored as NULL when neither carries them.
    rows = []
    for doc in similar_docs:
        metadata = getattr(doc, "metadata", None) or {}
        rows.append((
            doc.page_content,
            getattr(doc, "page_id", metadata.get("page_id")),
            getattr(doc, "vector_id", metadata.get("vector_id")),
            answer,
        ))
    c.executemany("INSERT INTO data VALUES (?, ?, ?, ?, datetime('now'))", rows)
    conn.commit()

    return answer

In [None]:
# Example question: run it through get_answer and print the returned answer.
query = "What is a Bills house?"
print(get_answer(query=query))

In [None]:
# Close SQLite connection.
# NOTE: get_answer writes through the module-level `c`/`conn`, so calling it after this cell
# fails until the SQLite setup cell has been run again.
conn.close()

In [None]:
# To implement the "Query the LLM and get formatted, validated, and corrected output" part,
# we modify the get_answer function to use the GuardrailsOutputParser. Steps:

# Step 1: Create a RAIL spec that specifies that the output of the language model should be
#         an object with three properties:
#         - question_topic,
#         - answer_topic, and
#         - confidence_score, a float with a valid range of 0 to 1.

# Step 2: Create a GuardrailsOutputParser from the RAIL spec.

# Step 3: Modify the get_answer function to parse the output of the language model using the GuardrailsOutputParser.
# Step 4: Store the parsed output in the SQLite database.

# Assumptions:
# - The output of the language model matches the structure specified in the RAIL spec.
#   If it does not, the GuardrailsOutputParser will raise an error.
# - The RAIL spec and the get_answer function may need to be adjusted based on the actual
#   output of the language model.


In [None]:
# GuardrailsOutputParser is LangChain's wrapper around a Guardrails guard; it is exported by
# langchain.output_parsers, not by the guardrails package itself.
from langchain.output_parsers import GuardrailsOutputParser

# Create a RAIL spec: the validated output is a `qa_info` object holding two topic strings
# and a confidence score restricted to the range 0..1.
# The <rail> root element must be closed, otherwise the spec is not well-formed XML.
# NOTE(review): some Guardrails releases also expect a <prompt> element — confirm against
# the installed version.
rail_spec = """
<rail version="0.1">
<output>
 <object name="qa_info">
 <string name="question_topic" description="Topic of the question" />
 <string name="answer_topic" description="Topic of the answer" />
 <float name="confidence_score" format="valid-range: 0 1" description="Confidence score for the match" />
 </object>
</output>
</rail>
"""

In [None]:
# Create a GuardrailsOutputParser from the RAIL spec.
# The parser is built through its from_rail_string() factory, which constructs the underlying
# guard from the spec text; the class does not accept the spec as a positional argument.
parser = GuardrailsOutputParser.from_rail_string(rail_spec)

In [None]:
def get_answer(query):
    """Answer ``query``, validate the answer with Guardrails, and log it to SQLite.

    The documents most similar to ``query`` are fetched from ``docsearch`` and
    passed, together with the question, to the QA ``chain``. The raw answer is
    parsed by ``parser`` and one row per retrieved document is inserted into
    the ``data`` table.

    Args:
        query: The question to answer.

    Returns:
        The parsed output returned by ``parser.parse``, or the fixed string
        "I don't know the information" when no similar document is found
        (nothing is written to the database in that case).

    Note:
        Uses the module-level ``docsearch``, ``chain``, ``parser``, ``c`` and
        ``conn``; the SQLite connection must still be open when this is called.
    """
    similar_docs = docsearch.similarity_search(query)
    if not similar_docs:
        return "I don't know the information"
    answer = chain.run(input_documents=similar_docs, question=query)

    # Parse the output using Guardrails
    parsed_output = parser.parse(answer)

    # sqlite3 cannot bind a dict/list parameter, so structured output is stored as JSON text.
    # Plain strings are stored unchanged and None is stored as NULL.
    if parsed_output is None or isinstance(parsed_output, str):
        stored_output = parsed_output
    else:
        stored_output = json.dumps(parsed_output, default=str)

    # Store data in SQLite.
    # A LangChain Document only guarantees `page_content` and `metadata`, so the
    # identifiers are read from an attribute when present, otherwise from the
    # metadata dict, and stored as NULL when neither carries them.
    rows = []
    for doc in similar_docs:
        metadata = getattr(doc, "metadata", None) or {}
        rows.append((
            doc.page_content,
            getattr(doc, "page_id", metadata.get("page_id")),
            getattr(doc, "vector_id", metadata.get("vector_id")),
            stored_output,
        ))
    c.executemany("INSERT INTO data VALUES (?, ?, ?, ?, datetime('now'))", rows)
    conn.commit()

    return parsed_output