In [1]:
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import RetrievalQA

import os

In [2]:
# Azure OpenAI connection settings. The LangChain clients created later in
# this notebook are given no explicit credentials, so they are expected to
# pick these values up from the process environment.

# API flavour and version are fixed for this notebook, so set them outright.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-07-01-preview"

# Endpoint and key are secrets: export them in the shell (or load them from a
# secrets store) instead of pasting them into the notebook. setdefault keeps
# any value that is already present rather than clobbering it with the empty
# placeholder; the placeholder is only used when nothing was exported.
os.environ.setdefault("OPENAI_API_BASE", "")
os.environ.setdefault("OPENAI_API_KEY", "")

In [3]:
# Value forwarded as OpenAIEmbeddings' chunk_size (LangChain documents this as
# the maximum number of texts embedded per batch).
EMBEDDING_CHUNK_SIZE = 10

embedding_model = OpenAIEmbeddings(chunk_size=EMBEDDING_CHUNK_SIZE)

In [4]:
# OCR dump to index; the path is relative to the notebook's working directory.
OCR_PATH = 'OCR.txt'

# Read the file into LangChain documents.
ocr_loader = TextLoader(OCR_PATH)
OCR_Content = ocr_loader.load()

# Split the documents into chunks. Only the overlap (100) is customised;
# every other splitter setting is left at the library default.
text_splitter = CharacterTextSplitter(chunk_overlap=100)
content = text_splitter.split_documents(OCR_Content)

In [5]:
# Build the FAISS vector store from the split chunks, embedding each one with
# the embedding model configured above.
faiss_db = FAISS.from_documents(
    documents=content,
    embedding=embedding_model,
)

In [6]:
# Search settings handed to the vector store: k=4 asks for four results per query.
RETRIEVER_SEARCH_KWARGS = {"k": 4}

# Expose the FAISS store as a retriever using plain similarity search.
retriever = faiss_db.as_retriever(
    search_kwargs=RETRIEVER_SEARCH_KWARGS,
    search_type="similarity",
)

In [7]:
# Chat model settings: target the Azure deployment named "gpt-4" and use
# temperature 0.
llm_options = dict(deployment_name="gpt-4", temperature=0)

llm = AzureChatOpenAI(**llm_options)

In [8]:
# Prompt sent to the LLM. {context} and {question} are the two placeholders
# filled in at query time. The text is spelled out line by line so that the
# whitespace-only lines and trailing spaces of the prompt are visible.
prompt_template = (
    "\n"
    "\n"
    'Task: Analyze the JSON receipt data provided and group "value" entries '
    'with similar "geometry" proximity under "words," then summarize this '
    "information into one concise sentence.\n"
    "    \n"
    "JSON Data:\n"
    "{context}\n"
    "    \n"
    "User questions: \n"
    "{question}\n"
    "       \n"
    "Respond to the user in JSON format and include the key-value pairs:\n"
    "\n"
)
# Wrap the raw template text in a PromptTemplate, declaring the two
# placeholders ({context} and {question}) it expects to be filled.
QA_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [9]:
# Extra arguments for the chain built by from_chain_type: use the custom
# receipt-extraction prompt instead of the library's default prompt.
chain_options = {"prompt": QA_PROMPT}

# Combine the retriever and the chat model into one question-answering chain.
# verbose=True makes the chain log when it starts and finishes.
qa_chain = RetrievalQA.from_chain_type(
    verbose=True,
    chain_type_kwargs=chain_options,
    retriever=retriever,
    llm=llm,
)

In [10]:
# Fields to pull out of the receipt. Written line by line so the trailing
# spaces after the first two bullets are visible.
question = (
    "\n"
    "\n"
    "Please extract the following details:\n"
    "- Amount, \n"
    "- Receipt/Invoice number, \n"
    "- Date & Time,\n"
    "- Line Items\n"
    "\n"
)

# Run the chain: the question goes in under the "query" key, and the returned
# mapping is kept so the answer can be read from it afterwards.
chain_inputs = {"query": question}
result = qa_chain(chain_inputs)



> Entering new RetrievalQA chain...

> Finished chain.


In [11]:
# The chain's answer text is stored under the "result" key; print it so the
# line breaks in the answer are rendered instead of shown as escapes.
answer_text = result["result"]
print(answer_text)

{
  "Amount": "$23.90",
  "Receipt/Invoice Number": "BKA3500490695",
  "Date & Time": "13/02/2022 19:21",
  "Line Items": [
    {
      "Item": "#OTIS BARISTA OAT MILK 1L",
      "Quantity": "4",
      "Price": "$6.95",
      "Total": "$27.80"
    },
    {
      "Item": "Discount",
      "Quantity": "1",
      "Price": "-$3.90",
      "Total": "-$3.90"
    }
  ]
}
