In [None]:
# Install the notebook's dependencies with the %pip magic so they land in the
# environment of the running kernel (a bare `!pip` may target another Python).
# NOTE(review): versions are unpinned; the legacy `langchain.*` import paths
# used below may not exist in newer releases -- TODO pin a known-good set.
%pip install -q openai langchain faiss-cpu pypdf docx2txt chromadb tiktoken gradio

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import Docx2txtLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
import openai

In [None]:
import os
from getpass import getpass

# Never store the secret in the notebook: reuse a key that is already exported
# in the environment, otherwise ask for it interactively (input is not echoed
# and is not saved in the cell source or output).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
TEMPERATURE = 0
MODEL_NAME = "gpt-3.5-turbo" #gpt-4


class docqa_agent(object):
  '''
  Question answering over a single .docx file.

  The document is split into chunks, embedded into a Chroma vector store and
  queried through a "stuff" RetrievalQA chain. Call `make_qa` once per
  document, then `run` for every question.
  '''

  def __init__(self,
               open_ai_api_key,
               model=MODEL_NAME,
               temperature=TEMPERATURE):
    '''
    Args:
      open_ai_api_key: OpenAI key used for both the chat model and embeddings.
      model: chat model name used to answer questions.
      temperature: sampling temperature passed to the chat model.
    '''
    openai.api_key = open_ai_api_key
    self._openai_key = open_ai_api_key
    # Pass the key explicitly so the model does not silently depend on the
    # OPENAI_API_KEY environment variable being set as well.
    self.chat_model = ChatOpenAI(model=model,
                                 temperature=temperature,
                                 openai_api_key=open_ai_api_key)
    # Both are populated by make_qa(); None means "no document loaded yet".
    self.db = None
    self.qa = None

  def ingest(self, doc_path):
    '''
    Split the document at `doc_path`, embed the chunks and load them into a
    Chroma vector store. Returns the vector store.
    '''
    texts = self.load_data(doc_path)
    embeddings = OpenAIEmbeddings(openai_api_key=self._openai_key)
    # load it into Chroma
    return Chroma.from_documents(texts, embeddings)

  def load_data(self, doc_path):
    '''
    Load the .docx file at `doc_path` and return it split into chunks
    (chunk_size=600, chunk_overlap=100).
    '''
    # Docx2txtLoader reads the single file it is given.
    loader = Docx2txtLoader(doc_path)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=100)
    return text_splitter.split_documents(documents)

  def make_qa(self, doc_path):
    '''
    Build the retrieval QA chain for one document.

    Args:
      doc_path: either a plain string path or an uploaded-file object that
        exposes the path on its `.name` attribute.
    '''
    # Accept both a path string and a file object carrying the path in .name.
    path = doc_path if isinstance(doc_path, str) else doc_path.name
    self.db = self.ingest(path)
    # Answer with the chat model configured in __init__, so the `model` and
    # `temperature` given to the constructor are actually honoured.
    self.qa = RetrievalQA.from_chain_type(
        llm=self.chat_model,
        chain_type="stuff",
        retriever=self.db.as_retriever(search_kwargs={"k": 3}))
    print('QA Chain is sucessfully made.')

  def run(self, query):
    '''
    Answer `query` with the current QA chain.

    Returns the chain's answer, or a hint string when no chain has been built
    yet, so that a caller displaying the return value shows the hint.
    '''
    if self.qa is None:
      message = 'Please make qa chain first.'
      print(message)
      return message
    return self.qa.run(query)

In [None]:
# Shared agent instance used by the UI callbacks; the key is read from the
# environment rather than written here.
qa = docqa_agent(open_ai_api_key=os.environ["OPENAI_API_KEY"])

In [None]:

import gradio as gr

# Default text shown in the question box. Defined in this cell so the
# notebook runs top to bottom on a fresh kernel.
EXAMPLE_QUERY = "What is this document about?"

app = gr.Blocks()
with app:
  gr.Markdown("## Q&A over your docx documents")
  with gr.Tabs():
    with gr.Row():
      # Left column: question, document upload and submit button.
      with gr.Column():
        query = gr.Textbox(
            value=EXAMPLE_QUERY, label="Question of your document", lines=2)
        docs_upload = gr.File(
            file_count="single", file_types=[".docx"])
        ans_button = gr.Button("Submit")
      # Right column: the answer returned by the QA chain.
      with gr.Column():
        answer = gr.Textbox(
            value="Your answer will appear here",
            label="Answer",
            lines=5,
        )
  # Build the QA chain as soon as a document has been uploaded.
  docs_upload.upload(
      qa.make_qa,
      inputs=docs_upload
  )
  # Run the question through the chain and show the result in the answer box.
  ans_button.click(
      qa.run,
      inputs=[query],
      outputs=[answer]
  )
# share=True serves the app on a temporary public URL; anyone holding the link
# can submit questions that are answered with the API key configured above.
app.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://edb1dd531ba95175d8.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
# NOTE(review): looks like leftover interactive probing -- nothing in this
# notebook defines `__dir` on `app`; confirm the intent or delete this cell.
app.__dir

False

In [None]:
# Shut down the server started by app.launch() and report the freed port.
app.close(verbose=True)

Closing server running on port: 7860
