In [3]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import WebBaseLoader
import bs4
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

import gradio as gr
import random
import time
import re

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def web_link(link=None,query=None):
  """Index a web page, or answer a query against the last indexed page.

  With ``link`` set, the page is fetched (parsing is restricted to elements
  whose class is ``post-header`` or ``post-content``), split with
  ``chunk_size=1000`` / ``chunk_overlap=200``, and the first 15 chunks are
  embedded into a Chroma store kept in the module-level ``db``. Without a
  ``link``, ``query`` is run as a similarity search against that store.

  Args:
    link: URL of the page to index. When truthy, ``query`` is ignored.
    query: Text to search for in the previously indexed page.

  Returns:
    "Link Read" after successful indexing; the exception text if indexing
    failed; the ``page_content`` of the best match for a query; or a short
    message when there is no indexed page or no match.
  """
  # Both names are published at module level so other cells can inspect them.
  global documents, db

  if link:
    try:
      loader = WebBaseLoader(
        web_path=(link,),
        bs_kwargs=dict(parse_only=bs4.SoupStrainer(class_=("post-header", "post-content"))),
      )
      webpagedoc = loader.load()

      text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
      documents = text_splitter.split_documents(webpagedoc)

      # Vector embedding and storage. `db` is only rebound once the store has
      # been built, so a failed load leaves any previous store usable instead
      # of replacing it with a placeholder value.
      # OPENAPIKEY is expected to be defined in another cell.
      db = Chroma.from_documents(documents[:15], OpenAIEmbeddings(openai_api_key=OPENAPIKEY))

      return "Link Read"

    except Exception as e:
      # Surface the failure as chat text rather than crashing the UI callback.
      return str(e)

  # Query path: `db` does not exist until a link has been indexed successfully.
  store = globals().get("db")
  if store is None:
    return "Please send a web link first"

  if not query:
    return "No Data"

  matches = store.similarity_search(query)
  if not matches:
    return "No Data"

  return matches[0].page_content

In [6]:
def pdf_file(path, message):
  """Return the text of the PDF chunk most similar to ``message``.

  The PDF is loaded with ``PyPDFLoader``, split with ``chunk_size=1000`` /
  ``chunk_overlap=200``, and the first 15 chunks are embedded into a Chroma
  store that is then searched for ``message``.

  Args:
    path: Location of the uploaded PDF; falsy when nothing has been uploaded.
    message: The user's query text.

  Returns:
    ``page_content`` of the top similarity hit, or a short message when no
    file is available or nothing matched.
  """
  if not path:
    # Nothing has been submitted yet; the loader would fail on a missing path.
    return "Please upload a file and press Submit first"

  pages = PyPDFLoader(path).load()

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  documents = text_splitter.split_documents(pages)
  if not documents:
    # The loader produced nothing to embed.
    return "No Data"

  # NOTE(review): the store is rebuilt (and re-embedded) on every query;
  # consider caching it per uploaded file.
  # OPENAPIKEY is expected to be defined in another cell.
  db = Chroma.from_documents(documents[:15], OpenAIEmbeddings(openai_api_key=OPENAPIKEY))

  matches = db.similarity_search(message)
  if not matches:
    return "No Data"

  return matches[0].page_content

In [7]:
def text_file(path, message):
  """Return the text of the text-file chunk most similar to ``message``.

  The file is loaded with ``TextLoader``, split with ``chunk_size=1000`` /
  ``chunk_overlap=200``, and the first 15 chunks are embedded into a Chroma
  store that is then searched for ``message``.

  Args:
    path: Location of the uploaded text file; falsy when nothing has been
      uploaded.
    message: The user's query text.

  Returns:
    ``page_content`` of the top similarity hit, or a short message when no
    file is available or nothing matched.
  """
  if not path:
    # Nothing has been submitted yet; the loader would fail on a missing path.
    return "Please upload a file and press Submit first"

  text_documents = TextLoader(path).load()

  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
  documents = text_splitter.split_documents(text_documents)
  if not documents:
    # The loader produced nothing to embed.
    return "No Data"

  # NOTE(review): the store is rebuilt (and re-embedded) on every query;
  # consider caching it per uploaded file.
  # OPENAPIKEY is expected to be defined in another cell.
  db = Chroma.from_documents(documents[:15], OpenAIEmbeddings(openai_api_key=OPENAPIKEY))

  matches = db.similarity_search(message)
  if not matches:
    return "No Data"

  return matches[0].page_content

In [None]:
########################### GLOBAL VARIABLES ####################################
# Shared UI state, rebound by the Gradio callbacks defined in this cell.
# user_mode: numeric code of the selected mode (1 = PDF file, 2 = text file,
#            3 = web link); None until a mode has been picked.
# path_file: the currently submitted upload; None until a file is submitted.
user_mode = path_file = None

############################## FUNCTIONS ########################################
def echo(message, history):
    """Chat callback: route the user's message to the handler for the active mode.

    Args:
        message: The input query from the user.
        history: The chat history supplied by the chat interface (unused here).

    Returns:
        The text to show as the bot's reply.
    """
    # No mode picked yet.
    if user_mode is None:
        return "Please select an option"

    # Web link mode: a message containing a URL (re)indexes that page,
    # any other message is treated as a query against the indexed page.
    if user_mode == 3:
        url_match = re.search(r'https?://\S+', message)
        if url_match:
            return web_link(link=url_match.group(0))
        return web_link(query=message)

    # File modes need a submitted upload before anything can be searched.
    if user_mode in (1, 2) and not path_file:
        return "Please upload a file and press Submit first"

    # Text file mode.
    if user_mode == 2:
        return text_file(path_file, message)

    # PDF file mode.
    if user_mode == 1:
        return pdf_file(path_file, message)

    # Unknown mode code: reply with text instead of returning None to the chat.
    return "Please select an option"

#---------------------------------------------------------------------------------------------#

def process_files(argument):
    """Submit callback: remember the uploaded file for later queries.

    Stores ``argument`` (the value of the file input) in the module-level
    ``path_file`` and returns a visible, interactive "Upload File" component
    for the file input slot.
    """
    global path_file
    path_file = argument

    upload_props = dict(label="Upload File", visible=True, interactive=True)
    return gr.File(**upload_props)

#---------------------------------------------------------------------------------------------#

def change_mode(choice):
    """Radio callback: record the selected mode and show/hide the upload controls.

    Sets the module-level ``user_mode`` to 3 for "Web Link", 2 for "Text File",
    and 1 for any other choice, then returns the (file input, submit button,
    cancel button) components: hidden for web-link mode, visible otherwise.
    """
    global user_mode

    # NOTE(review): path_file is not reset here, so a file submitted in one
    # mode stays active after switching modes — confirm whether that is intended.
    user_mode = 3 if choice == "Web Link" else (2 if choice == "Text File" else 1)

    if user_mode == 3:
        # Web links are pasted into the chat, so the upload widgets are hidden.
        return (
            gr.File(visible=False),
            gr.Button("Submit", visible=False),
            gr.Button("Cancel", visible=False),
        )

    # Both file-based modes expose the same upload controls.
    return (
        gr.File(label="Upload File", visible=True),
        gr.Button("Submit", visible=True),
        gr.Button("Cancel", visible=True),
    )

#--------------------------------------------------------------------------------------------#

def cancel_upload():
    """Cancel callback: return a visible, interactive "Upload File" component.

    NOTE(review): the module-level ``path_file`` is left untouched, so a
    previously submitted file remains the one being queried — confirm intent.
    """
    upload_props = dict(label="Upload File", visible=True, interactive=True)
    return gr.File(**upload_props)

############################## INTERFACE CODE ###################################
# Build the Gradio app: a mode selector and upload controls in one row,
# followed by the chat interface. Nesting of the `with` blocks defines the layout.
with gr.Blocks() as demo:
    # Top row: mode radio on one side, upload column on the other.
    with gr.Row():
      radio = gr.Radio(
          ["PDF File", "Text File", "Web Link"], label="Select Mode"
      )

      # Upload column: file input with Submit / Cancel buttons beneath it.
      # All three start hidden; change_mode decides their visibility.
      with gr.Column():
        file_input = gr.File(label="Upload File", visible=False,interactive=True)

        # Buttons share a row under the file input.
        with gr.Row():
            submit_btn = gr.Button("Submit", visible=False)
            cancel_btn = gr.Button("Cancel", visible=False)


    # Radio change: change_mode receives the selected label and returns
    # replacements for the file input and both buttons.
    radio.change(fn=change_mode, inputs=radio, outputs=[file_input,submit_btn,cancel_btn])

    # Submit: process_files receives the file input's value (stored as the
    # active file) and returns the component for the file input slot.
    submit_btn.click(fn=process_files, inputs=[file_input], outputs=[file_input])

    # Cancel: cancel_upload takes no inputs and returns the component for the
    # file input slot.
    cancel_btn.click(fn=cancel_upload, inputs=None, outputs=file_input)

    # Chat interface: every user message is answered by echo.
    gr.ChatInterface(
        fn=echo,
        title="Doc Bot",
    )

# Start the app. NOTE(review): debug=True presumably keeps this cell running so
# callback errors are printed in the notebook — confirm against the Gradio docs.
demo.launch(debug=True)