<a href="https://colab.research.google.com/github/egerdm-ai/Langchain_Multiple_PDF_w_GDrive/blob/main/WFDF_Rules_QA_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages used by the document-loading cell below.
# NOTE(review): later cells also import flask, pyngrok and streamlit, and
# Docx2txtLoader presumably needs the docx2txt package; none of these are
# installed here -- confirm they are available in the runtime.
!pip install langchain openai chromadb tiktoken pypdf

In [None]:
# Mount Google Drive at /content/drive so later cells can read files from it.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
#this is for building the pdf qa from drive
import os
import sys
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter

# SECURITY: an API key must never be committed inside a notebook. The key that
# used to be hard-coded on this line has been published and should be revoked.
# The key is now taken from the environment, with a hidden interactive prompt
# as a fallback when it is not set.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

root_dir = "/content/drive/MyDrive/"
docs_dir = os.path.join(root_dir, "pdf")

# Loader class to use for each supported file-name suffix.
loaders_by_suffix = {
    ".pdf": PyPDFLoader,
    ".docx": Docx2txtLoader,
    ".doc": Docx2txtLoader,
    ".txt": TextLoader,
}

# Create a list of Documents from every supported file in <root_dir>/pdf.
# Files with any other suffix are skipped.
documents = []
for filename in os.listdir(docs_dir):
    for suffix, loader_cls in loaders_by_suffix.items():
        if filename.endswith(suffix):
            loader = loader_cls(os.path.join(docs_dir, filename))
            documents.extend(loader.load())
            break

# Break the loaded documents into chunks of at most 2000 characters
# (length is measured with len) that overlap by 100 characters.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter_options = {
    "chunk_size": 2000,
    "chunk_overlap": 100,
    "length_function": len,
    "add_start_index": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
documents = text_splitter.split_documents(documents)

# Embed the document chunks with OpenAI embeddings and store them in a Chroma
# vector store that is persisted under <root_dir>/data.
embedding_model = OpenAIEmbeddings()
persist_path = root_dir+"data"
vectordb = Chroma.from_documents(
    documents,
    embedding=embedding_model,
    persist_directory=persist_path,
)
vectordb.persist()

# Build the conversational Q&A chain: a gpt-3.5-turbo chat model answering
# from the 6 chunks the vector store retrieves for each question.
chat_model = ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo')
chunk_retriever = vectordb.as_retriever(search_kwargs={'k': 6})
pdf_qa = ConversationalRetrievalChain.from_llm(
    chat_model,
    retriever=chunk_retriever,
    return_source_documents=True,
    verbose=False,
)



In [None]:
# Interactive question/answer loop for running the Q&A chain inside Colab.

yellow = "\033[0;33m"
green = "\033[0;32m"
white = "\033[0;39m"

MAX_HISTORY = 5  # The maximum number of exchanges to include in the history.
EXIT_COMMANDS = ("exit", "quit", "q", "f")

chat_history = []
print(f"{yellow}------------------------------------------------")
print('You can start asking questions about your files')
print('-----------------------------------------------')

while True:
    # Strip so that a whitespace-only prompt is treated like an empty one.
    query = input(f"{green}Prompt: ").strip()
    if query in EXIT_COMMANDS:
        print('Exiting')
        # Leave the loop instead of calling sys.exit(): inside a notebook
        # sys.exit() raises SystemExit rather than simply ending the cell.
        break
    if query == '':
        continue

    # Only include the last `MAX_HISTORY` exchanges in the chat history.
    recent_history = chat_history[-MAX_HISTORY:]

    # Hand the history to the chain through its `chat_history` input as
    # (question, answer) tuples and pass the bare question. Previously the
    # history was also flattened into the question text, so it was sent twice,
    # and the full unbounded history was passed instead of the recent slice.
    result = pdf_qa({"question": query, "chat_history": recent_history})

    print(f"{white}Answer: " + result["answer"])
    chat_history.append((query, result["answer"]))

In [None]:
#this is for api

from flask import Flask, request, jsonify
from pyngrok import ngrok

# Flask application object; the /ask route below is registered on it.
app = Flask(__name__)

# Create a global variable to store chat history
# NOTE(review): this is one module-level list, so every client calling the API
# reads from and appends to the same conversation.
chat_history = []
# Number of most recent (question, answer) exchanges the /ask handler uses.
MAX_HISTORY = 5
@app.route('/ask', methods=['POST'])
def ask():
    """Answer a question posted as JSON and remember the exchange.

    Expects a JSON body of the form ``{"query": "<question>"}``.

    Returns:
        A JSON response ``{"answer": ...}`` on success, or a JSON
        ``{"error": ...}`` response with HTTP status 400 when the body is not
        a JSON object containing a non-empty string ``query``.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising, so invalid requests get a 400 rather than an unhandled error.
    data = request.get_json(silent=True)
    query = data.get('query') if isinstance(data, dict) else None
    if not isinstance(query, str) or not query.strip():
        return jsonify({'error': "Request body must be JSON with a non-empty 'query' string."}), 400

    # Only include the last `MAX_HISTORY` exchanges in the chat history.
    recent_history = chat_history[-MAX_HISTORY:]

    # Pass the bare question and let the chain consume the history through its
    # `chat_history` input as (question, answer) tuples, instead of flattening
    # the history into the question text and passing an empty string.
    result = pdf_qa({"question": query, "chat_history": recent_history})

    # Append the current question and answer to the chat history, then drop
    # entries older than MAX_HISTORY so the shared list cannot grow without
    # bound while the server runs. The list is mutated in place.
    chat_history.append((query, result['answer']))
    del chat_history[:-MAX_HISTORY]

    return jsonify({'answer': result['answer']})


# Open an ngrok tunnel to the local port, print its public URL, then start
# the Flask application on that same port.
api_port = 5000
public_url = ngrok.connect(api_port)
print("Your API is accessible publicly via this URL:", public_url)
app.run(port=api_port)


In [None]:
#this is an example request

import requests

data = {
    'query': "why?"
}

# NOTE: this address is hard-coded; replace it with the public URL printed by
# the API cell, followed by /ask.
api_url = 'https://b400-34-86-206-243.ngrok-free.app/ask'

try:
    # Without a timeout, requests waits indefinitely for an unresponsive
    # server. The limit is generous because the API calls an LLM.
    response = requests.post(api_url, json=data, timeout=120)
except requests.exceptions.RequestException as exc:
    # Connection errors and timeouts are reported instead of raising.
    print(f'Request failed: {exc}')
else:
    if response.status_code == 200:
        print('Response from the server:')
        print(response.json())
    else:
        print(f'Request failed with status code {response.status_code}')

In [None]:
#this is for creating streamlit app using api

import streamlit as st
import requests


# Function to send POST request to your Flask API
def send_query(query):
    """Send a question to the Flask API and return the answer text.

    The question is suffixed with an instruction asking the model to answer
    from the rules and cite the rule and its index. The API address is read
    from the Streamlit secret named ``url``.

    Args:
        query: The user's question.

    Returns:
        The value of the ``answer`` field in the API's JSON response.

    Raises:
        requests.exceptions.RequestException: On a connection error, a
            timeout, or a 4xx/5xx response status.
    """
    # Add a template to guide the AI's answer
    query_with_template = f"{query}. Please provide an answer according to the content of the rules, specifying the rule and its index. "
    data = {
        'query': query_with_template
    }
    url = st.secrets["url"]
    # Without a timeout, requests waits indefinitely for an unresponsive
    # server. The limit is generous because the API calls an LLM.
    response = requests.post(url, json=data, timeout=120)
    # Fail with a clear HTTP error instead of a confusing KeyError or JSON
    # decoding error when the API answers with an error status.
    response.raise_for_status()
    return response.json()['answer']

# Chat history to display the conversation.
# Streamlit re-executes this script from the top on every interaction, so a
# plain module-level list would be emptied on each rerun. Keeping the list in
# st.session_state makes the conversation persist for the browser session.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
chat_history = st.session_state["chat_history"]

# Streamlit layout
st.title("WFDF Rules AI")

user_input = st.text_input("You can ask questions about the rules from here: ","Is it considered a foul if the thrower's hand hits the marker after the throw?")

# Ignore clicks made while the input box is empty or whitespace-only.
if st.button('Send') and user_input.strip():
    try:
        # Get the API response
        response = send_query(user_input)
    except (requests.exceptions.RequestException, KeyError, ValueError) as exc:
        # A network failure, or a reply without a JSON 'answer' field, is
        # shown to the user instead of surfacing as an unhandled exception.
        st.error(f"Could not get an answer from the API: {exc}")
    else:
        # Add the user query and API response to the chat history
        chat_history.append(('You', user_input))
        chat_history.append(('AI', response))

# Display the chat history on every run, not only right after a click, so
# earlier exchanges stay visible.
for chat in chat_history:
    st.markdown(f"**{chat[0]}**: {chat[1]}")
