In [86]:
from flask import Flask, jsonify,request,Blueprint
import openai
import langchain
import pinecone
import pandas as pd
import numpy as np
import tiktoken
import os
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.chains import LLMChain
from langchain_openai import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI
from langchain_community.chat_models import ChatOpenAI

In [87]:
# Take the Pinecone key from the environment rather than hard-coding it here.
# The previous version assigned an empty literal and then wrote it back into
# os.environ, which replaced any key the process had been started with.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
# Module-level slot for a shared vector-store handle; starts out unset.
docsearch = None
# Only write a real value back, so a configured key is never replaced by ''.
if PINECONE_API_KEY:
    os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

In [88]:
def read_doc(directory):
    """Load the PDF files under ``directory``.

    Args:
        directory: Path handed to ``PyPDFDirectoryLoader``.

    Returns:
        The result of the loader's ``load()`` call.
    """
    return PyPDFDirectoryLoader(directory).load()

In [89]:
## Divide the docs into chunks
### https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.RecursiveCharacterTextSplitter.html#
def chunk_data(docs,chunk_size=800,chunk_overlap=50):
    """Split ``docs`` into smaller pieces with a recursive character splitter.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        The output of the splitter's ``split_documents`` for ``docs``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [90]:
def get_openai_embeedings():
    """Build the OpenAI embeddings client.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of an empty literal baked into the source. When the variable is
    unset an empty string is passed, exactly as before.

    Returns:
        An ``OpenAIEmbeddings`` instance.
    """
    api_key = os.environ.get('OPENAI_API_KEY', '')
    return OpenAIEmbeddings(openai_api_key=api_key)

In [91]:
def get_pinecone(index_name='hackindex'):
    """Create a Pinecone client and look up ``index_name`` on it.

    The API key is read from the ``PINECONE_API_KEY`` environment variable
    instead of an empty literal baked into the source. When the variable is
    unset an empty string is passed, exactly as before.

    Args:
        index_name: Name of the index to look up on the client.

    Returns:
        The Pinecone client object (not the index handle).
    """
    pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY', ''))
    # The index handle is looked up but never returned; the call is kept
    # because it was already made here. NOTE(review): confirm whether callers
    # were meant to receive the index instead of the client.
    pc.Index(index_name)
    return pc

In [92]:
def get_chunked_docs(path='documents/'):
    """Load the PDFs under ``path`` and return them split into chunks.

    Args:
        path: Directory handed to ``read_doc``.

    Returns:
        Whatever ``chunk_data`` produces for the loaded documents, using its
        default chunk size and overlap.
    """
    return chunk_data(docs=read_doc(path))
    

In [93]:
def embed_document(path='documents/', index_name='hackindex'):
    """Chunk the PDFs under ``path``, embed them and store them in Pinecone.

    Args:
        path: Directory containing the PDFs to index.
        index_name: Target Pinecone index. Defaults to ``'hackindex'``, the
            value that used to be hard-coded here.

    Returns:
        The ``PineconeVectorStore`` returned by ``from_documents``.
    """
    chunks = get_chunked_docs(path=path)
    embeddings = get_openai_embeedings()
    # NOTE(review): from_documents writes the chunks on every call; indexing
    # the same folder twice presumably stores it twice — confirm.
    return PineconeVectorStore.from_documents(chunks, embeddings, index_name=index_name)


In [94]:
# Every route below is registered under this prefix. It is prepended by hand
# in each @app.route call because, per the original author's note, setting
# app.config["APPLICATION_ROOT"] to "/hack/esg" did not work.
PREFIX='/hack/esg'
app = Flask(__name__)

In [95]:
@app.route(f'{PREFIX}/hello', methods=['GET'])
def get_date():
    """Respond to GET <PREFIX>/hello with a fixed JSON greeting."""
    greeting = {'message':'hello'}
    return jsonify(greeting)

In [96]:
@app.route(PREFIX+'/embed', methods=['POST'])
def embed():
    """Index the PDFs under the directory named in the request body.

    Expects a JSON object with a non-empty string ``path``. A body that is
    missing, not JSON, not an object, or lacks a usable ``path`` now gets a
    JSON 400 response instead of surfacing as an unhandled error.

    Returns:
        On success, the same JSON body as before (``code`` is the string
        ``'201'``; the HTTP status itself is left at Flask's default so
        existing clients are unaffected). On bad input, a JSON error body
        with HTTP status 400.
    """
    # silent=True yields None for an absent or unparsable body, so all bad
    # input is handled by the single check below.
    payload = request.get_json(silent=True)
    path = payload.get('path') if isinstance(payload, dict) else None
    if not isinstance(path, str) or not path.strip():
        error = {
            'message': "request body must be JSON with a non-empty 'path' string",
            'status': 'error',
            'code': '400',
        }
        return jsonify(error), 400

    # NOTE(review): `path` is supplied by the client and is read from the
    # server's filesystem as-is; consider restricting it to an allowed base
    # directory before exposing this endpoint.
    embed_document(path)
    return jsonify({'message':'embedded' , 'status' :'created' , 'code':'201'})

In [97]:
def get_llm():
    """Build the question-answering chain backed by ``gpt-3.5-turbo``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of an empty literal baked into the source. When the variable is
    unset an empty string is passed, exactly as before.

    Returns:
        The chain produced by ``load_qa_chain`` with ``chain_type="stuff"``
        (despite the function's name, this is the chain, not the bare model).
    """
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo",
        temperature=0.5,
        openai_api_key=os.environ.get('OPENAI_API_KEY', ''),
    )
    return load_qa_chain(llm, chain_type="stuff")

In [98]:
def retrieve_query(query,k=2):
    """Return the ``k`` stored chunks most similar to ``query``.

    Earlier, every call ran ``embed_document()``, which re-loaded,
    re-chunked, re-embedded and re-wrote the default document folder to the
    index before searching. The search now connects to the existing
    ``hackindex`` index, and the handle is kept in the module-level
    ``docsearch`` so later calls reuse it. Documents therefore have to be
    indexed beforehand (for example through ``embed_document``).

    Args:
        query: Text to search for.
        k: Number of matches to request.

    Returns:
        The result of ``similarity_search`` on the vector store.
    """
    global docsearch
    if docsearch is None:
        # Attach to the already-populated index; nothing is written here.
        docsearch = PineconeVectorStore.from_existing_index(
            index_name='hackindex',
            embedding=get_openai_embeedings(),
        )
    return docsearch.similarity_search(query, k=k)

In [99]:
def retrieve_answers(query):
    """Answer ``query`` by running the QA chain over the retrieved chunks.

    Args:
        query: The question text; used both for retrieval and as the
            chain's ``question`` input.

    Returns:
        The value returned by the chain's ``run`` call.
    """
    matches = retrieve_query(query)
    qa_chain = get_llm()
    return qa_chain.run(input_documents=matches, question=query)

In [100]:
@app.route(f'{PREFIX}/query', methods=['GET'])
def query():
    """Handle GET <PREFIX>/query?query=... by returning the matching text.

    The ``query`` URL parameter is looked up with ``retrieve_query`` and the
    ``page_content`` of each match is concatenated into ``message``. When
    the parameter is missing or empty, a fixed "no query given" body is
    returned instead.

    NOTE(review): this returns the raw retrieved chunks; ``retrieve_answers``
    is not called from this route — confirm that is intended.
    """
    question = request.args.get('query')
    if not question:
        return jsonify({'answer' : 'no query given' , 'code':'200'})

    matches = retrieve_query(question)
    combined = ''.join(doc.page_content for doc in matches)
    return jsonify({'message':combined , 'code':'200'})

In [None]:
# Start the Flask server for `app` with default arguments.
# NOTE(review): this call is expected to block the cell while the server is
# running — confirm before adding cells after it.
app.run()