In [2]:
from flask import Flask, request
from langchain_community.llms import Ollama
from langchain_community.vectorstores import Milvus
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.prompts import PromptTemplate

In [3]:
app = Flask(__name__)

In [5]:
folder_path = "db"
cached_llm = Ollama(model="llama3")
embedding = FastEmbedEmbeddings()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024, chunk_overlap=80, length_function=len, is_separator_regex=False
)
raw_prompt = PromptTemplate.from_template(
    """
    <s>[INST] You are a technical assistant good at searching docuemnts. If you do not have an answer from the provided information say so. [/INST] </s>
    [INST] {input}
    Context: {context}
    Answer:
    [/INST]
    """
)

  from .autonotebook import tqdm as notebook_tqdm
ort_config.json: 100%|██████████| 1.27k/1.27k [00:00<00:00, 5.30MB/s]
config.json: 100%|██████████| 706/706 [00:00<00:00, 3.49MB/s]
README.md: 100%|██████████| 28.0/28.0 [00:00<00:00, 135kB/s]
tokenizer_config.json: 100%|██████████| 1.24k/1.24k [00:00<00:00, 4.42MB/s]

[A

[A[A


special_tokens_map.json: 100%|██████████| 695/695 [00:00<00:00, 2.20MB/s]



.gitattributes: 100%|██████████| 1.52k/1.52k [00:00<00:00, 4.64MB/s]
Fetching 9 files:  11%|█         | 1/9 [00:02<00:17,  2.24s/it]

vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 424kB/s]
tokenizer.json: 100%|██████████| 711k/711k [00:01<00:00, 668kB/s]

[A
[A
[A
[A
[A
[A
model_optimized.onnx: 100%|██████████| 66.5M/66.5M [00:05<00:00, 11.5MB/s]
Fetching 9 files: 100%|██████████| 9/9 [00:07<00:00,  1.21it/s]


In [8]:
@app.route("/ai", methods=["POST"])
def aiPost():
    print("Post /ai called")
    json_content = request.json
    query = json_content.get("query")
    print(f"query: {query}")
    response = cached_llm.invoke(query)
    print(response)
    response_answer = {"answer": response}
    return response_answer

In [13]:
@app.route("/pdf", methods=["POST"])
def pdfPost():
    file = request.files["file"]
    file_name = file.filename
    save_file = "pdf/" + file_name
    file.save(save_file)
    print(f"filename: {file_name}")
    loader = PDFPlumberLoader(save_file)
    docs = loader.load_and_split()
    print(f"docs len={len(docs)}")
    chunks = text_splitter.split_documents(docs)
    print(f"chunks len={len(chunks)}")
    milvus_store.from_documents(chunks)
    milvus_store.persist()
    response = {
        "status": "Successfully Uploaded",
        "filename": file_name,
        "doc_len": len(docs),
        "chunks": len(chunks),
    }
    return response

In [14]:
@app.route("/ask_pdf", methods=["POST"])
def askPDFPost():
    print("Post /ask_pdf called")
    json_content = request.json
    query = json_content.get("query")
    print(f"query: {query}")
    print("Creating chain")
    retriever = milvus_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={
            "k": 20,
            "score_threshold": 0.1,
        },
    )
    document_chain = create_stuff_documents_chain(cached_llm, raw_prompt)
    chain = create_retrieval_chain(retriever, document_chain)
    result = chain.invoke({"input": query})
    print(result)
    sources = []
    for doc in result["context"]:
        sources.append(
            {"source": doc.metadata["source"], "page_content": doc.page_content}
        )
    response_answer = {"answer": result["answer"], "sources": sources}
    return response_answer

In [20]:
def start_app():
    app.run(host="0.0.0.0", port=8085, debug=True)

In [21]:
if __name__ == "__main__":
    start_app()

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8085
 * Running on http://192.168.33.55:8085
[33mPress CTRL+C to quit[0m
 * Restarting with stat
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/mydocker/.conda/envs/langchain/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/mydocker/.conda/envs/langchain/lib/python3.11/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/home/mydocker/.conda/envs/langchain/lib/python3.11/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
