# Houssem Mejbri

## PDF → Langchain → Flask → Browser

In [1]:
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from flask import Flask, request, jsonify
from flask_cors import CORS, cross_origin

## Initialise the OpenAI API key

In [3]:
import os
import warnings

# SECURITY: never hard-code the API key in the notebook/source file. A key
# committed to a file must be treated as leaked and revoked. Supply the key
# through the environment instead, e.g. `export OPENAI_API_KEY=...` before
# starting Jupyter/Flask; the OpenAI client reads it from os.environ.
if not os.environ.get("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; export it in the environment before "
        "creating embeddings or running the chain.",
        RuntimeWarning,
    )

## Read a PDF file and return its content

In [4]:
from typing_extensions import Concatenate

def readFile(fileName):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        fileName: Location of the PDF; passed unchanged to ``PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        added between pages. Pages whose ``extract_text()`` result is empty
        (or ``None``) are skipped, so the result is ``""`` when no page
        yields any text.
    """
    reader = PdfReader(fileName)

    # Join once instead of growing a string with ``+=`` page by page.
    page_texts = (page.extract_text() for page in reader.pages)
    return "".join(text for text in page_texts if text)

## Split text

In [5]:
def splitText(raw_text):
    """Split *raw_text* into overlapping chunks with ``CharacterTextSplitter``.

    The text is cut on newlines into chunks of at most 800 characters
    (length is measured with ``len``), with a 200-character overlap between
    neighbouring chunks. Chunking keeps each piece small so that it does not
    inflate the token count sent to the model.

    Args:
        raw_text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for the text.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 800,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = CharacterTextSplitter(**splitter_options)
    chunks = splitter.split_text(raw_text)
    return chunks

## Create chain

In [6]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI

# Search index over the uploaded document; populated by createChain().
document_search = None


def createChain(texts):
    """Index *texts* for similarity search and return a question-answering chain.

    Side effect: rebinds the module-level ``document_search`` to a FAISS
    index built from ``texts`` using ``OpenAIEmbeddings``.

    Args:
        texts: The text chunks to index.

    Returns:
        The chain produced by ``load_qa_chain`` with an ``OpenAI`` LLM and
        ``chain_type="stuff"``.
    """
    global document_search

    # Build the embedding client and index the chunks in one step.
    document_search = FAISS.from_texts(texts, OpenAIEmbeddings())

    # The chain is created only after the index has been built successfully.
    return load_qa_chain(OpenAI(), chain_type="stuff")

## API code

In [None]:
from flask import Flask, request, jsonify
from flask_cors import CORS, cross_origin

# Create the Flask application that serves the upload and chatbot endpoints.
app = Flask(__name__)
# App-wide CORS settings: credentialed requests are allowed, only the
# Content-Type request header is whitelisted, and only POST/OPTIONS methods.
CORS(app, supports_credentials=True, allow_headers=['Content-Type'], methods=['POST', 'OPTIONS'])

# Module-level state shared between the two endpoints. Executing these
# assignments again resets both values to None.
chain = None  # QA chain; set by uploadFile(), chatbot() returns an error while it is None
document_search = None  # search index; rebound by createChain() through its `global` statement

# Upload file API endpoint
@app.route("/upload", methods=["POST", "OPTIONS"])
@cross_origin(origins='*')  # Flask-CORS names this option `origins`; `origin` is not recognised
def uploadFile():
    """Receive a PDF upload, extract and split its text, and build the QA chain.

    Expects a multipart form upload carrying the PDF in the ``file`` field.
    The file is saved in the current working directory under the base name of
    the uploaded filename, then read, split into chunks, and passed to
    ``createChain``. The resulting chain is stored in the module-level
    ``chain`` for the chatbot endpoint.

    Returns:
        A ``(json, status)`` pair:
        * 200 and ``{"message": "File uploaded successfully"}`` on success;
        * 400 and an ``error`` message when the upload has no usable filename
          or no text could be extracted from the file.
    """
    global chain  # rebinding the module-level chain below

    import os  # local import keeps this block self-contained

    uploaded_file = request.files['file']

    # The filename is client-controlled. Keep only its last path component
    # (treating backslashes as separators too) so names such as
    # "../../etc/passwd" cannot write outside the working directory.
    safe_name = os.path.basename((uploaded_file.filename or "").replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        return jsonify({"error": "No file selected"}), 400

    saved_path = os.path.join(".", safe_name)
    uploaded_file.save(saved_path)

    # Read raw text from the saved file and split it into chunks.
    raw_text = readFile(saved_path)
    texts = splitText(raw_text)

    # An index cannot be built from zero chunks (e.g. a scanned PDF without
    # a text layer); report it to the client instead of failing with a 500.
    if not texts:
        return jsonify({"error": "No text could be extracted from the file"}), 400

    # Build the search index and the QA chain from the extracted chunks.
    chain = createChain(texts)

    return jsonify({"message": "File uploaded successfully"}), 200

# Chatbot API endpoint
@app.route("/", methods=["POST", "OPTIONS"])
@cross_origin(origins='*')  # Flask-CORS names this option `origins`; `origin` is not recognised
def chatbot():
    """Answer a question about the previously uploaded document.

    Expects a JSON body of the form ``{"text": "<question>"}``. The question
    is used for a similarity search on ``document_search``; the matching
    documents and the question are then passed to ``chain.run``.

    Returns:
        A ``(json, status)`` pair:
        * 200 and ``{"message": <answer>}`` on success;
        * 500 and ``{"error": "Chain not initialized"}`` when no document has
          been indexed yet (``chain`` or ``document_search`` is still None);
        * 400 and an ``error`` message when the body is not a JSON object
          with a non-empty string under ``"text"``.
    """
    # Both globals are only read here, so no `global` declaration is needed.
    if chain is None or document_search is None:
        return jsonify({"error": "Chain not initialized"}), 500

    # Validate the payload up front so a malformed request yields a 400
    # instead of an unhandled KeyError/TypeError (HTTP 500).
    data = request.get_json()
    question = data.get("text") if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'text' string"}), 400

    # Retrieve the chunks most similar to the question.
    docs = document_search.similarity_search(question)

    # Generate the answer from the retrieved chunks and the question.
    response = chain.run(input_documents=docs, question=question)

    return jsonify({"message": response}), 200

# Start Flask's built-in server only when this code runs as the main module.
# No host/port is passed, so Flask's defaults apply (the captured output
# below shows http://127.0.0.1:5000); debug mode is explicitly off.
if __name__ == "__main__":
    app.run(debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
