In [1]:
!pip install google-cloud-vision
!pip install google-cloud-core
!pip install PyMuPDF

!pip install langchain langchain-community langchain-huggingface sentencepiece langchain_chroma
# !pip install pinecone-client
!pip install -qU langchain-groq

!pip install streamlit
!npm install localtunnel@2.0.2

Collecting PyMuPDF
  Downloading PyMuPDF-1.24.7-cp310-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting PyMuPDFb==1.24.6 (from PyMuPDF)
  Downloading PyMuPDFb-1.24.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.4 kB)
Downloading PyMuPDF-1.24.7-cp310-none-manylinux2014_x86_64.whl (3.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.5/3.5 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hDownloading PyMuPDFb-1.24.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (15.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m60.6 MB/s[0m eta [36m0:00:00[0m00:01[0m:00:01[0m
[?25hInstalling collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.24.7 PyMuPDFb-1.24.6
Collecting langchain
  Downloading langchain-0.2.10-py3-none-any.whl.metadata (6.9 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.9-py3-none-any.whl.metadata (2.5 kB)
Colle

In [2]:
import fitz

from PIL import Image

import cv2
import numpy as np
import os

from google.auth import exceptions
from google.oauth2 import service_account
from google.cloud import vision

import io
import re

from langchain.prompts import PromptTemplate

In [203]:
def get_columns_mask(image, min_line_length=500, min_column_width=200):
    """Build one mask per column delimited by long vertical lines in ``image``.

    Args:
        image: 3-channel BGR image (it is passed to ``cv2.COLOR_BGR2GRAY`` and
            indexed with three axes when the masks are filled).
        min_line_length: a contour is kept as a vertical line only when half of
            its arc length exceeds this value (pixels).
        min_column_width: a column mask is emitted only when the line's right
            edge is more than this many pixels to the right of the previous
            line.

    Returns:
        ``None`` when fewer than two vertical lines are found; otherwise a list
        (possibly empty) of arrays shaped like ``image`` that are 1 inside the
        column region and 0 elsewhere.
    """
    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Tall, narrow structuring elements: two vertical dilations followed by a
    # repeated erosion, then an inverted threshold so that every pixel whose
    # value is <= 250 becomes foreground (255).
    vertical = cv2.dilate(gray_scale, np.ones((17, 1)), iterations=1)
    vertical = cv2.dilate(vertical, np.ones((15, 1)), iterations=1)
    vertical = cv2.erode(vertical, np.ones((15, 3)), iterations=7)
    _, vertical = cv2.threshold(vertical, 250, 255, cv2.THRESH_BINARY_INV)

    contours, _ = cv2.findContours(vertical, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

    # Half the (open) arc length approximates the length of a thin line.
    vertical_lines = [
        contour for contour in contours
        if cv2.arcLength(contour, False) / 2 > min_line_length
    ]

    # At least two lines are required before anything is treated as a table.
    if len(vertical_lines) < 2:
        return None

    # Process lines left to right, ordered by their mean x coordinate.
    sorted_lines = sorted(vertical_lines, key=lambda line: line.mean(axis=0)[0][0])
    prev_x = 0

    masks = []

    for line in sorted_lines:
        # Contours are (N, 1, 2) arrays of (x, y) points.
        max_x, max_y = (int(v) for v in line.max(axis=0)[0])
        _, min_y = (int(v) for v in line.min(axis=0)[0])
        mean_x = line.mean(axis=0)[0][0]

        if max_x - prev_x > min_column_width:
            mask = np.zeros_like(image)
            mask[min_y:max_y, prev_x:max_x, :] = 1
            masks.append(mask)

        # The next column starts at this line's mean x position.
        prev_x = int(mean_x)

    return masks


def get_tables(img):
    """Split a page image into its table columns.

    Args:
        img: page image; it is handed to ``get_columns_mask`` and multiplied
            element-wise with the masks that function returns.

    Returns:
        A list of images. When no column masks are found the list is just
        ``[img]``. Otherwise the first element is the page with every masked
        column blanked to 0, followed by one image per mask containing only
        that column.
    """
    masks = get_columns_mask(img)

    # Treat both "no result" (None) and "no usable columns" ([]) the same way.
    if not masks:
        return [img]

    # One image per column: everything outside the mask is zeroed.
    images = [(img * mask).astype(np.uint8) for mask in masks]

    # Whatever is left of the page once every column has been removed.
    remainder = img
    for mask in masks:
        remainder = remainder * (mask == 0).astype(np.uint8)

    # Always return a list so callers see one return type on every path.
    return [remainder, *images]

In [204]:
def pil_to_cv2(image):
    """Return ``image`` as a NumPy array with its channels in BGR order.

    ``image`` must provide ``convert('RGB')`` (as PIL images do); the converted
    pixels are turned into an array and the last axis is reversed, because
    OpenCV expects BGR rather than RGB.
    """
    rgb_pixels = np.array(image.convert('RGB'))

    # Flip the channel axis, then copy so the result owns contiguous memory.
    return np.flip(rgb_pixels, axis=2).copy()


def resize_and_pad_images(images):
    """Pad images with black columns so that they all share the widest width.

    Only the width is equalised (padding is split between the left and right
    sides); heights are left untouched. No image is rescaled.

    Args:
        images: iterable of NumPy image arrays with at least two axes
            (height, width, ...).

    Returns:
        A new list with one entry per input image. Images that are already as
        wide as the widest one are returned as-is; narrower ones are replaced
        by zero-padded copies.
    """
    images = list(images)
    if not images:
        return []

    max_width = max(img.shape[1] for img in images)

    padded_images = []
    for img in images:
        missing = max_width - img.shape[1]
        if missing == 0:
            padded_images.append(img)
            continue

        # Centre the image horizontally; any odd pixel goes to the right side.
        left = missing // 2
        right = missing - left
        pad_width = [(0, 0), (left, right)] + [(0, 0)] * (img.ndim - 2)
        padded_images.append(np.pad(img, pad_width, mode="constant", constant_values=0))

    return padded_images

In [2]:
%%writefile ocr.py

from google.auth import exceptions
from google.oauth2 import service_account
from google.cloud import vision
import io
import cv2
import os
import numpy as np

class OCR:
    """Small wrapper around the Google Cloud Vision text-detection endpoint."""

    # Service-account key file used when the caller does not supply a path.
    DEFAULT_CREDENTIALS_PATH = '/kaggle/input/project/google_vision_service.json'

    def __init__(self, credentials_path=None):
        """Point the Google client at a key file and create the Vision client.

        Args:
            credentials_path: optional path to a service-account JSON file;
                defaults to ``DEFAULT_CREDENTIALS_PATH``.
        """
        self.__load_api_key(credentials_path or self.DEFAULT_CREDENTIALS_PATH)
        self.client = vision.ImageAnnotatorClient()

    def __load_api_key(self, credentials_path=None):
        """Export ``GOOGLE_APPLICATION_CREDENTIALS`` and sanity-check the file.

        Problems with the key file are reported with ``print`` rather than
        raised, matching the original best-effort behaviour.
        """
        if credentials_path is None:
            credentials_path = self.DEFAULT_CREDENTIALS_PATH

        # Set the environment variable
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path
        print(f"GOOGLE_APPLICATION_CREDENTIALS={credentials_path}")

        try:
            # Parse the file only to verify that it is a usable key; the
            # resulting object is not needed afterwards.
            service_account.Credentials.from_service_account_file(credentials_path)
            print("Credentials loaded successfully")
        except FileNotFoundError:
            print(f"The file {credentials_path} was not found.")
        except exceptions.DefaultCredentialsError as e:
            print(f"An error occurred: {e}")

    def detect_text_from_image(self, image: np.ndarray):
        """Run text detection on ``image`` and return the recognised text.

        The image is PNG-encoded in memory, so no temporary file is written.
        A fixed ``./temp.png`` would be shared by concurrent sessions and left
        behind whenever the request fails.

        Args:
            image: image array accepted by ``cv2.imencode``.

        Returns:
            ``response.full_text_annotation.text`` from the Vision API.

        Raises:
            ValueError: if the image cannot be encoded as PNG.
            RuntimeError: if the API response carries an error message.
        """
        encoded_ok, png_buffer = cv2.imencode(".png", image)
        if not encoded_ok:
            raise ValueError("Could not encode image as PNG for OCR")

        vision_image = vision.Image(content=png_buffer.tobytes())
        response = self.client.text_detection(image=vision_image)

        # Surface API-level failures instead of silently returning empty text.
        if response.error.message:
            raise RuntimeError(f"Vision API error: {response.error.message}")

        return response.full_text_annotation.text


Writing ocr.py


In [3]:
%%writefile document.py

import fitz
from PIL import Image
import numpy as np


def pil_to_cv2(image):
    """Turn a PIL-style image into a BGR NumPy array.

    The image is first converted with ``convert('RGB')``; the channel axis of
    the resulting array is then reversed because OpenCV uses BGR order.
    """
    rgb_pixels = np.array(image.convert('RGB'))

    # Reverse the last axis and copy to get an independent, contiguous array.
    return np.flip(rgb_pixels, axis=2).copy()


class Document:
    """PDF wrapper that renders pages to BGR NumPy images on demand.

    The underlying file is opened only for the duration of each operation and
    is always closed afterwards, even when rendering raises.
    """

    def __init__(self):
        self.document_path = None  # path given to the last successful load
        self.document = None       # open fitz handle, only set while in use
        self.num_pages = None      # page count, None until a document loads

    def load_document(self, document_path):
        """Record ``document_path`` and read its page count.

        Errors are printed rather than raised. On failure the object is reset
        to the "nothing loaded" state so that no stale page count from a
        previously loaded document survives.
        """
        self.document_path = document_path
        self.num_pages = None
        try:
            self._open_document()
            try:
                self.num_pages = len(self.document)
            finally:
                self._close_document()
            print("Document Loaded")
        except Exception as e:
            self.document_path = None
            print(e)

    def get_page(self, page_num):
        """Render one page as a BGR array.

        ``page_num`` may be ``-1`` for the last page. When it is out of range,
        or no document is loaded, a message is printed and ``None`` is returned.
        """
        if self.num_pages is None or page_num >= self.num_pages or page_num < -1:
            print( "Page number out of range")
            return

        if page_num == -1:
            page_num = self.num_pages - 1

        self._open_document()
        try:
            return self._render_page(page_num)
        finally:
            self._close_document()

    def get_all_pages(self):
        """Render every page, opening the file once for the whole pass."""
        if not self.num_pages:
            return []

        self._open_document()
        try:
            return [self._render_page(i) for i in range(self.num_pages)]
        finally:
            self._close_document()

    def _render_page(self, page_num):
        """Render page ``page_num`` of the currently open document."""
        page = self.document.load_page(page_num)
        # get_pixmap() is called with its defaults; the samples are
        # interpreted as packed RGB bytes below.
        pix = page.get_pixmap()
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        return pil_to_cv2(image)

    def _open_document(self):
        self.document = fitz.open(self.document_path)

    def _close_document(self):
        if self.document is not None:
            self.document.close()
            self.document = None

    def __len__(self):
        # len() must return an int, so report 0 while nothing is loaded.
        return self.num_pages or 0

    def __getitem__(self, idx):
        """Support ``doc[i]`` and ``doc[a:b]``; anything else is a TypeError."""
        if isinstance(idx, slice):
            return [self.get_page(i) for i in range(*idx.indices(len(self)))]
        elif isinstance(idx, int):
            return self.get_page(idx)
        else:
            raise TypeError("Invalid argument type. Must be int or slice.")

Writing document.py


In [4]:
%%writefile document_reader.py

import numpy as np
import re
from langchain.prompts import PromptTemplate
import cv2

def get_columns_mask(image, min_line_length=200, min_column_width=200):
    """Build one mask per column delimited by long vertical lines in ``image``.

    Args:
        image: 3-channel BGR image (it is passed to ``cv2.COLOR_BGR2GRAY`` and
            indexed with three axes when the masks are filled).
        min_line_length: a contour is kept as a vertical line only when half of
            its arc length exceeds this value (pixels).
        min_column_width: a column mask is emitted only when the line's right
            edge is more than this many pixels to the right of the previous
            line's left edge.

    Returns:
        A list (possibly empty) of arrays shaped like ``image`` that are 1
        inside the column region and 0 elsewhere.
    """
    gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Tall, narrow structuring elements: two vertical dilations followed by a
    # repeated erosion, then an inverted threshold so that every pixel whose
    # value is <= 250 becomes foreground (255).
    vertical = cv2.dilate(gray_scale, np.ones((17, 1)), iterations=1)
    vertical = cv2.dilate(vertical, np.ones((15, 1)), iterations=1)
    vertical = cv2.erode(vertical, np.ones((15, 3)), iterations=7)
    _, vertical = cv2.threshold(vertical, 250, 255, cv2.THRESH_BINARY_INV)

    contours, _ = cv2.findContours(vertical, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)

    # Half the (open) arc length approximates the length of a thin line.
    vertical_lines = [
        contour for contour in contours
        if cv2.arcLength(contour, False) / 2 > min_line_length
    ]

    # Process lines left to right, ordered by their mean x coordinate.
    sorted_lines = sorted(vertical_lines, key=lambda line: line.mean(axis=0)[0][0])
    prev_x = 0

    masks = []

    for line in sorted_lines:
        # Contours are (N, 1, 2) arrays of (x, y) points.
        max_x, max_y = (int(v) for v in line.max(axis=0)[0])
        min_x, min_y = (int(v) for v in line.min(axis=0)[0])

        if max_x - prev_x > min_column_width:
            mask = np.zeros_like(image)
            mask[min_y:max_y, prev_x:max_x, :] = 1
            masks.append(mask)

        # The next column starts at this line's left edge.
        prev_x = min_x

    return masks
    

def get_tables(img):
    """Split a page image into its table columns.

    Args:
        img: page image; it is handed to ``get_columns_mask`` and multiplied
            element-wise with the masks that function returns.

    Returns:
        A list of images. When no column masks are found the list is just
        ``[img]``. Otherwise the first element is the page with every masked
        column blanked to 0, followed by one image per mask containing only
        that column.
    """
    masks = get_columns_mask(img)

    # Treat both "no result" (None) and "no usable columns" ([]) the same way.
    if not masks:
        return [img]

    # One image per column: everything outside the mask is zeroed.
    images = [(img * mask).astype(np.uint8) for mask in masks]

    # Whatever is left of the page once every column has been removed.
    remainder = img
    for mask in masks:
        remainder = remainder * (mask == 0).astype(np.uint8)

    # Always return a list so callers see one return type on every path.
    return [remainder, *images]

def resize_and_pad_images(images):
    """Pad images with black columns so that they all share the widest width.

    Only the width is equalised (padding is split between the left and right
    sides); heights are left untouched. No image is rescaled.

    Args:
        images: iterable of NumPy image arrays with at least two axes
            (height, width, ...).

    Returns:
        A new list with one entry per input image. Images that are already as
        wide as the widest one are returned as-is; narrower ones are replaced
        by zero-padded copies.
    """
    images = list(images)
    if not images:
        return []

    max_width = max(img.shape[1] for img in images)

    padded_images = []
    for img in images:
        missing = max_width - img.shape[1]
        if missing == 0:
            padded_images.append(img)
            continue

        # Centre the image horizontally; any odd pixel goes to the right side.
        left = missing // 2
        right = missing - left
        pad_width = [(0, 0), (left, right)] + [(0, 0)] * (img.ndim - 2)
        padded_images.append(np.pad(img, pad_width, mode="constant", constant_values=0))

    return padded_images

class DocumentReader:
    """Turns a document's page images into text via OCR and optional LLM cleanup."""

    def __init__(self, document, ocr):
        """
        Args:
            document: object exposing ``get_all_pages()`` that returns page images.
            ocr: object exposing ``detect_text_from_image(image)``.
        """
        self.document = document
        self.ocr = ocr
        self.text = None     # filled in by _read_document()
        self.all_pages = []  # page/table images used for the last read

    def _read_document(self, llm=None, max_page_len=3):
        """OCR the whole document and store the result in ``self.text``.

        Every page is split with ``get_tables``; the resulting images are
        stacked vertically in batches of ``max_page_len`` and each batch is sent
        to the OCR engine as one image. Batch texts are joined with newlines.

        Args:
            llm: optional chat model used to clean up the OCR output.
            max_page_len: maximum number of images stacked into one OCR request.
        """
        self.text = ""

        all_pages = self.document.get_all_pages()
        temp_pages = []

        for page in all_pages:
            temp_pages.extend(get_tables(page))

        if len(all_pages) == len(temp_pages):
            print("no tables found")

        self.all_pages = temp_pages

        # A single loop covers short and long documents alike; an empty
        # document produces no batches, so np.vstack is never given an empty list.
        processed_chunks = []
        for start in range(0, len(self.all_pages), max_page_len):
            pages = self.all_pages[start:start + max_page_len]
            combined_doc = np.vstack(resize_and_pad_images(pages))
            detected_text = self.ocr.detect_text_from_image(combined_doc)
            print(detected_text)
            print("******************************************")
            processed_chunks.append(self._post_process_text(detected_text, llm))

        self.text = "\n".join(processed_chunks)

    @staticmethod
    def _strip_llm_preamble(content):
        """Drop a leading "Here is the corrected text:"-style line, if present.

        The first line is removed only when it ends with a colon and more lines
        follow, so answers that start directly with content are left intact.
        """
        lines = content.split("\n")
        if len(lines) > 1 and lines[0].strip().endswith(":"):
            lines = lines[1:]
        return "\n".join(lines)

    def _post_process_text(self, text, llm):
        """Put one sentence per line and optionally let ``llm`` correct the text.

        Args:
            text: raw OCR output.
            llm: chat model composed with the correction prompt, or ``None``
                to skip the correction step.

        Returns:
            The re-split text, or the LLM's corrected version of it.
        """
        # separate by . (but not by a dot that is followed by a digit, e.g. 2.5)
        pattern = r'\.(?!\d)'
        # separate by . and :
        # pattern = r'(?<!\d)(?<!\d:)[.:](?!\d)'

        result = re.split(pattern, text.replace("\n", " "))
        result = ".\n".join([text for text in result if text])

        # Nothing to correct: skip the LLM call when there is no model or no text.
        if llm is None or not result.strip():
            return result

        correction_template = """
Inside the space dilimited by --- is a text that has some spelling mistakes and some words may be repeated.
Your task is to correct the spelling mistakes, remove the repeated words, seperate paragraphs when the context changes and add fullstops and commas in necessary places. But do not give headings to paragraphs leave it as it is.
Do not make any changes that are not mentioned above.
---{context}---
"""

        correction_prompt = PromptTemplate.from_template(template=correction_template)
        correction_chain = correction_prompt | llm

        response = correction_chain.invoke({"context":result})

        # Chat models return a message object with .content; fall back to the
        # response itself when it has no such attribute.
        content = getattr(response, "content", response)

        return self._strip_llm_preamble(content)
        

Writing document_reader.py


In [5]:
%%writefile vector_store.py

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_chroma import Chroma

class VectorStore:
    """Splits text into overlapping chunks and indexes them in a Chroma store."""

    def __init__(self, chunk_size=2000, chunk_overlap=500, k=5, model_name="all-MiniLM-L6-v2"):
        """
        Args:
            chunk_size: maximum chunk length in characters (``len`` is the
                length function).
            chunk_overlap: number of characters shared by neighbouring chunks.
            k: number of chunks the retriever is asked to return per query.
            model_name: sentence-transformers model used for embeddings.
        """
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            is_separator_regex=False,
        )

        self.embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
        self.k = k

        # Populated by split_text_and_create_vector_db(); initialised here so
        # that reading them before a build yields None/[] instead of raising
        # AttributeError.
        self.vectorstore = None
        self.retriever = None
        self.text_splits = []

    def split_text_and_create_vector_db(self, text):
        """Chunk ``text``, embed the chunks and expose a retriever over them.

        Sets ``self.text_splits``, ``self.vectorstore`` and ``self.retriever``.

        Raises:
            ValueError: if ``text`` is empty or only whitespace, since there
                would be nothing to index.
        """
        if not text or not text.strip():
            raise ValueError("Cannot build a vector store from empty text")

        texts = self.text_splitter.split_text(text)
        self.text_splits = self.text_splitter.create_documents(texts)

        self.vectorstore = Chroma.from_documents(documents=self.text_splits, embedding=self.embedding_function)
        self.retriever = self.vectorstore.as_retriever(search_kwargs={"k": self.k})
        

Writing vector_store.py


In [6]:
%%writefile generator.py

from langchain.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_groq import ChatGroq
from langchain_core.prompts import MessagesPlaceholder
from langchain.schema import SystemMessage, HumanMessage, AIMessage

class Generator:
    """Retrieval-augmented question answering with a running chat history."""

    def __init__(self, retriever, model="llama3-70b-8192"):
        """
        Args:
            retriever: retriever handed to ``create_retrieval_chain``.
            model: Groq model name passed to ``ChatGroq``.
        """
        self.llm = ChatGroq(model=model)
        self.retriever = retriever

        self.system_prompt = """Use the following pieces of context to answer the question at the end.
Only use the information present in the context to answer the question.
Even if you know the answer, but if is not present in the given context do not answer the question.
Return the answer as it is in the document and do not try to summarize or expand the topic.
Do not make things up that are otherwise not in the context.
If answer is not present in the context, just say that you don't know, don't try to make up an answer.
If you don't understand the question just say that you didn't the question and to repeat the question.
Always say "Thanks for asking!" at the end of the answer. 
{context}
"""

        # System instructions, then the prior turns, then the new question.
        self.prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        self.chat_history = []

        self.question_answer_chain = create_stuff_documents_chain(self.llm, self.prompt)
        self.rag_chain = create_retrieval_chain(self.retriever, self.question_answer_chain)

    def generate_answer(self, query):
        """Answer ``query`` and append the exchange to ``self.chat_history``.

        Returns:
            The chain's response; this file reads its ``'input'``, ``'answer'``
            and ``'context'`` entries.
        """
        response = self.rag_chain.invoke({"input": query, "chat_history":self.chat_history})

        # Record what the model actually answered. Storing the literal string
        # "context" here would feed a meaningless assistant turn back into
        # every later prompt.
        self.chat_history.extend(
        [
            HumanMessage(response['input']),
            AIMessage(response['answer']),
        ])

        return response
        

Writing generator.py


In [22]:
from getpass import getpass
import os

os.environ["GROQ_API_KEY"] = getpass()

from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-70b-8192")



 ························································


In [131]:
# Prompt asking the model to clean up a piece of text; the text is inserted at
# {context}, between the two --- delimiters.
correction_template = """
Inside the space dilimited by --- is a text that has some spelling mistakes and some words may be repeated.
Your task is to correct the spelling mistakes, remove the repeated words, seperate paragraphs when the context changes and add fullstops and commas in necessary places.
Do not make any changes that are not mentioned above.
---{context}---
"""

# Compose the prompt template with the chat model created in an earlier cell.
correction_prompt = PromptTemplate.from_template(template=correction_template)
correction_chain = correction_prompt | llm



# NOTE(review): `result` is not defined in any visible cell, so this line fails
# on a fresh kernel. Presumably it held the OCR text to be corrected; confirm
# and define it before this cell.
response = correction_chain.invoke({"context":result})

In [137]:
print(response.content)

Here is the corrected text:

**State Mathematical Model**

A static mathematical model describes a system at a specific point in time, without considering its behaviour over time. The variables are state and do not change with time. This is suitable for systems with constant or unchanging parameters. It represents a system state at a particular instant.

**Dynamic Mathematical Model**

A dynamic mathematical model captures the behaviour of a system over time, incorporating time-dependent relationships. The variables are time-dependent and change over time. It is suitable for systems with changing parameters and dynamic behaviour. It represents the dynamic behaviour of the system.


In [156]:
ocr = OCR()

GOOGLE_APPLICATION_CREDENTIALS=/kaggle/input/project/google_vision_service.json
Credentials loaded successfully


In [157]:
document_path = "/kaggle/input/pdf-project/handwritten.pdf"

doc = Document()
doc.load_document(document_path)

Document Loaded


In [158]:
doc_reader = DocumentReader(doc, ocr)
doc_reader._read_document(llm=llm)

no tables found


In [53]:
vec = VectorStore()
vec.split_text_and_create_vector_db(doc_reader.text)



In [54]:
generator = Generator(vec.retriever)

In [66]:
response = generator.generate_answer("Who are you?")

In [67]:
print(response['answer'])

I'm an AI designed to answer questions based on the provided context. I don't have personal information or identity. I exist solely to assist and provide information within the scope of the given context. Thanks for asking!


In [None]:
print("\n\n\n\n".join(x.page_content for x in response['context']))

In [159]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import SystemMessage, HumanMessage, AIMessage
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_chroma import Chroma
from langchain.chains import RetrievalQA
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [None]:
HUGGINGFACEHUB_API_TOKEN = getpass()
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

In [None]:
# model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# llm = HuggingFaceEndpoint(
#     repo_id=model_name,
#     temperature=0.001,
#     top_p = 0.99,
#     repetition_penalty = 1.2,
# )


# os.environ["GROQ_API_KEY"] = getpass()

# from langchain_groq import ChatGroq

# llm = ChatGroq(model="llama3-70b-8192")



In [160]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=500,
    length_function=len,
    is_separator_regex=False,
)

texts = text_splitter.split_text(doc_reader.text)
texts = text_splitter.create_documents(texts)

In [161]:
# embeddings_name = "sentence-transformers/all-MiniLM-L6-v2"

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# embeddings = HuggingFaceEmbeddings(model_name=embeddings_name)



In [162]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

system_prompt = """Use the following pieces of context to answer the question at the end.
Only use the information present in the context to answer the question.
Return the answer as it is in the document and do not try to summarize or expand the topic.
Do not make things up that are otherwise not in the context.
If answer is not present in the context, just say that you don't know, don't try to make up an answer.
If you don't understand the question just say that you didn't the question and to repeat the question.
Always say "Thanks for asking!" at the end of the answer. 
{context}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [197]:
# Build the vector store in this cell so that `vectorstore` exists on a fresh
# kernel; with the line commented out the retriever below raised NameError
# unless a stale variable was left over from an earlier run.
vectorstore = Chroma.from_documents(documents=texts, embedding=embedding_function)
# A score threshold of 0.0 keeps all of the top-k (k=3) matches.
retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"k":3, 'score_threshold': 0.0})

In [200]:
# vectorstore.similarity_search_with_relevance_scores("I am a data scientist. how can in help in big data")
retriever.invoke("Characterstics of big data")

[Document(page_content='Big Data is a field that systematically extracts information from data, which is too large or complex to be dealt with by traditional data-processing software. Currently, big data refers to the use of predictive behavior analytics methods that extract value from the data. This can find new correlations, spot business trends, prevent diseases, combat crimes, and more.\n\nCharacteristics of Big Data are:\n\ni) Volume: This refers to the sheer amount of data being generated. The amount can reach terabytes, petabytes, or even exabytes, from social media, sensors, financial transactions, and more. Real-world Example: The sheer volume of customer interactions across millions of products creates a massive dataset.'),
 Document(page_content="Characteristics of Big Data are:\n\ni) Volume: This refers to the sheer amount of data being generated. The amount can reach terabytes, petabytes, or even exabytes, from social media, sensors, financial transactions, and more. Real-

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
chat_history = []

In [None]:
query = "What are the characterstics of big data?"

response = rag_chain.invoke({"input": query, "chat_history":chat_history})

# Store the model's real answer in the history; the literal string "context"
# would be replayed to the model as a meaningless assistant turn.
chat_history.extend(
[
    HumanMessage(response['input']),
    AIMessage(response['answer']),
])

In [None]:
print(response['answer'], "\n\n")
print(*[x.page_content for x in response['context']])

In [None]:
# !pip install streamlit
# !npm install localtunnel@2.0.2

In [10]:
%%writefile app.py

import streamlit as st
from langchain_groq import ChatGroq
from generator import Generator
from document import Document
from document_reader import DocumentReader
from vector_store import VectorStore
from ocr import OCR
from langchain_groq import ChatGroq
import os

st.title("PDF Bot")

if 'step' not in st.session_state:
    st.session_state.step = 0

# One-time, per-session setup of the heavy objects.
if 'ocr' not in st.session_state:
    with st.spinner('Initializing...'):
        st.session_state.file_loaded = False

        # Secrets must never live in source code. The Groq key is read from the
        # environment of the process that launches Streamlit (e.g. set through
        # getpass in the notebook before `streamlit run`). Any key that was
        # previously committed here should be revoked and rotated.
        if not os.environ.get("GROQ_API_KEY"):
            st.error("GROQ_API_KEY is not set. Export it before launching the app.")
            st.stop()

        st.session_state.ocr = OCR()
        st.session_state.doc = Document()
        st.session_state.doc_reader = DocumentReader(st.session_state.doc, st.session_state.ocr)

        st.session_state.vec = VectorStore()

def create_generator(pdf_path):
    """Read ``pdf_path`` and rebuild the session's vector store and generator.

    The document is loaded and OCR'd (with LLM correction), any collection left
    in the existing vector store is reset, the new text is indexed, and a fresh
    ``Generator`` is stored in ``st.session_state.generator``.
    """
    state = st.session_state

    with st.spinner("Reading pdf..."):
        groq_llm = ChatGroq(model="llama3-70b-8192")
        state.doc.load_document(pdf_path)
        state.doc_reader._read_document(llm=groq_llm)

        # Clear chunks indexed for a previously uploaded file, if any.
        existing_store = state.vec.vectorstore
        if existing_store is not None:
            existing_store.reset_collection()

        state.vec.split_text_and_create_vector_db(state.doc_reader.text)

        state.generator = Generator(state.vec.retriever)
#         print("ready to answer questions")
        
def pdf_changed():
    """Uploader callback: clear the chat and mark the file as not yet loaded."""
    state = st.session_state
    state.messages = []
    state.file_loaded = False

def main():
    """Render the sidebar uploader and the chat UI for the uploaded PDF."""
    st.sidebar.title("Upload PDF Documents")
    uploaded_file = st.sidebar.file_uploader("Upload a PDF file", type=["pdf"], on_change=pdf_changed)

    # Without a file there is nothing to chat about: reset and wait.
    if uploaded_file is None:
        st.session_state.file_loaded = False
        st.session_state.messages = []
        return

    if not st.session_state.file_loaded:
        import tempfile

        # Persist the upload under a generated name instead of the
        # client-supplied filename, and remove it once processing is done.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
            tmp_pdf.write(uploaded_file.getbuffer())
            pdf_path = tmp_pdf.name

        try:
            create_generator(pdf_path)
        finally:
            os.remove(pdf_path)

        st.session_state.file_loaded = True

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
            if message["role"] == 'assistant':
                with st.expander("Context"):
                    st.write(message['context'])

    # React to user input
    if query := st.chat_input("What is up?"):
        # Display user message in chat message container
        st.chat_message("user").markdown(query)
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": query})

        with st.spinner('Generating Response...'):
            response = st.session_state.generator.generate_answer(query)
            answer = response['answer']
            # Retrieved chunks, shown to the user under the answer.
            context = "\n\n**********************************\n\n".join(x.page_content for x in response['context'])

        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(answer)
            with st.expander("Context"):
                st.write(context)

        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": answer, "context": context})


if __name__ == "__main__":
    main()

Overwriting app.py


In [11]:
!curl ipv4.icanhazip.com

130.211.210.208


In [None]:
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://130.211.210.208:8501[0m
[0m
your url is: https://cold-wombats-watch.loca.lt
GOOGLE_APPLICATION_CREDENTIALS=/kaggle/input/project/google_vision_service.json
Credentials loaded successfully
  warn_deprecated(
2024-07-21 19:50:47.500804: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-21 19:50:47.500927: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-21 19:50:47.672543: E external/local_xla/xla/stream_