In [None]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API credentials needed by the
# Cohere embedding/LLM clients used in later cells — confirm.
load_dotenv()

In [None]:
import fitz
from io import BytesIO
from langchain import FAISS
from typing import Optional
from langchain_cohere import CohereEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
def extract_text_and_urls(
        uploaded_pdf: Optional[BytesIO] = None,
        pdf_path: Optional[str] = None
) -> str:
    """Extract the text of a PDF, with every hyperlink's URI inlined.

    For each page, the page text is emitted first. Then, for every link
    entry that carries both a ``"uri"`` and a ``"from"`` key, a line of the
    form ``"\\n<text inside the link rectangle> (<uri>)"`` is appended.

    Args:
        uploaded_pdf: Binary stream holding the PDF. Only consulted when
            ``pdf_path`` is falsy; the stream is consumed via ``.read()``.
        pdf_path: Filesystem path of the PDF. Takes precedence over
            ``uploaded_pdf`` when truthy.

    Returns:
        A single string with the text of all pages and the inlined link
        annotations. URLs are NOT returned separately.

    Raises:
        ValueError: If neither ``pdf_path`` nor ``uploaded_pdf`` is given.
    """
    # Fail with a clear message instead of an AttributeError on None.read().
    if not pdf_path and uploaded_pdf is None:
        raise ValueError("Either 'pdf_path' or 'uploaded_pdf' must be provided.")

    doc = (
        fitz.open(pdf_path) if pdf_path
        else fitz.open(stream=uploaded_pdf.read(), filetype="pdf")
    )

    # Collect fragments and join once rather than growing a string in a loop.
    fragments = []
    try:
        for page in doc:
            fragments.append(page.get_text())

            for link in page.get_links():
                # Only links that point to a URI and have a rectangle on the
                # page can be annotated with their visible text.
                if "uri" in link and "from" in link:
                    link_rect = fitz.Rect(link["from"])
                    link_text = page.get_text("text", clip=link_rect)
                    fragments.append(f"\n{link_text} ({link['uri']})")
    finally:
        # Release the document even if a page fails to process.
        doc.close()

    return "".join(fragments)

In [None]:
def process_pdf(pdf_path: str) -> FAISS:
    """Build a FAISS vector store from the text of a PDF file.

    The PDF text (with link URIs inlined by ``extract_text_and_urls``) is
    split into overlapping chunks, and the chunks are embedded with the
    Cohere ``embed-multilingual-v2.0`` model and indexed.

    Args:
        pdf_path: Filesystem path of the PDF to index.

    Returns:
        A FAISS vector store built from the text chunks.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.
    """
    embeddings = CohereEmbeddings(model="embed-multilingual-v2.0")
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )

    # extract_text_and_urls returns ONE string (URLs are already embedded in
    # it); unpacking it into two names raised ValueError for any real text.
    text = extract_text_and_urls(pdf_path=pdf_path)

    chunks = text_splitter.split_text(text)
    if not chunks:
        # Fail early with a clear message rather than passing an empty list
        # to the embedding / indexing step.
        raise ValueError(f"No text could be extracted from PDF: {pdf_path!r}")

    return FAISS.from_texts(chunks, embeddings)

In [None]:
# Build the vector store for one specific PDF on the author's machine.
# NOTE(review): this is an absolute, user-specific path, so the cell fails on
# any other machine; consider a configurable path relative to the project.
# `knowledge_base` is not referenced by any other cell visible here.
knowledge_base = process_pdf("/Users/chaitanyabasava/Desktop/Sai Naga Viswa Chaitanya_Basava_resume.pdf")

In [None]:
from model import LLM, LLMBuilder
from langchain_core.prompts import ChatPromptTemplate

In [None]:
# Obtain the chat model from the project-local `model` module.
# NOTE(review): LLMBuilder / LLM are defined outside this notebook; presumably
# get_llm returns a LangChain-compatible runnable, since it is piped after a
# prompt in a later cell — confirm.
llm = LLMBuilder.get_llm(LLM.COHERE_COMMAND_R)

In [None]:
# Prompt template with a single `{topic}` placeholder.
prompt = ChatPromptTemplate.from_template("tell me a lame joke about {topic}")

# Compose prompt -> model so the formatted prompt is fed to `llm`.
# NOTE(review): the name says "openrouter" but the model selected above is
# LLM.COHERE_COMMAND_R — confirm which provider actually serves it.
openrouter_chain = prompt | llm

In [None]:
# Smoke test: run the chain with a sample topic and print whatever it returns.
print(openrouter_chain.invoke({"topic": "F.R.I.E.N.D.S"}))