In [0]:
# Evaluate the name `spark`. This only looks the name up; it does not create a
# session, and because it is not the last line of the cell nothing is displayed.
# NOTE(review): presumably the session object predefined by Databricks — confirm.
spark

# Install the required packages
# NOTE(review): no versions are pinned, so a re-run may resolve to different
# releases than the ones this notebook was written against.
%pip install langchain-community langchain beautifulsoup4 pypdf langchain-openai chromadb faiss-cpu sentence-transformers


In [0]:
# Restart the Python process.
# NOTE(review): presumably needed so the packages installed by the %pip cell
# above become importable — confirm.
dbutils.library.restartPython()

In [0]:
## PDF reader
from langchain_community.document_loaders import PyPDFLoader

# Location of the transcript PDF; make sure the file exists before running.
file_path = '/Volumes/workspace/default/tmp/Reliance Q125 earnings transcript.pdf'
loader = PyPDFLoader(file_path)
docs = loader.load()

# Display only how many documents were loaded. Echoing `docs` itself would dump
# the full transcript text into the cell output.
len(docs)

In [0]:
# Prefer the standalone splitter package; `langchain.text_splitter` is the
# legacy import path and is kept only as a fallback for older installs.
try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Number of chunks produced.
len(documents)

In [0]:
## Vector Embedding And Vector Store
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings

# Embedding model built with the library defaults (no model name is passed).
embeddings = HuggingFaceEmbeddings()
# Build a Chroma vector store from the chunks in `documents` using `embeddings`;
# `db` is what the later query cells search against.
db = Chroma.from_documents(documents, embeddings)


In [0]:
from openai import OpenAI
import os

def get_llm_answer(
    query,
    reference_data,
    *,
    model="databricks-llama-4-maverick",
    temperature=0.8,
    base_url="https://dbc-698fb84c-be59.cloud.databricks.com/serving-endpoints",
):
    """Ask a chat model served behind ``base_url`` to answer ``query``.

    The question and the reference text are interpolated into a single user
    prompt and sent through the OpenAI-compatible client; the text of the
    first returned choice is handed back.

    Args:
        query: The question to answer. Must not be empty or whitespace-only.
        reference_data: Context text the model is told to base its answer on.
        model: Name of the serving endpoint/model to call.
        temperature: Sampling temperature forwarded to the endpoint.
        base_url: Base URL of the serving endpoints.

    Returns:
        The ``message.content`` of the first choice in the response.

    Raises:
        ValueError: If ``query`` is ``None`` or blank.
        RuntimeError: If no API token can be obtained from either the
            notebook context or the ``DATABRICKS_TOKEN`` environment variable.
    """
    # Fail fast instead of spending a model call on an empty question.
    if query is None or not str(query).strip():
        raise ValueError("query must be a non-empty string")

    # In a Databricks notebook `dbutils` is predefined and provides the token
    # of the current notebook context. Where `dbutils` does not exist, fall
    # back to the DATABRICKS_TOKEN environment variable.
    try:
        token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
    except NameError:
        token = os.environ.get("DATABRICKS_TOKEN")
    if not token:
        raise RuntimeError(
            "No Databricks API token available: run inside a Databricks notebook "
            "or set the DATABRICKS_TOKEN environment variable"
        )

    client = OpenAI(api_key=token, base_url=base_url)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "you are helpful AI Assiantant and share the response in short and precise with some meaningful emojis"},
            {"role": "user", "content": f"generate the response for this {query} using this reference content{reference_data} and share the report in point wise with some emojis"}
        ],
        temperature=temperature,
    )
    return response.choices[0].message.content

In [0]:

# Question to ask about the loaded transcript.
query = "what are the future plans? what is expected growth for this year?"

# Search the vector store with the question and keep only the text of the
# first returned result as the context for the model.
retireved_results = db.similarity_search(query)
reference = retireved_results[0].page_content

print(reference)

In [0]:
# Send the question together with the retrieved context to the model and
# print the text it returns.
llm_result = get_llm_answer(query=query, reference_data=reference)
print(llm_result)

In [0]:
query1 = "can you summarize the key highlights of the earnings call? and recommend to stay invest or not?"

# Search with `query1` (not the earlier `query`) so the retrieved context
# matches the question that is actually sent to the model below.
retireved_results = db.similarity_search(query1)
reference1 = retireved_results[0].page_content

llm_result = get_llm_answer(query=query1, reference_data=reference1)
print(llm_result)