In [None]:
import json
import os
import requests
from datetime import datetime, timedelta
from dotenv import load_dotenv
#Import other libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI



In [None]:
def _rewrite_env_file(env_path, updates):
    """Rewrite ``env_path`` so every key in ``updates`` carries its new value.

    Existing ``KEY=value`` lines for those keys are replaced in place, all
    other lines are kept unchanged, and keys not yet present are appended.
    A missing file is treated as empty and created.
    """
    try:
        with open(env_path, "r") as file:
            lines = file.readlines()
    except FileNotFoundError:
        lines = []

    written = set()
    with open(env_path, "w") as file:
        for line in lines:
            key = line.split("=", 1)[0].strip()
            if key in updates:
                file.write(f"{key}={updates[key]}\n")
                written.add(key)
            else:
                # Guard against a last line without a trailing newline so an
                # appended key never ends up glued onto it.
                file.write(line if line.endswith("\n") else line + "\n")
        for key, value in updates.items():
            if key not in written:
                file.write(f"{key}={value}\n")


def generateToken(dotenv_path=None, max_age_minutes=60):
    """Refresh the cached access token when it is missing or too old.

    Loads the env file, and if ``TIME_GENERATED`` is absent, unparsable, or
    older than ``max_age_minutes``, requests a new token from
    ``ISSUER_URL/v1/token`` using the client-credentials grant. The new token
    and timestamp are stored as ``OPENAI_API_KEY`` / ``TIME_GENERATED`` both in
    the same env file that was loaded and in ``os.environ``.

    Args:
        dotenv_path: Path of the env file to read and update. Defaults to
            ``"path to auth.env file"`` under the current working directory.
        max_age_minutes: Age in minutes after which the token is refreshed.

    Returns:
        None. The token is deliberately not returned or printed, so it does
        not end up in the notebook output.

    Raises:
        ValueError: If ISSUER_URL, CLIENT_ID or CLIENT_SECRET is not set.
        requests.HTTPError: If the token endpoint answers with an error status.
        KeyError: If the token response has no ``access_token`` field.
    """
    if dotenv_path is None:
        dotenv_path = os.path.join(os.getcwd(), "path to auth.env file")
    load_dotenv(dotenv_path)

    # A missing or malformed timestamp means "no usable token": refresh
    # instead of crashing inside fromisoformat().
    time_generated_str = os.getenv("TIME_GENERATED")
    try:
        time_generated = (
            datetime.fromisoformat(time_generated_str) if time_generated_str else None
        )
    except ValueError:
        time_generated = None

    if time_generated is not None:
        # Compare in the timestamp's own timezone (naive stays naive).
        now = datetime.now(time_generated.tzinfo)
        if time_generated + timedelta(minutes=max_age_minutes) >= now:
            return

    issuer_url = os.getenv("ISSUER_URL")
    client_id = os.getenv("CLIENT_ID")
    client_secret = os.getenv("CLIENT_SECRET")
    scope = os.getenv("SCOPE")
    missing = [
        name
        for name, value in (
            ("ISSUER_URL", issuer_url),
            ("CLIENT_ID", client_id),
            ("CLIENT_SECRET", client_secret),
        )
        if not value
    ]
    if missing:
        raise ValueError(
            "Missing required setting(s) in environment / env file: "
            + ", ".join(missing)
        )

    token_url = issuer_url.rstrip("/") + "/v1/token"
    payload = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
        "scope": scope,
    }
    response = requests.post(
        token_url,
        data=payload,
        headers={"Content-Type": "application/x-www-form-urlencoded"},
        timeout=30,
    )
    # Surface HTTP errors here rather than as a KeyError on the JSON body.
    response.raise_for_status()
    token = response.json()["access_token"]

    generated_at = datetime.now().isoformat()
    updates = {"OPENAI_API_KEY": token, "TIME_GENERATED": generated_at}

    # Persist to the same file that was loaded, so the next kernel sees the
    # fresh timestamp instead of refreshing on every run.
    _rewrite_env_file(dotenv_path, updates)

    # load_dotenv() does not override variables that are already set, so
    # update the running process explicitly.
    os.environ.update(updates)
    print(f"Access token refreshed at {generated_at}")


In [None]:
# Refresh the access token if the cached one is older than 60 minutes.
generateToken()

In [None]:
# Source PDF is expected in the current working directory; CHROMA_PATH is
# where the vector store will be persisted.
cwd = os.getcwd()
DOC_PATH = os.path.join(cwd,'Howard_Roark_speech.pdf')
CHROMA_PATH = "path to chroma_db"

# Load the PDF into a list of documents.
loader = PyPDFLoader(DOC_PATH)
pages = loader.load()

# Show a short summary and preview instead of dumping every page object,
# which floods the notebook output.
print(f"Loaded {len(pages)} page(s) from {DOC_PATH}")
if pages:
    print(pages[0].page_content[:300])

In [None]:
# Split the loaded pages into small, slightly overlapping chunks for embedding.
# NOTE(review): CharacterTextSplitter splits on its separator first, so
# individual chunks may presumably exceed CHUNK_SIZE — confirm on the output.
CHUNK_SIZE = 200
CHUNK_OVERLAP = 20
text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = text_splitter.split_documents(pages)

# Summarise instead of printing every chunk object.
print(f"Split {len(pages)} page(s) into {len(chunks)} chunk(s)")
if chunks:
    print(chunks[0].page_content)

In [None]:
# Load credentials and the AI-gateway header from the env file.
cwd = os.getcwd()
dotenv_path = os.path.join(cwd,'path to auth.env file')
load_dotenv(dotenv_path)
openai_api_key = os.getenv("OPENAI_API_KEY")
header_name = os.getenv('AI_GATEWAY_HEADER_NAME')
header_value = os.getenv('AI_GATEWAY_REGISTRATION_ID')

# Fail early with a readable message: unset variables would otherwise build
# a {None: None} header dict and only fail later inside the client.
missing_vars = [
    name
    for name, value in (
        ("OPENAI_API_KEY", openai_api_key),
        ("AI_GATEWAY_HEADER_NAME", header_name),
        ("AI_GATEWAY_REGISTRATION_ID", header_value),
    )
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_vars)
    )

headers = {
    header_name: header_value
    }

# Embed the chunks and persist them in a Chroma collection under CHROMA_PATH.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm, and clear CHROMA_PATH if duplicates appear.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large",default_headers=headers)
db_chroma = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH,collection_name="my_rag_collection")

In [None]:
# ----- Retrieval and Generation Process -----

# Example user question (query) to answer from the indexed document.
# Alternative example to try:
#query = 'what are the characteristics of a creator?'
query = 'Who is a parasite?'

In [None]:
# Retrieve the TOP_K chunks closest to the query vector, each paired with its
# score.
# NOTE(review): the collection is created without an explicit distance metric,
# so Chroma's default applies — presumably squared L2 rather than cosine; confirm.
TOP_K = 2
docs_chroma = db_chroma.similarity_search_with_score(query, k=TOP_K)

# Join the retrieved chunk texts into the context passed to the LLM.
context_text = "\n\n".join(doc.page_content for doc, _score in docs_chroma)

In [None]:
# Prompt template for the answer-generation step. The {context} and {question}
# placeholders are filled in when the template is formatted.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
Answer the question based on the above context: {question}.
Provide a detailed answer.
Don’t justify your answers.
Don’t give information not mentioned in the CONTEXT INFORMATION.
Do not say "according to the context" or "mentioned in the context" or similar.
"""

In [None]:
# Fill the template with the retrieved context and the user query.
# format_messages() keeps the result as chat messages; format() would flatten
# it into one string prefixed with "Human: ", and that prefix would then be
# sent to the model as part of the message text.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
prompt = prompt_template.format_messages(context=context_text, question=query)

In [None]:
# Build the chat model (routed through the gateway headers) and ask it to
# answer the prepared prompt. temperature=0 keeps sampling randomness minimal.
model = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0,
    default_headers=headers,
)
response_text = model.invoke(prompt)

In [None]:
# Show only the text of the model's reply.
print(response_text.content)