In [15]:
## Imports: PDF loading, text splitting, embeddings, vector store and chat model

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
from dotenv import load_dotenv
# Called before the remaining imports, so anything defined in a local .env
# file is already in os.environ when the packages below are imported.
# NOTE(review): some libraries read environment variables at import time;
# confirm none of the imports below do before moving this call.
load_dotenv()
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq

In [5]:
# Loading the data
#
# The PDF location is machine-specific. Set NDD_PDF_PATH (for example in the
# .env file read by load_dotenv()) to point at your own copy; when it is unset
# or empty, the original author's path below is used unchanged.

DEFAULT_PDF_PATH = r"C:\Users\harsu\OneDrive\Anesthesia Texts\Guideline PDFs\2006 CMAJ Neurologic Determination of Death.pdf"
PDF_PATH = os.getenv("NDD_PDF_PATH") or DEFAULT_PDF_PATH

loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

In [7]:
# Split the loaded documents into overlapping chunks for embedding

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(docs)

### Creating Embeddings

In [8]:
# Make the Hugging Face token available through os.environ.
#
# os.getenv() returns None when HF_TOKEN is not defined, and assigning None to
# os.environ raises "TypeError: str expected, not NoneType". Only export the
# value when one is actually present, and say so when it is not.

hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token
else:
    print("HF_TOKEN is not set in the environment or .env file; skipping.")

In [10]:
# Instantiate the embedding model (all-MiniLM-L6-v2) that is later handed to
# Chroma; no text is embedded in this cell.

embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

### Creating Vector Database

In [11]:
# Build a vector store from the chunks without a persist_directory.
# NOTE(review): `vectordb` is rebound by the "Saving to the disk" cell, so
# this store is discarded once that cell runs; confirm it is still needed.
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [12]:
# Saving to the disk
#
# Chroma.from_documents() adds the chunks to whatever collection already
# exists in persist_directory, so every re-run of the original cell stored the
# same chunks again and produced duplicate search hits. Index only when the
# on-disk collection is empty; otherwise reuse it.
# NOTE: delete ./data/chroma_db to force a rebuild after changing the PDF,
# the chunking parameters or the embedding model.

PERSIST_DIR = "./data/chroma_db"

existing_store = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings)
if existing_store.get(limit=1)["ids"]:
    # Collection already populated by an earlier run.
    vectordb = existing_store
else:
    vectordb = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=PERSIST_DIR,
    )

In [13]:
# Re-open the persisted Chroma store from ./data/chroma_db

db = Chroma(
    persist_directory="./data/chroma_db",
    embedding_function=embeddings,
)

### Setting Up LLM Access

In [14]:
# Make the Groq API key available through os.environ.
#
# os.getenv() returns None when GROQ_API_KEY is not defined, and assigning
# None to os.environ raises "TypeError: str expected, not NoneType". Export
# the key only when it exists and print an actionable hint otherwise.

groq_api_key = os.getenv("GROQ_API_KEY")
if groq_api_key:
    os.environ["GROQ_API_KEY"] = groq_api_key
else:
    print("GROQ_API_KEY is not set; add it to your .env file before creating the ChatGroq client.")

In [None]:
# Instantiate the Groq-hosted Llama chat model and send one test message;
# the reply is shown as the cell output.

llm = ChatGroq(model="llama-3.1-8b-instant")
llm.invoke(
    "Hey I am Harrie and i am MscAC student"
)

AIMessage(content="Hello Harry, nice to meet you. Being an MSc AC (Master of Science in Accounting) student, I'm sure you're handling a lot of coursework and assignments. What specific areas of accounting are you most interested in, such as financial accounting, managerial accounting, taxation, or auditing?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 46, 'total_tokens': 106, 'completion_time': 0.098424499, 'prompt_time': 0.002452841, 'queue_time': 0.188592746, 'total_time': 0.10087734}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_50a6be1b6f', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--cb3259cf-a3b8-4b24-8418-85d9dd8cbdf3-0', usage_metadata={'input_tokens': 46, 'output_tokens': 60, 'total_tokens': 106})