In [4]:
import os
from langchain_community.llms import Ollama
from dotenv import load_dotenv
from langchain_community.embeddings import OllamaEmbeddings
from langchain.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import create_retrieval_chain
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate


In [15]:
# Chat model served by an Ollama instance on localhost.
# Hosted alternative (disabled):
#   llm = ChatOpenAI(model="gpt-4o-mini", temperature=0,
#                    api_key=os.getenv("OPENAI_API_KEY"))
model = "llama3.1"
llamaURL = "http://localhost:11434"
llm = Ollama(base_url=llamaURL, model=model)

In [16]:
# Embeddings use the same Ollama endpoint and model name as the LLM.
# Hosted alternative (disabled):
#   embedding_model = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))
embedding_model = OllamaEmbeddings(base_url=llamaURL, model=model)

In [4]:
# Inline sample corpus: a short explainer about hyperparameters. It is the
# raw text that gets chunked and embedded for the first retrieval demo.
Text = """
Hyperparameters are configurations set before training a machine learning model. They are not learned during training but are crucial in defining the model's performance and training process. Hyperparameters can be categorized into two main types:

1. Model Hyperparameters:
   - Define the architecture and complexity of the model.
   - Examples include:
     * Number of layers in a neural network
     * Number of neurons in each layer
     * Type of activation functions (e.g., ReLU, Sigmoid)
     * Kernel size in convolutional layers

2. Training Hyperparameters:
   - Control the learning process and optimization of the model.
   - Examples include:
     * Learning rate: Determines the step size for weight updates.
     * Batch size: Number of training examples processed at a time.
     * Number of epochs: Total iterations over the entire training dataset.
     * Optimizer type: Choice of algorithm for optimization (e.g., SGD, Adam).
     * Dropout rate: Percentage of neurons deactivated to prevent overfitting.

Hyperparameter tuning involves methods such as grid search, random search, or automated tools like Optuna and Hyperopt to find optimal configurations. Selecting appropriate hyperparameters can improve accuracy, efficiency, and generalization.
"""
# print() renders the newlines; a bare `Text` expression would show the
# escaped repr instead.
print(Text)



Hyperparameters are configurations set before training a machine learning model. They are not learned during training but are crucial in defining the model's performance and training process. Hyperparameters can be categorized into two main types:

1. Model Hyperparameters:
   - Define the architecture and complexity of the model.
   - Examples include:
     * Number of layers in a neural network
     * Number of neurons in each layer
     * Type of activation functions (e.g., ReLU, Sigmoid)
     * Kernel size in convolutional layers

2. Training Hyperparameters:
   - Control the learning process and optimization of the model.
   - Examples include:
     * Learning rate: Determines the step size for weight updates.
     * Batch size: Number of training examples processed at a time.
     * Number of epochs: Total iterations over the entire training dataset.
     * Optimizer type: Choice of algorithm for optimization (e.g., SGD, Adam).
     * Dropout rate: Percentage of neurons deactiva

In [5]:
# Cut the sample text into 512-character chunks that overlap by 128
# characters, ready to be embedded.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=128,
)
chunks = text_splitter.split_text(Text)

In [6]:
# Embed every chunk and index it in a Chroma collection
# (no persist_directory is supplied for this store).
vector_store = Chroma.from_texts(texts=chunks, embedding=embedding_model)


In [7]:
# Expose the vector store through the retriever interface, using the
# library's default search settings.
retriever = vector_store.as_retriever()

In [8]:
# create_retrieval_chain calls `combine_docs_chain` with a dict holding the
# original "input" plus a "context" list of retrieved documents. A bare LLM
# rejects dict input, so passing `llm` directly produced a chain that could
# be built but not invoked. Wrap the LLM in a stuff-documents chain whose
# prompt exposes the `context` and `input` variables instead.
baseline_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer the question using only the context below:\n\n{context}"),
        ("human", "{input}"),
    ]
)
chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=create_stuff_documents_chain(llm, baseline_prompt),
)

In [9]:
# Download the shared retrieval-QA chat prompt from the LangChain hub
# (requires network access) and show its template variables.
retrieval_qa_chat_prompt = hub.pull(
    "langchain-ai/retrieval-qa-chat"
)
print(retrieval_qa_chat_prompt)



input_variables=['context', 'input'] optional_variables=['chat_history'] input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.

In [10]:
# Chain that formats the retrieved documents into the hub prompt and
# sends the result to the LLM.
combine_docs_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=retrieval_qa_chat_prompt,
)

In [11]:
# End-to-end RAG pipeline: retrieve documents, then answer from them.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)

In [12]:
# Question posed to the retrieval chain.
qst = 'what are hyperparameters?'

In [13]:
# The chain reads the question from the "input" key.
res = retrieval_chain.invoke(
    {"input": qst}
)

In [14]:
# Show only the generated answer text from the chain result.
print(res["answer"])

Hyperparameters are configurations set before training a machine learning model. They're not learned during training but are crucial in defining the model's performance and training process.


In [15]:
# db_path='db'
# db = Chroma(
#     persist_directory=db_path,embedding_function=embedding_model
# )
# results = db.similarity_search_with_score(qst,k=10)
# context = "\n\n----\n\n".join([doc.page_content for doc,_score in results])

In [16]:
# PROMPT_TEMPLATE ="""
# Answer the question based only on the following context:
# {context}

# ----
# answer the question based on the above context: {question}

# """

# prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# prompt = prompt_template.format(context=context,question=qst)

In [17]:
# result = llm.invoke(prompt)

In [18]:
# print(result.content)

In [17]:
from langchain_community.document_loaders import PyPDFDirectoryLoader
import shutil


In [18]:
from langchain.document_loaders import PyPDFLoader

# Build the path with os.path.join rather than a "data\..." literal: "\H" is
# an invalid escape sequence (Python emits a SyntaxWarning for it) and a
# hard-coded backslash separator only resolves on Windows.
pdf_paths = [
  os.path.join("data", "Hyperparameters_in_ML.pdf")
]

# Collect the documents produced by every PDF in `pdf_paths`.
documents = []

for pdf_path in pdf_paths:
  loader = PyPDFLoader(pdf_path)
  documents.extend(loader.load())

# The former trailing `documents = loader.load()` was removed: it discarded
# the accumulated list, kept only the last PDF, and parsed that file twice.

  "data\Hyperparameters_in_ML.pdf"


In [19]:
# Splitter for the loaded PDF documents: 1000-character chunks with a
# 200-character overlap, separators treated as literal strings.
text_spliter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, is_separator_regex=False
)
context = text_spliter.split_documents(documents)

In [20]:
# Directory that holds the persistent Chroma store.
db_path = "rag_env/db"

In [21]:
# Optional reset of the on-disk store (disabled):
# if os.path.exists(db_path):
#     shutil.rmtree(db_path)
# NOTE(review): with the reset disabled, every re-run of this cell appends
# the same chunks to the existing store again - confirm this is intended.
db = Chroma(
    persist_directory=db_path,embedding_function=embedding_model
)
db.add_documents(context)
# No explicit db.persist(): the method is deprecated and emits a warning.
# With Chroma 0.4+ a store opened with `persist_directory` is written to
# disk automatically.
print("added")

added


  db.persist()
