In [1]:
import os
import openai
import sys
sys.path.append('../..')

from dotenv import load_dotenv, find_dotenv
_=load_dotenv(find_dotenv()) # read local .env file

openai.api_key =os.environ['OPENAI_API_KEY']

### Using the PYPDF Loader

In [2]:
# pip install langchain langchain-community langchain-text-splitters  pymupdf

from pathlib import Path
import re
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

BASE_DIR=Path("../../Courses/signals and systems")   

# load every pdf under the course folder 
loader=DirectoryLoader(
    str(BASE_DIR),
    glob="**/*.pdf",
    loader_cls=PyPDFLoader,
)


In [None]:
raw_docs=loader.load()

In [7]:
doc_sample=raw_docs[0]
path_sample=Path(doc_sample.metadata['source'])
print([x.lower() for x in path_sample.parts])

['..', '..', 'courses', 'signals and systems', 'assigments', 'assignment1_lecture2_lecture3_lecture4.pdf']


#### Document Splittings

In [95]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # Maximum size of chunks to return
    chunk_overlap= 50 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [96]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/PYPDF/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [98]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [99]:
from langchain.chains import  ConversationalRetrievalChain
qa_pypdf_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

### USING the PYMUPDF LOADER

In [25]:
# pip install langchain langchain-community langchain-text-splitters  pymupdf

from pathlib import Path
import re
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

BASE_DIR=Path("../../Courses/signals and systems")   

# load every pdf under the course folder 
loader=DirectoryLoader(
    str(BASE_DIR),
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
)


In [None]:
raw_docs=loader.load()

#### Document Splittings

In [100]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # Maximum size of chunks to return
    chunk_overlap= 50 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [101]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/PyMuPDF/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [103]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [104]:
from langchain.chains import  ConversationalRetrievalChain
qa_pymupdf_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

## Chunks testing

for the rest of the tests, we will adopt a common criteria in terms of using the same loader, every test will be independent so that we may get the best result. for the overlap, it will be set to 10 % of the chunk size

In [37]:
# pip install langchain langchain-community langchain-text-splitters  pymupdf

from pathlib import Path
import re
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

BASE_DIR=Path("../../Courses/signals and systems")   

# load every pdf under the course folder 
loader=DirectoryLoader(
    str(BASE_DIR),
    glob="**/*.pdf",
    loader_cls=PyPDFLoader,
)


In [38]:
raw_docs=loader.load()

Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 15 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 28 0 (offset 0)
Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 30 0 (offset 0)
Ignoring wrong pointing object 33 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 11 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 17 0 (offset 0)
Ignoring wrong pointing object 19 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 40 0 (offset 0)
Ignoring wrong pointing object 81 0 (offset 0)
Ignoring wrong pointing object 108 0 (offset 0)
Ignoring wrong pointing object 123 0 (offset 0)
Ignoring wron

### Chunk size = 500  

#### Document Splittings

In [54]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # Maximum size of chunks to return
    chunk_overlap= 50 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [55]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/C500/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [57]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [58]:
from langchain.chains import  ConversationalRetrievalChain
qa_C500_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

### Chunk size = 1000

#### Document Splittings

In [59]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, # Maximum size of chunks to return
    chunk_overlap= 10 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [60]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/C1000/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [62]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [63]:
from langchain.chains import  ConversationalRetrievalChain
qa_C1000_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

## Chunk size = 1500

#### Document Splittings

In [49]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500, # Maximum size of chunks to return
    chunk_overlap= 150 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [50]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/C1500/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [52]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [53]:
from langchain.chains import  ConversationalRetrievalChain
qa_C1500_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

## Embeddings Testing

### OpenAI embeddings

#### Document Splittings

In [74]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, # Maximum size of chunks to return
    chunk_overlap= 50 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [75]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/OpenAiEmbd/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [77]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [78]:
from langchain.chains import  ConversationalRetrievalChain
qa_openaiembd_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

### Google Gen AI embeddings

In [71]:
import os, getpass
if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for Google Gemini: ")


#### Vector Stores and Embeddings

In [79]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/Gemini001Embd/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

GoogleGenerativeAIError: Error embedding content: 429 Resource has been exhausted (e.g. check quota).

#### Retrieval

In [None]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

In [None]:
from langchain.chains import  ConversationalRetrievalChain
qa_geminiEmbd001_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="map_reduce", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

I will adopt the open ai embeddings since i won't be able to embed using google gen ai embeddings technique (i have a large load of documents)

## Tests

in this section, i will be relying on the following tutorial.

ref: https://learn.deeplearning.ai/courses/langchain/lesson/dputu/evaluation

In [82]:
from langchain_openai import ChatOpenAI

In [None]:
# this is the bench mark:
examples = [
    {
        "query": "For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?",
        "answer": "T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms."
    },
    {
        "query": "What is a causal system?",
        "answer": "A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs."
    },
    {
        "query": "What does time-invariance mean for a system?",
        "answer": "If an excitation x(t) causes a response y(t) and an excitation x(t − t0) causes a response y(t − t0), for an arbitrary t0, the system is said to be time invariant."
    },
    {
        "query": "How are signal energy and signal power defined?",
        "answer": "Signal energy is the area under the square of the magnitude of the signal (a measure of total energy). Signal power is the rate at which energy is transmitted or used."
    },
    {
        "query": "what determines the order of a (digital) system?",
        "answer": "The order of the system is the number of poles."
    },
    {
        "query": "How do you identify the filter type in first-order digital filters, and what indicates a low-pass case?",
        "answer": "Inspect what is happening between 0 and π. A decreasing magnitude response indicates a low-pass filter (“decreating : Iow pas filter”)."
    },
    {
        "query": "State the Initial Value Theorem and the Final Value Theorem for the Laplace transform, with their usual conditions.",
        "answer": "IVT: x(0+) = lim_{s→∞} s X(s) (when the limit exists and there is no impulse at t=0). FVT: x(∞) = lim_{s→0} s X(s) (provided all poles of sX(s) lie in the open left half-plane so the finite limit exists)."
    },
    {
        "query": "What is the Region of Convergence (ROC) in the Laplace transform, and what is the ROC for x(t)=e^{a t} u(t)?",
        "answer": "The ROC is the set of s where the Laplace integral converges; for right-sided signals it lies to the right of the rightmost pole. For x(t)=e^{a t} u(t), X(s)=1/(s−a) and the ROC is Re{s} > a."
    },
    {
        "query": "What is the Fourier transform of the unit impulse, and what is the inverse Fourier transform of a unit impulse in frequency?",
        "answer": "The Fourier transform of the unit impulse is a constant (real and even). The inverse Fourier transform of a unit impulse in frequency is a constant in time; the impulse area is 1."
    },
    {
        "query": "In amplitude modulation by a cosine carrier, where do the spectral components move, and how is the original message recovered?",
        "answer": "Multiplying by a cosine shifts the spectrum to the carrier, creating sidebands at f_c ± f. The message is recovered with a low-pass filter (LPF) to bring the baseband back."
    }
]


In [121]:
from langchain.evaluation.qa import QAEvalChain
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
eval_chain = QAEvalChain.from_llm(llm)
eval_examples = examples


## Loaders evaluation

### PYPDF loder

In [137]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_pypdf_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [138]:
predictions

[{'chat_history': [],
  'question': 'For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?',
  'answer': ' For x(t)=50 sin(200π t), f0 = 100 Hz and T0 = 0.01 seconds.',
  'source_documents': [Document(metadata={'is_solution': False, 'page': 9, 'course_id': 'signals and systems', 'source': '..\\..\\Courses\\signals and systems\\Chapters\\Chapter 1\\ELE430_Chapter1_Lecture1_Lecture2.pdf', 'doc_id': 'ELE430_Chapter1_Lecture1_Lecture2', 'page_label': '10', 'creationdate': "D:20250815111755Z00'00'", 'has_multiple_lectures': True, 'category': 'chapters', 'producer': 'iOS Version 18.1.1 (Build 22B91) Quartz PDFContext', 'relpath': 'Chapters\\Chapter 1\\ELE430_Chapter1_Lecture1_Lecture2.pdf', 'chapter': 1, 'moddate': "D:20250815111755Z00'00'", 'creator': 'PyPDF', 'lecture_max': 2, 'lecture_min': 1, 'filename': 'ELE430_Chapter1_Lecture1_Lecture2.pdf', 'total_pages': 26}, page_content='14/01/2024\n10\nLAU –SOE, ELE 430: 

In [139]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [140]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For x(t)=50 sin(200π t), f0 = 100 Hz and T0 = 0.01 seconds.
Predicted Grade: GRADE: INCORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  A causal system is one in which the response only occurs during or after the time in which the excitation is applied.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a response y(t) and an excitation x(t − t0) causes a response y(t − t0), for an arbitrary t0, the system is said to

### PYMUPDF loader

In [125]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_pymupdf_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [126]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [127]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For x(t)=50 sin(200π t), f0 = 100 Hz and T0 = 0.01 seconds.
Predicted Grade: GRADE: INCORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  A causal system is one in which the response occurs only during or after the time in which the excitation is applied. It means that the current output only depends on the current and previous inputs, and not on future inputs.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a respo

There is a tie between the two loaders, so there is no difference between which loader to use. **note that** the first question is correct in the current and the following models despite being tagged as incorrect

## Chunk sizes Tests

### chunk size = 500 loader

In [128]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_C500_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [129]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [130]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For x(t)=50 sin(200π t), f0 = 100 Hz and T0 = 0.01 seconds.
Predicted Grade: GRADE: INCORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  A causal system is one in which the current output only depends on the current and previous inputs, and not on future inputs.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a response y(t) and an excitation x(t − t0) causes a response y(t − t0), for an arbitrary t0, the system is

### chunk size = 1000 loader

In [131]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_C1000_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [132]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [133]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For x(t)=50 sin(200π t), f0 = 200π and T0 = 1/(200π) = 1/200 seconds.
Predicted Grade: GRADE: INCORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  A causal system is one in which the output only depends on the input at the same time or previous times, and not on future inputs.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a response y(t) and an excitation x(t − t0) causes a response y(t − t0), for an arbitrary t0

### chunk size = 1500 loader

In [134]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_C1500_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [135]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [136]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For x(t)=50 sin(200π t), f0 = 100 Hz and T0 = 0.01 seconds.
Predicted Grade: GRADE: INCORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  The system y(t)= x(|t −2|) is not causal.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a response y(t) and an excitation x(t − t0) causes a response y(t − t0), for an arbitrary t0, the system is said to be time invariant.
Predicted Answer:  Time-invariance means that the output

Based on the above example, it seems there is a ties between the 1500 and 1000 chunk sizes rags that are returning an 80% (actually 9 out of 10) accuracy in comparison with the 500 chunk size rag that get all the questions correct, thats why we decided to set the chunk size to 500

## Chain type test

we will conduct this test to check which chain type is better, we will set the chunk size to 1000 to keep a room for incorrect hits and have a clearer vizualization of the results. note that we already know the result of the map reduce chain type rag, 9/10. 

#### Document Splittings

In [141]:

r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, # Maximum size of chunks to return
    chunk_overlap= 10 # Overlap in characters between chunks
)
docs=r_splitter.split_documents(raw_docs) # we are dealing with a list of documents not text, thats why we did not use split_text

#### Vector Stores and Embeddings

In [142]:
from langchain.embeddings.openai import OpenAIEmbeddings
embedding = OpenAIEmbeddings()

In [None]:
from langchain.vectorstores import Chroma
persist_directory = '../RAG_tests_db/C1000/'
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    collection_name="ele430_signals",
    persist_directory=persist_directory
)

#### Retrieval

In [144]:
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import OpenAI
document_content_description = "Signals and Systems"
llm = OpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True
)

### Refine chain type

In [145]:
from langchain.chains import  ConversationalRetrievalChain
qa_refine_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="refine", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

### stuff

In [151]:
from langchain.chains import  ConversationalRetrievalChain
qa_rerank_loader_test = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="stuff", # depending on the state, might be refine (mostprobably)
    return_source_documents=True,
    )

## Testing

### refine test

In [147]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_refine_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [148]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [149]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer: 

For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), the period T0 is equal to 1/f0, where f0 is the fundamental frequency of the signal. This means that the signal repeats itself every T0 seconds, with a fundamental frequency of f0 Hz. 

For x(t)=50 sin(200π t), f0 is equal to 100 Hz and T0 is equal to 10 ms. This means that the signal repeats itself every 10 ms, with a fundamental frequency of 100 Hz. 

The unit step function, u(t), is a common continuous-time signal that is defined as 1 for t ≥ 0 and 0 for t < 0. It is often used to model the behavior of a system that is turned on at t=0. 

Another common continuous-time signal is the unit sinc function, defined as sinc(t) = sin(πt)/(πt). It has a value of 1 at t=0 and approaches 0 as t

although it is difficult to see, there is more incorrects than map refine, our used chain type

### rerank test

In [152]:
# Provide the required keys: 'chat_history' and 'question'
predictions = qa_rerank_loader_test.apply(
	[{"chat_history":[],"question":example["query"]} for example in examples]
)

In [153]:
graded = eval_chain.evaluate(
    examples=eval_examples,
    predictions=predictions,      # must contain "answer"
    question_key="query",         # <-- your question field
    prediction_key="answer",      # <-- your model’s output field
    answer_key="answer",          # <-- reference/ground-truth field
)

In [154]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i].get('question', 'No query available'))
    print("Real Answer: " + examples[i].get('answer', 'No answer available'))
    print("Predicted Answer: " + predictions[i].get('answer', 'No result available'))
    print("Predicted Grade: " + graded[i].get('results', 'No grade available'))
    print()

Example 0:
Question: For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), what is its period T0? Then, for x(t)=50 sin(200π t), what are f0 and T0?
Real Answer: T0 = 1/f0. For x(t)=50 sin(200π t): f0 = 100 Hz and T0 = 10 ms.
Predicted Answer:  For a continuous-time sinusoid x(t)=A sin(2π f0 t + θ), its period T0 is equal to 1/f0. For x(t)=50 sin(200π t), f0 is equal to 100 Hz and T0 is equal to 10 ms.
Predicted Grade: CORRECT

Example 1:
Question: What is a causal system?
Real Answer: A system is causal if its response occurs only during or after the time in which the excitation is applied. The current output depends only on the current and previous inputs; it does not depend on future inputs.
Predicted Answer:  A causal system is a system where the current output only depends on the current and previous inputs, and does not depend on future inputs.
Predicted Grade: CORRECT

Example 2:
Question: What does time-invariance mean for a system?
Real Answer: If an excitation x(t) causes a

The stuff is the goat chain type ...