In [9]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [10]:
from langchain_groq import ChatGroq

llm = ChatGroq(model = "openai/gpt-oss-20b")
llm_response  = llm.invoke("What is the capital of France?")
print(llm_response)

content='The capital of France is **Paris**.' additional_kwargs={'reasoning_content': "We need to answer the question. It's straightforward: Paris."} response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 78, 'total_tokens': 109, 'completion_time': 0.030772302, 'completion_tokens_details': {'reasoning_tokens': 13}, 'prompt_time': 0.006442778, 'prompt_tokens_details': None, 'queue_time': 0.047969391, 'total_time': 0.03721508}, 'model_name': 'openai/gpt-oss-20b', 'system_fingerprint': 'fp_e99e93f2ac', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'} id='lc_run--4b2c6e54-faee-4a48-9429-9fd394c850ec-0' usage_metadata={'input_tokens': 78, 'output_tokens': 31, 'total_tokens': 109, 'output_token_details': {'reasoning': 13}}


In [11]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
chain = llm | output_parser
parsed_response = chain.invoke("What is the capital of Germany?")
print(parsed_response)

The capital of Germany is **Berlin**.


In [12]:
from typing import List
from pydantic import BaseModel, Field

class RelocationDetails(BaseModel):
    city: str = Field(description="The city to which the person is relocating")
    country: str = Field(description="The country to which the person is relocating")
    from_city: str = Field(description="The city from which the person is relocating")
    from_country: str = Field(description="The country from which the person is relocating")
    recommended_neighborhoods: List[str] = Field(description="A list of recommended neighborhoods in the new city")
    average_cost_of_living: float = Field(description="The average monthly cost of living in the new city in USD")
    summary: str = Field(description="A brief summary of the relocation details")

relocation_text = """
A Candidate is living in london and is relocating to Berlin, Germany for a new job opportunity.
"""

structured_llm = llm.with_structured_output(RelocationDetails)
relocation_details = structured_llm.invoke(relocation_text)
print(relocation_details)

city='Berlin' country='Germany' from_city='London' from_country='United Kingdom' recommended_neighborhoods=['Mitte', 'Prenzlauer Berg', 'Friedrichshain', 'Kreuzberg', 'Charlottenburg'] average_cost_of_living=1400.0 summary='The candidate will move from London, UK to Berlin, Germany for a new job. The average monthly cost of living in Berlin is around $1,400 USD. Recommended neighborhoods include Mitte, Prenzlauer Berg, Friedrichshain, Kreuzberg, and Charlottenburg.'


In [13]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
  "Provide a brief summary about {topic}."
)
chain = prompt | llm | output_parser
result = chain.invoke({"topic": "the Eiffel Tower"})
print(result)

The Eiffel Tower, located in Paris, France, is a wrought‑iron lattice tower that stands 330 meters (1,083 feet) tall. Designed by engineer Gustave Eiffel and completed in 1889 for the Exposition Universelle (World’s Fair) celebrating the 100th anniversary of the French Revolution, it was initially criticized but has since become an iconic symbol of French culture and engineering. The tower features three observation decks (with restaurants on the first two), and it attracts over 7 million visitors annually. Its distinctive iron framework, illuminated at night, and panoramic views of Paris make it one of the most visited and photographed landmarks in the world.


In [14]:
from langchain_core.messages import HumanMessage, SystemMessage

messages = [
    SystemMessage(content="You are a helpful assistant that provides concise information."),
    HumanMessage(content="Tell me a fun fact about the Great Wall of China.")
]

result = llm.invoke(messages)
print(result)

template = ChatPromptTemplate([
  ("system", "You are a knowledgeable assistant."),
  ("human", "Explain the significance of {event} in history.")
])

chain = template | llm
response = chain.invoke({"event": "the fall of the Berlin Wall"})
print(response)

content='**Fun fact:** The Great Wall isn’t one continuous wall—it’s a network of walls, fortifications, and watchtowers built over 1,500\u202fyears, stretching roughly 21,196\u202fkm (about 13,170\u202fmiles) across northern China.' additional_kwargs={'reasoning_content': 'We need to give a concise fun fact about the Great Wall of China. According to instructions: "You are a helpful assistant that provides concise information." So just a short fun fact. Let\'s produce something like: "The Great Wall is not a single continuous wall but a series of walls and fortifications built over centuries." Or "It stretches over 13,000 miles." Or "It is so long it could be seen from space." Actually the myth is false. We can say "It was built by millions of workers, including soldiers and forced laborers, over 2,000 years." Let\'s pick something interesting: "The Great Wall is not a single wall but a network of walls and fortifications built over 1,500 years, stretching about 21,196 km." That is a 

In [15]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
    documents = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if filename.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif filename.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

folder_path = "./documents"
documents = load_documents(folder_path)
print(f"Loaded {len(documents)} documents from the folder.")


Loaded 94 documents from the folder.


In [16]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
splits = text_splitter.split_documents(documents)
print(f"Split into {len(splits)} chunks.")

Split into 137 chunks.


In [17]:
print(documents[0])

page_content='HackRF
Great Scott Gadgets
Jul 17, 2024' metadata={'producer': 'pdfTeX-1.40.22', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-07-17T17:48:14+00:00', 'author': 'Great Scott Gadgets', 'title': 'HackRF', 'subject': '', 'keywords': '', 'moddate': '2024-07-17T17:48:14+00:00', 'trapped': '/False', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) kpathsea version 6.3.4/dev', 'source': './documents\\DOC052841344.pdf', 'total_pages': 94, 'page': 0, 'page_label': '1'}


In [18]:
print(splits[1])
print(splits[0].metadata)

page_content='USER DOCUMENTATION
1 Getting Help 1
2 FAQ 3
2.1 What is the Transmit Power of HackRF? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.2 What is the Receive Power of HackRF? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.3 What is the minimum signal power level that can be detected by HackRF? . . . . . . . . . . . . . . . 4
2.4 Is HackRF full-duplex? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.5 Why isn’t HackRF One full-duplex? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.6 How could the HackRF One design be changed to make it full-duplex? . . . . . . . . . . . . . . . . 4
2.7 What is the big spike in the center of my received spectrum? . . . . . . . . . . . . . . . . . . . . . . 5
2.8 How do I deal with the big spike in the middle of my spectrum? . . . . . . . . . . . . . . . . . . . . 5' metadata={'producer': 'pdfTeX-1.40.22', 'creator': 'LaTeX with hyperr

In [19]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
document_embeddings= embedding_function.embed_documents([split.page_content for split in splits])
print(document_embeddings[0][:5])

  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


[-0.16012603044509888, 0.00766132352873683, -0.04513724893331528, -0.07161771506071091, 0.005821555387228727]


In [20]:
from langchain_chroma import Chroma

collection_name = "Hackrf"

vector_store = Chroma.from_documents(
  collection_name = collection_name,
  documents = splits,
  embedding=embedding_function,
  persist_directory="./chroma_db" 
)

print("Vector store created and documents added.")

Vector store created and documents added.


In [21]:
query = "What are the key features of HackRF?"
search_results = vector_store.similarity_search(query, k=3)
print(f"\nTop 2 most relevant chunks for the query: '{query}'\n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content: {result.page_content}")
    print()


Top 2 most relevant chunks for the query: 'What are the key features of HackRF?'

Result 1:
Source: ./documents\DOC052841344.pdf
Content: CHAPTER
TWENTYTWO
INSTALLING HACKRF SOFTWARE
HackRF software includes HackRF Tools and libhackrf. HackRF Tools are the commandline utilities that let you
interact with your HackRF. libhackrf is a low level library that enables software on your computer to operate with
HackRF.
22.1 Install Using Package Managers
Unless developing or testing new features for HackRF, we highly recommend that most users use build systems or
package managers provided for their operating system.Our suggested operating system for use with HackRF is
Ubuntu.
22.1.1 FreeBSD
You can use the binary package:# pkg install hackrf
You can also build and install from ports:
# cd /usr/ports/comms/hackrf
# make install
22.1.2 Linux: Arch
pacman -S hackrf
22.1.3 Linux: Fedora / Red Hat
sudo dnf install hackrf -y
63

Result 2:
Source: ./documents\DOC052841344.pdf
Content: CHAPTER
TWENTY

In [22]:
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
retriever_results = retriever.invoke("Features of Hackrf")
print(retriever_results)

[Document(id='83a0e35e-f127-4e4e-baf2-d93211e08a5c', metadata={'moddate': '2024-07-17T17:48:14+00:00', 'title': 'HackRF', 'creationdate': '2024-07-17T17:48:14+00:00', 'page_label': '63', 'keywords': '', 'source': './documents\\DOC052841344.pdf', 'total_pages': 94, 'page': 68, 'author': 'Great Scott Gadgets', 'creator': 'LaTeX with hyperref', 'trapped': '/False', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) kpathsea version 6.3.4/dev', 'subject': '', 'producer': 'pdfTeX-1.40.22'}, page_content='CHAPTER\nTWENTYTWO\nINSTALLING HACKRF SOFTWARE\nHackRF software includes HackRF Tools and libhackrf. HackRF Tools are the commandline utilities that let you\ninteract with your HackRF. libhackrf is a low level library that enables software on your computer to operate with\nHackRF.\n22.1 Install Using Package Managers\nUnless developing or testing new features for HackRF, we highly recommend that most users use build systems or\npackage managers pr

In [23]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

template = """Answer the question based only on the following context:
{context}
Question: {question}
Answer: """

prompt = ChatPromptTemplate.from_template(template)

def docs2str(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | docs2str, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
