In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from langchain_groq import ChatGroq

llm = ChatGroq(model = "openai/gpt-oss-20b")
llm_response  = llm.invoke("What is the capital of France?")
print(llm_response)

content='The capital of France is **Paris**.' additional_kwargs={'reasoning_content': 'We need to answer: "What is the capital of France?" It\'s a straightforward question. The answer: Paris. Ensure correct.'} response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 78, 'total_tokens': 123, 'completion_time': 0.04536116, 'completion_tokens_details': {'reasoning_tokens': 27}, 'prompt_time': 0.004276282, 'prompt_tokens_details': None, 'queue_time': 0.047106087, 'total_time': 0.049637442}, 'model_name': 'openai/gpt-oss-20b', 'system_fingerprint': 'fp_e99e93f2ac', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'} id='lc_run--b82c53c1-e4c8-4748-b2c9-8dbcff057e99-0' usage_metadata={'input_tokens': 78, 'output_tokens': 45, 'total_tokens': 123, 'output_token_details': {'reasoning': 27}}


In [3]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
chain = llm | output_parser
parsed_response = chain.invoke("What is the capital of Germany?")
print(parsed_response)

The capital of Germany is **Berlin**.


In [4]:
from typing import List
from pydantic import BaseModel, Field

class RelocationDetails(BaseModel):
    city: str = Field(description="The city to which the person is relocating")
    country: str = Field(description="The country to which the person is relocating")
    from_city: str = Field(description="The city from which the person is relocating")
    from_country: str = Field(description="The country from which the person is relocating")
    recommended_neighborhoods: List[str] = Field(description="A list of recommended neighborhoods in the new city")
    average_cost_of_living: float = Field(description="The average monthly cost of living in the new city in USD")
    summary: str = Field(description="A brief summary of the relocation details")

relocation_text = """
A Candidate is living in london and is relocating to Berlin, Germany for a new job opportunity.
"""

structured_llm = llm.with_structured_output(RelocationDetails)
relocation_details = structured_llm.invoke(relocation_text)
print(relocation_details)

city='Berlin' country='Germany' from_city='London' from_country='United Kingdom' recommended_neighborhoods=['Mitte', 'Prenzlauer Berg', 'Friedrichshain', 'Kreuzberg'] average_cost_of_living=2500.0 summary='The candidate is moving from London, UK to Berlin, Germany for a new job opportunity. Berlin offers a lower average cost of living (~$2,500/month) compared to London and a vibrant tech scene. Recommended neighborhoods for professionals include Mitte for its central location, Prenzlauer Berg for its community vibe, Friedrichshain for nightlife, and Kreuzberg for cultural diversity.'


In [5]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
  "Provide a brief summary about {topic}."
)
chain = prompt | llm | output_parser
result = chain.invoke({"topic": "the Eiffel Tower"})
print(result)

The Eiffel Tower is a wrought‑iron lattice tower located on the Champ de Mars in Paris, France. Designed by engineer Gustave Eiffel and completed in 1889 for the Exposition Universelle (World’s Fair), it stands 330 m (1,083 ft) tall, making it the tallest structure in Paris until the completion of the Montparnasse Tower in 1973. The tower originally served as a temporary exhibit but has since become an iconic symbol of France and a major tourist attraction, drawing millions of visitors each year. Its three levels are accessible by elevators and stairs, offering panoramic views of Paris. The structure was initially criticized for its design but has become celebrated for its engineering ingenuity and aesthetic influence on modern architecture.


In [6]:
from langchain_core.messages import HumanMessage, SystemMessage

messages = [
    SystemMessage(content="You are a helpful assistant that provides concise information."),
    HumanMessage(content="Tell me a fun fact about the Great Wall of China.")
]

result = llm.invoke(messages)
print(result)

template = ChatPromptTemplate([
  ("system", "You are a knowledgeable assistant."),
  ("human", "Explain the significance of {event} in history.")
])

chain = template | llm
response = chain.invoke({"event": "the fall of the Berlin Wall"})
print(response)

content='A fun fact: the Great Wall isn’t a single wall at all—it’s a network of walls and fortifications built by several dynasties over 2,000\u202fyears, stretching roughly 21,000\u202fkm (about 13,000\u202fmiles).' additional_kwargs={'reasoning_content': 'We need to provide a concise fun fact about the Great Wall. The instruction: "You are a helpful assistant that provides concise information." So keep it short. Provide a fun fact. Let\'s think: The Great Wall is not one continuous wall; it\'s a series of walls and fortifications. Or the Great Wall stretches over 21,000 km. Or the wall has many "ghost" or "dragon" shapes. Or the wall has a "dragon\'s head" at the southern end. Or the wall is visible from space? Actually no. Or the wall was built over centuries and different dynasties. Or the wall\'s construction used "rammed earth" and "tamped earth" etc. Or the wall\'s bricks were used in the 1970s for the Great Wall\'s "Great Wall of China" in the 1970s. Or the Great Wall is the l

In [11]:
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List
from langchain_core.documents import Document
import os

def load_documents(folder_path: str) -> List[Document]:
    documents = []
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if filename.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif filename.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

folder_path = "./documents"
documents = load_documents(folder_path)
print(f"Loaded {len(documents)} documents from the folder.")


Loaded 94 documents from the folder.


In [12]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, length_function=len)
splits = text_splitter.split_documents(documents)
print(f"Split into {len(splits)} chunks.")

Split into 137 chunks.


In [16]:
print(documents[0])

page_content='HackRF
Great Scott Gadgets
Jul 17, 2024' metadata={'producer': 'pdfTeX-1.40.22', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-07-17T17:48:14+00:00', 'author': 'Great Scott Gadgets', 'title': 'HackRF', 'subject': '', 'keywords': '', 'moddate': '2024-07-17T17:48:14+00:00', 'trapped': '/False', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) kpathsea version 6.3.4/dev', 'source': './documents\\DOC052841344.pdf', 'total_pages': 94, 'page': 0, 'page_label': '1'}


In [18]:
print(splits[1])
print(splits[0].metadata)

page_content='USER DOCUMENTATION
1 Getting Help 1
2 FAQ 3
2.1 What is the Transmit Power of HackRF? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.2 What is the Receive Power of HackRF? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 3
2.3 What is the minimum signal power level that can be detected by HackRF? . . . . . . . . . . . . . . . 4
2.4 Is HackRF full-duplex? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.5 Why isn’t HackRF One full-duplex? . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 4
2.6 How could the HackRF One design be changed to make it full-duplex? . . . . . . . . . . . . . . . . 4
2.7 What is the big spike in the center of my received spectrum? . . . . . . . . . . . . . . . . . . . . . . 5
2.8 How do I deal with the big spike in the middle of my spectrum? . . . . . . . . . . . . . . . . . . . . 5' metadata={'producer': 'pdfTeX-1.40.22', 'creator': 'LaTeX with hyperr

In [20]:
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings

embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
document_embeddings= embedding_function.embed_documents([split.page_content for split in splits])
print(document_embeddings[0][:5])

  from .autonotebook import tqdm as notebook_tqdm


[-0.16012603044509888, 0.00766132352873683, -0.04513724893331528, -0.07161771506071091, 0.005821555387228727]


In [22]:
from langchain_chroma import Chroma

collection_name = "Hackrf"

vector_store = Chroma.from_documents(
  collection_name = collection_name,
  documents = splits,
  embedding=embedding_function,
  persist_directory="./chroma_db" 
)

print("Vector store created and documents added.")

Vector store created and documents added.


In [23]:
query = "What are the key features of HackRF?"
search_results = vector_store.similarity_search(query, k=3)
print(f"\nTop 2 most relevant chunks for the query: '{query}'\n")
for i, result in enumerate(search_results, 1):
    print(f"Result {i}:")
    print(f"Source: {result.metadata.get('source', 'Unknown')}")
    print(f"Content: {result.page_content}")
    print()


Top 2 most relevant chunks for the query: 'What are the key features of HackRF?'

Result 1:
Source: ./documents\DOC052841344.pdf
Content: CHAPTER
TWENTYTWO
INSTALLING HACKRF SOFTWARE
HackRF software includes HackRF Tools and libhackrf. HackRF Tools are the commandline utilities that let you
interact with your HackRF. libhackrf is a low level library that enables software on your computer to operate with
HackRF.
22.1 Install Using Package Managers
Unless developing or testing new features for HackRF, we highly recommend that most users use build systems or
package managers provided for their operating system.Our suggested operating system for use with HackRF is
Ubuntu.
22.1.1 FreeBSD
You can use the binary package:# pkg install hackrf
You can also build and install from ports:
# cd /usr/ports/comms/hackrf
# make install
22.1.2 Linux: Arch
pacman -S hackrf
22.1.3 Linux: Fedora / Red Hat
sudo dnf install hackrf -y
63

Result 2:
Source: ./documents\DOC052841344.pdf
Content: HackRF
8 Chapt

In [24]:
retriever = vector_store.as_retriever(search_kwargs={"k": 2})
retriever_results = retriever.invoke("Features of Hackrf")
print(retriever_results)

[Document(id='83a0e35e-f127-4e4e-baf2-d93211e08a5c', metadata={'trapped': '/False', 'creationdate': '2024-07-17T17:48:14+00:00', 'source': './documents\\DOC052841344.pdf', 'title': 'HackRF', 'producer': 'pdfTeX-1.40.22', 'total_pages': 94, 'creator': 'LaTeX with hyperref', 'page_label': '63', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) kpathsea version 6.3.4/dev', 'author': 'Great Scott Gadgets', 'keywords': '', 'subject': '', 'page': 68, 'moddate': '2024-07-17T17:48:14+00:00'}, page_content='CHAPTER\nTWENTYTWO\nINSTALLING HACKRF SOFTWARE\nHackRF software includes HackRF Tools and libhackrf. HackRF Tools are the commandline utilities that let you\ninteract with your HackRF. libhackrf is a low level library that enables software on your computer to operate with\nHackRF.\n22.1 Install Using Package Managers\nUnless developing or testing new features for HackRF, we highly recommend that most users use build systems or\npackage managers pr