In [1]:
!pip install -q langchain
!pip install -U -q langchain-community

!pip install -q llama-index pypdf
!pip install -q llama-index --upgrade

# Don't bother me with warnings
import warnings # optional, disabling warnings about versions and others
warnings.filterwarnings('ignore') # optional, disabling warnings about versions and others

In [2]:
# Loading a simple PDF with Langchain, straightforward

from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader("docs/War-of-the-Worlds.pdf")
book = loader.load()
#Looking at a small extract, one page, and a few hundred characters in that page
page = book[3]
print(page.page_content[1660:2164])

   The Martians seem to have calculated their descent with amazing subtlety--their 
mathematical learning is evidently far in exce ss of ours--and to ha ve carried out their 
prepara- tions with a well-nigh perfect unanimity. Had our instru- ments permitted it, we 
might have seen the gathering trouble far back in the nineteenth century. Men like 
Schiaparelli watched the red planet--it is odd, by-the-bye, that for count- less centuries 
Mars has been the star of war--but failed to interpret the flu


In [3]:
# Loading a simple document with LlamaIndex, also straightforward

from llama_index.core import GPTVectorStoreIndex, Document
from pypdf import PdfReader

# Load a specific PDF file
pdf_path = "docs/War-of-the-Worlds.pdf"

# Use PyPDF to extract the text
reader = PdfReader(pdf_path)
pdf_text = ""
for page in reader.pages:
    pdf_text += page.extract_text()

# Create a LlamaIndex Document object from the extracted text
document = Document(text=pdf_text)

# Check the content of the document
print(document.text[8444:8944])  # Print 500 characters to verify



e calculated their descent with amazing subtlety--their 
mathematical learning is evidently far in exce ss of ours--and to ha ve carried out their 
prepara- tions with a well-nigh perfect unanimity. Had our instru- ments permitted it, we 
might have seen the gathering trouble far back in the nineteenth century. Men like 
Schiaparelli watched the red planet--it is odd, by-the-bye, that for count- less centuries 
Mars has been the star of war--but failed to interpret the fluctuating appearances of


In [5]:
# loading a video file and saving the audio to a text file, with LangChain
from datetime import datetime
start_time = datetime.now()

import os
import whisper
from yt_dlp import YoutubeDL

# Step 1: Set up the download options
url = "https://www.youtube.com/watch?v=2vkJ7v0x-Fs"
save_dir = "docs/youtube/"
output_template = os.path.join(save_dir, '%(title)s.%(ext)s')

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': output_template,  # Save the file to the specified directory with a title-based name
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'm4a',  # You can change this to mp3 if you prefer
        'preferredquality': '192',
    }],
    'ffmpeg_location': '/opt/homebrew/bin/ffmpeg',  # Specify the location of ffmpeg
}

# Step 2: Download the audio from the YouTube video
with YoutubeDL(ydl_opts) as ydl:
    ydl.download([url])

# Step 3: Find the downloaded file
downloaded_file = [f for f in os.listdir(save_dir) if f.endswith('.m4a')][0]  # Assuming m4a, adjust if using mp3
downloaded_file_path = os.path.join(save_dir, downloaded_file)

# Step 4: Load the Whisper model and transcribe the audio file
model = whisper.load_model("base")  # You can choose 'tiny', 'base', 'small', 'medium', or 'large'
result = model.transcribe(downloaded_file_path)

# Step 5: Adding metadata to the transcript, and saving the transcript to a file so we can use it outside of this program.
class Document:
    def __init__(self, source, text, metadata=None):
        self.source = source
        self.page_content = text
        self.metadata = metadata or {}

# Step 6: Wrap the transcription result in the Document class with metadata
document = Document(
    source=downloaded_file_path,
    text=result['text'], 
    metadata={"source": "youtube", "file_path": downloaded_file_path}
)
#Step 7: Save the transcript to a text file
transcript_file_path = os.path.join(save_dir, 'transcript_w_Langchain.txt')
with open(transcript_file_path, 'w') as f:
    f.write(result['text'])

end_time = datetime.now()

print("Duration:", end_time - start_time)


# Step 8: Print the first 1000 characters of the transcript
print(document.page_content[:1000])

[youtube] Extracting URL: https://www.youtube.com/watch?v=2vkJ7v0x-Fs
[youtube] 2vkJ7v0x-Fs: Downloading webpage
[youtube] 2vkJ7v0x-Fs: Downloading tv client config
[youtube] 2vkJ7v0x-Fs: Downloading player 14cfd4c0
[youtube] 2vkJ7v0x-Fs: Downloading tv player API JSON
[youtube] 2vkJ7v0x-Fs: Downloading ios player API JSON


         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = uLWPy0N1KOQ_eB5GY ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = sPnoy0qhGqJ6oksVu ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = Q36G6030tz93vLyvW ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js


[youtube] 2vkJ7v0x-Fs: Downloading m3u8 information
[info] 2vkJ7v0x-Fs: Downloading 1 format(s): 234
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 330
[download] Destination: docs/youtube/Big Data Architectures.mp4
[download] 100% of   27.33MiB in 00:00:26 at 1.04MiB/s                    
[ExtractAudio] Destination: docs/youtube/Big Data Architectures.m4a
Deleting original file docs/youtube/Big Data Architectures.mp4 (pass -k to keep)
Duration: 0:01:32.220277
 In lesson four, we will go deeper into architectures for big data, and we will take a closer look at some of the most popular big data management systems. First, we're going to look at how the big data management system framework looks, and explore the commonalities that pretty much all the big data systems have, as well as some of the key differences between no SQL, MPP, and Hadoop. Next, we're going to take a deep dive into the Hadoop data management system. You will see how we both store data in HDFS, the 

In [6]:
# loading a video file and saving the audio to a text file, with LlamaIndex

import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import whisper
from yt_dlp import YoutubeDL
from llama_index.core import GPTVectorStoreIndex, Document as LlamaDocument
from llama_index.core.base.embeddings.base import BaseEmbedding
from sentence_transformers import SentenceTransformer
from pydantic import Field

start_time = datetime.now()

# Step 1: setup the option to download and transcribe YouTube video (same as before)
url = "https://www.youtube.com/watch?v=2vkJ7v0x-Fs"
save_dir = "docs/youtube/"
output_template = os.path.join(save_dir, '%(title)s.%(ext)s')

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': output_template,
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'm4a',
        'preferredquality': '192',
    }],
    'ffmpeg_location': '/opt/homebrew/bin/ffmpeg',
}

# Step 2: Download the audio
with YoutubeDL(ydl_opts) as ydl:
    ydl.download([url])

# Step 3: Find the downloaded file
downloaded_file = [f for f in os.listdir(save_dir) if f.endswith('.m4a')][0]
downloaded_file_path = os.path.join(save_dir, downloaded_file)

# Step 4: Transcribe the audio file using Whisper
model = whisper.load_model("base")
result = model.transcribe(downloaded_file_path)
transcribed_text = result['text']

# Step 5: Use Hugging Face SentenceTransformer for embedding
hf_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Custom embedding class using Hugging Face model
class HuggingFaceEmbedding(BaseEmbedding):
    hf_model: SentenceTransformer = Field(default=None)

    def __init__(self, hf_model):
        super().__init__()
        self.hf_model = hf_model

    def _get_text_embedding(self, text):
        # Return embedding for a single string of text
        return self.hf_model.encode(text, convert_to_numpy=True)

    def _get_text_embeddings(self, texts):
        # Return embeddings for a list of strings
        return self.hf_model.encode(texts, convert_to_numpy=True)
    
    def _get_query_embedding(self, query):
        # Return embedding for a query
        return self._get_text_embedding(query)
    
    async def _aget_query_embedding(self, query):
        # Async version of embedding for a query
        return self._get_text_embedding(query)

# Adding metadata to the transcript, and saving the transcript to a file
class Document:
    def __init__(self, source, text, metadata=None):
        self.source = source
        self.page_content = text
        self.metadata = metadata or {}

# Step 6: Wrap the transcription result in the Document class with metadata
document = Document(
    source=downloaded_file_path,
    text=transcribed_text, 
    metadata={"source": "youtube", "file_path": downloaded_file_path}
)

# Step 7: Save the transcript to a text file
transcript_file_path = os.path.join(save_dir, 'transcript_w_LlamaIndex.txt')
with open(transcript_file_path, 'w') as f:
    f.write(transcribed_text)

print(f"Transcript saved to {transcript_file_path}")

# Step 8: Use the LlamaIndex embedding model with the Hugging Face embedding model to retrieve and print the text
llama_document = LlamaDocument(text=transcribed_text)
embed_model = HuggingFaceEmbedding(hf_model)
index = GPTVectorStoreIndex([llama_document], embed_model=embed_model)

# Directly use the retriever (no LLM required)
retriever = index.as_retriever()

# Perform a query using the retriever
response = retriever.retrieve("What is the video about?") # this could be any question, as all we do below is retrieve the first 1000 characters of the transcript

end_time = datetime.now()

print("Duration:", end_time - start_time)

# Print the first 1000 characters of the response text
if response:
    shortened_response = response[0].node.text[:1000]  # Get the text from the node and limit to 500 characters
    print(f"Shortened Response: {shortened_response}")
else:
    print("No response retrieved")



[youtube] Extracting URL: https://www.youtube.com/watch?v=2vkJ7v0x-Fs
[youtube] 2vkJ7v0x-Fs: Downloading webpage
[youtube] 2vkJ7v0x-Fs: Downloading tv client config
[youtube] 2vkJ7v0x-Fs: Downloading player 14cfd4c0
[youtube] 2vkJ7v0x-Fs: Downloading tv player API JSON
[youtube] 2vkJ7v0x-Fs: Downloading ios player API JSON


         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = xaT7Z4NoeuiNhUKbZ ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = xeclW4ksqFDZltLk8 ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js
         n = DxQM80wn3auYkZahJ ; player = https://www.youtube.com/s/player/14cfd4c0/player_ias.vflset/en_US/base.js


[youtube] 2vkJ7v0x-Fs: Downloading m3u8 information
[info] 2vkJ7v0x-Fs: Downloading 1 format(s): 234
[hlsnative] Downloading m3u8 manifest
[hlsnative] Total fragments: 330
[download] Destination: docs/youtube/Big Data Architectures.mp4
[download] 100% of   27.33MiB in 00:00:25 at 1.07MiB/s                    
[ExtractAudio] Destination: docs/youtube/Big Data Architectures.m4a
Deleting original file docs/youtube/Big Data Architectures.mp4 (pass -k to keep)
Transcript saved to docs/youtube/transcript_w_LlamaIndex.txt
Duration: 0:01:34.384756
Shortened Response:  In lesson four, we will go deeper into architectures for big data, and we will take a closer look at some of the most popular big data management systems. First, we're going to look at how the big data management system framework looks, and explore the commonalities that pretty much all the big data systems have, as well as some of the key differences between no SQL, MPP, and Hadoop. Next, we're going to take a deep dive into the

## LlamaIndex vs LangChain
For now, both tools may look the same. One difference can easily be seen in LlamaIndex' ability to use a tree structure. The Tree Index allows you to build a hierarchical structure, where documents are segmented into chunks, and each chunk is stored as a node in a tree. This allows you to retrieve documents while keeping the context (the spot in the hierarchy where the data appeared). By contrast, LangChain operates on a flat structure.

In [7]:
# Example tree index structure with llamaindex

!pip install -q llama-index-llms-ollama

from llama_index.core import TreeIndex, SimpleDirectoryReader
from llama_index.llms.ollama import Ollama

# Initialize the Ollama LLM with Llama3
llama_llm = Ollama(model="llama3", timeout=500)

# Load a specific PDF file using SimpleDirectoryReader
pdf_path = "docs/802.11ae-2012_2.pdf"
documents = SimpleDirectoryReader(input_files=[pdf_path]).load_data()

# Create a hierarchical Tree Index using Llama3 from Ollama
tree_index = TreeIndex.from_documents(documents, llm=llama_llm)

# Access the storage context from the tree index
storage_context = tree_index.storage_context

# Function to print the tree structure
def print_tree_structure(node_id, level=0):
    try:
        # Retrieve the node object using the node_id from the storage context
        node = storage_context.docstore.get_document(node_id)  # Use the exact UUID as node_id
        
        # Print the node name/text with indentation
        indent = "  " * level  # Adjust indentation
        print(f"{indent}Node ID: {node_id}")
        print(f"{indent}Text: {node.text[:100]}...")  # Print the first 100 characters of the node text
        
        # Recursively print child nodes if they exist
        if hasattr(node, "child_ids") and node.child_ids:
            for child_id in node.child_ids:
                print_tree_structure(child_id, level + 1)
    except ValueError:
        print(f"Node ID {node_id} not found in storage context")

# Access the actual UUID root node IDs from the Tree Index
root_node_ids = list(tree_index.index_struct.root_nodes.values())

# Print the list of root node IDs to confirm
print("Actual Root Node IDs:", root_node_ids)

# Start printing from each root node using the actual UUIDs
for root_node_id in root_node_ids:
    print_tree_structure(root_node_id)


Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


Actual Root Node IDs: ['23beba3a-6e34-453f-b810-bc48f233aa43', 'dbfe3080-e069-4d57-b8b5-6fb6e8ffc796']
Node ID: 23beba3a-6e34-453f-b810-bc48f233aa43
Text: IEEE Std 802.11ae-2012 AMENDMENT 1: PRIORITIZATION OF MANAGEMENT FRAMES
28 Copyright © 2012 IEEE. Al...
Node ID: dbfe3080-e069-4d57-b8b5-6fb6e8ffc796
Text: AMENDMENT 1: PRIORITIZATION OF MANAGEMENT FRAMES IEEE Std 802.11ae-2012
Copyright © 2012 IEEE. All r...


Could you do the same with LangChain? No, LangChain has a flat indexing structure. You 'could' use a workaround by sotring in the metadata elements such as the page number, and search the document text for elements that could look like a hierarchical element, like numbers for chapter.


In [8]:
from langchain.document_loaders import PyPDFLoader
import re

# Load the PDF
pdf_loader = PyPDFLoader("docs/802.11ae-2012_2.pdf")
documents = pdf_loader.load()

# Function to extract paragraph indices from the text
def extract_paragraph_indices(text):
    # Regular expression pattern to match paragraph numbers like 9.4.2.1, 9.4.2.2
    paragraph_pattern = re.findall(r'\b\d+(\.\d+)+\b', text)
    return paragraph_pattern if paragraph_pattern else None

# Loop through documents to extract metadata
for i, doc in enumerate(documents):
    text = doc.page_content
    paragraph_indices = extract_paragraph_indices(text)
    page_number = i + 1  # assuming pages are in order in 'documents'

    # Add paragraph indices and page metadata
    if paragraph_indices:
        doc.metadata['paragraph_indices'] = paragraph_indices
    doc.metadata['page_number'] = page_number

    # Print the metadata structure for this document
    print(f"Document {i + 1} metadata: {doc.metadata}")



Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


Document 1 metadata: {'producer': 'macOS Version 14.6.1 (Build 23G93) Quartz PDFContext, AppendMode 1.1', 'creator': 'FrameMaker 9.0', 'creationdate': "D:20240921195505Z00'00'", 'keywords': 'IEEE 802.11, IEEE 802.11ae, management, QFM, QoS, quality-of-service             management frame', 'author': 'LAN/MAN Standards Committee of the IEEE Computer Society', 'aapl:keywords': "['IEEE 802.11', 'IEEE 802.11ae', 'management', 'QFM', 'QoS', 'quality-of-service             management frame']", 'title': 'IEEE Std 802.11ae-2012, IEEE Standard for Information Technology—Telecommunications and information exchange between systems—Local and metropolitan area networks—Specific requirements—Part 11: Wireless Medium Access Control (MAC) and physical layer (PHY) specifications—Amendment 1: Prioritization of Management Frames', 'subject': 'A mechanism for prioritization of management frames is provided and a protocol to communicate management frame prioritization policy is specified in this amendment.

## More on Splitters
In the previous lesson, we focused on recursive character splitter. Regardless of the framework you use, you need to spend some time understanding, and choosing the best splitter for your use case.

In [9]:
# Comparing 2 libraries 
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

chunk_size =20
chunk_overlap = 5

rsplit = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)
csplit = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

text1 = 'abcdefghijklmnopqrstuvwxyz1234567890'
text2 = 'a b c d e f g h i j k l m n o p q r s t u v w x y z 1 2 3 4 5 6 7 8 9 0'

In [10]:
rsplit.split_text(text1)

['abcdefghijklmnopqrst', 'pqrstuvwxyz123456789', '567890']

In [11]:
rsplit.split_text(text2)

['a b c d e f g h i j',
 'i j k l m n o p q r',
 'q r s t u v w x y z',
 'y z 1 2 3 4 5 6 7 8',
 '7 8 9 0']

In [12]:
# Character splitter does not do anything, because it considers by default the end of paragraph as the separator.
csplit.split_text(text1)

['abcdefghijklmnopqrstuvwxyz1234567890']

In [13]:
csplit.split_text(text2)

['a b c d e f g h i j k l m n o p q r s t u v w x y z 1 2 3 4 5 6 7 8 9 0']

In [14]:
# A longer text, and a more realistic split
chunk_size =700
chunk_overlap = 5

rsplit = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)
csplit = CharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

In [15]:
text3 = """ The Martians seem to have calculated their descent with amazing subtlety--their mathematical learning is evidently far in excess of ours--and to have carried out their prepara- tions with a well-nigh perfect unanimity. Had our instru- ments 
permitted it, we might have seen the gathering trouble far back in the nineteenth century. Men like Schiaparelli watched the red planet--it is odd, by-the-bye, that for count- less centuries Mars has been the star of war--but failed to interpret 
the fluctuating appearances of the markings they mapped so well. All that time the Martians must have been getting ready.
During the opposition of 1894 a great light was seen on the illuminated part of the disk, first at the Lick Observatory, then by Perrotin of Nice, and then by other observers. English readers heard of it first in the issue of NATURE dated August 2. I am 
inclined to think that this blaze may have been the casting of the huge gun, in the vast pit sunk into their planet, from which their shots were fired at us. Peculiar markings, as yet unexplained, were seen near the site of that outbreak during the next 
two oppositions.
The storm burst upon us six years ago now. As Mars approached opposition, Lavelle of Java set the wires of the astronomical exchange palpitating with the amazing intelli- gence of a huge outbreak of incandescent gas upon the planet. It had occurred towards 
midnight of the twelfth; and the spectroscope, to which he had at once resorted, indicated a mass of flaming gas, chiefly hydrogen, moving with an enormous velocity towards this earth. This jet of fire had become invisible about a quarter past twelve. He 
compared it to a colossal puff of flame suddenly and violently squirted out of the planet, as flaming gases rushed out of a gun. """

In [16]:
rsplit.split_text(text3)

['The Martians seem to have calculated their descent with amazing subtlety--their mathematical learning is evidently far in excess of ours--and to have carried out their prepara- tions with a well-nigh perfect unanimity. Had our instru- ments \npermitted it, we might have seen the gathering trouble far back in the nineteenth century. Men like Schiaparelli watched the red planet--it is odd, by-the-bye, that for count- less centuries Mars has been the star of war--but failed to interpret \nthe fluctuating appearances of the markings they mapped so well. All that time the Martians must have been getting ready.',
 'During the opposition of 1894 a great light was seen on the illuminated part of the disk, first at the Lick Observatory, then by Perrotin of Nice, and then by other observers. English readers heard of it first in the issue of NATURE dated August 2. I am \ninclined to think that this blaze may have been the casting of the huge gun, in the vast pit sunk into their planet, from whi

In [17]:
chunks=rsplit.split_text(text3)
for i, _ in enumerate(chunks):
    print(f"chunk # {i}, size: {len(chunks[i])}")

chunk # 0, size: 610
chunk # 1, size: 526
chunk # 2, size: 642


In [18]:
print(chunks[0])

The Martians seem to have calculated their descent with amazing subtlety--their mathematical learning is evidently far in excess of ours--and to have carried out their prepara- tions with a well-nigh perfect unanimity. Had our instru- ments 
permitted it, we might have seen the gathering trouble far back in the nineteenth century. Men like Schiaparelli watched the red planet--it is odd, by-the-bye, that for count- less centuries Mars has been the star of war--but failed to interpret 
the fluctuating appearances of the markings they mapped so well. All that time the Martians must have been getting ready.


In [19]:
# What happens if the split is smaller:
chunk_size =300
chunk_overlap = 5
rsplit = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)

In [20]:
chunks=rsplit.split_text(text3)
for i, _ in enumerate(chunks):
    print(f"chunk # {i}, size: {len(chunks[i])}")

chunk # 0, size: 240
chunk # 1, size: 245
chunk # 2, size: 121
chunk # 3, size: 253
chunk # 4, size: 271
chunk # 5, size: 256
chunk # 6, size: 254
chunk # 7, size: 128


In [21]:
print(chunks[0])

The Martians seem to have calculated their descent with amazing subtlety--their mathematical learning is evidently far in excess of ours--and to have carried out their prepara- tions with a well-nigh perfect unanimity. Had our instru- ments


In [22]:
# What happens if the split is smaller:
chunk_size =100
chunk_overlap = 5
rsplit = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    separators=["\n\n", "\n", "(?<=\. )", " ", ""]
)

In [23]:
chunks=rsplit.split_text(text3)
for i, _ in enumerate(chunks):
    print(f"chunk # {i}, size: {len(chunks[i])}")

chunk # 0, size: 92
chunk # 1, size: 96
chunk # 2, size: 52
chunk # 3, size: 99
chunk # 4, size: 95
chunk # 5, size: 54
chunk # 6, size: 96
chunk # 7, size: 29
chunk # 8, size: 98
chunk # 9, size: 95
chunk # 10, size: 61
chunk # 11, size: 97
chunk # 12, size: 90
chunk # 13, size: 73
chunk # 14, size: 16
chunk # 15, size: 97
chunk # 16, size: 92
chunk # 17, size: 74
chunk # 18, size: 97
chunk # 19, size: 96
chunk # 20, size: 69
chunk # 21, size: 93
chunk # 22, size: 37


In [24]:
print(chunks[0])

The Martians seem to have calculated their descent with amazing subtlety--their mathematical


In [25]:
# LlamaIndex has different splitters, including sentence splitter, that makes splits based on sentences (period). 

from llama_index.core.node_parser import SentenceSplitter

from llama_index.core.node_parser import SentenceSplitter

# Initialize the SentenceSplitter with your chunk size and overlap
ssplit = SentenceSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap
)

# Apply the splitter on your text
split_texts = ssplit.split_text(text1)

# Display the split chunks
for idx, chunk in enumerate(split_texts):
    print(f"Chunk {idx + 1}:")
    print(chunk)
    print("\n" + "-" * 50 + "\n")



Chunk 1:
abcdefghijklmnopqrstuvwxyz1234567890

--------------------------------------------------



Both frameworks have splitters with similar goals, including for LangChain (beyond CharacterSplitter and RecursiveCharacterSplitter) HTML spliiter (HTMLHeaderTextSplitter, to chop HTNL pages while following the page structure), Code splitter (RecursiveCharacterSplitter with the option e.g. language=language.PYTHON), recursive JSON splitter (RecursiveJSONSplitter), Semantic splitter (SemanticChunker, splits in sentences and uses LLM to try to find semantic structures to group chunks that form cohernet semantic ensembles) and tokens, and for LlamaIndex (beyond SentenceSplitter and TokenTExtSplitter), HTML (HTMLNodeParser), JSON (JSONNodeParser), Markdown (MarkdownNodeParser), Code (CodeSplitter with the option e.g. language="python"), Hierarchical splitting (HierarchicalNodeParser, to attmept the best hierarchy of splits based on semantic meaning) and semantic splitting (SemanticSplitterNodeParser).

## More on Similarity search

The goal of the retrieval phase is to select the most relevant documents. But 'relevant' may mean 'repeating the same most relevant segment', which is suboptimal. 

In [26]:

#tempdb.delete_collection() # deleting leftovers from previous instances, as I run this codebook often
from langchain.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
# Set the environment variable to disable tokenizers parallelism and avoid warnings
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Let's define a directory where we'll store the database beyond this notebook execution (and let's make sure it is emtpy, as I run this notebook often :))
persist_directory = 'docs/chroma/'
embeddings = OllamaEmbeddings(model="nomic-embed-text")
!rm -rf ./docs/chroma  # remove old database files if any


  embeddings = OllamaEmbeddings(model="nomic-embed-text")


In [27]:
text4 = [
    """The alien spaceships looked like flying saucers.""",
    """The alien spaceships were round in shape.""",
    """The spaceships were destroying everything.""",
]

In [28]:
tempdb = Chroma.from_texts(text4, embedding=embeddings)

In [29]:
question = "What can you tell me about the alien spaceships?"

In [30]:
tempdb.similarity_search(question, k=2)

[Document(metadata={}, page_content='The alien spaceships were round in shape.'),
 Document(metadata={}, page_content='The alien spaceships looked like flying saucers.')]

Similarity search points to the documents that are closest semantically to the question, which may include a lot of redundant information, and miss some key points, for example that the alien spaceships were destroying everything. Max Marginal Relevance (MMR) search improves Similarity Search by picking the top k as Similarity Search does, but returning the vectors that are farthest from each other (in this top k list), so as to maximize the diversity of information returned.

In [31]:
tempdb.max_marginal_relevance_search(question,k=2, fetch_k=5)

Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


[Document(metadata={}, page_content='The alien spaceships were round in shape.'),
 Document(metadata={}, page_content='The spaceships were destroying everything.')]

## LangChain Chains and Tools

LangChain offers a series of tools that can be called using chains, making the process of using multiple sources for the LLM input data very flexible.

In [32]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

In [39]:
from langchain.prompts import ChatPromptTemplate
from langchain.llms import Ollama


# Define a prompt template, used to build a prompt from various elements
prompt = ChatPromptTemplate.from_template(
    "tell me a short useful fact about {topic}"
)

# Use Ollama with Llama3 model
model = Ollama(model="qwen:1.8b")

# Define the output parser, which simply takes the LLM output and displays it as a string
output_parser = StrOutputParser()

In [40]:
# With LangChain you can define a chain of elements, here the prompt (output) we built is redirected into the model (as input), the model output is redirected to the parser, which uses it as input to output... a string of what the model sent 
chain = prompt | model | output_parser

In [41]:
# Let's call the chain
chain.invoke({"topic": "Paris"})

'Paris, the capital city of France, is home to numerous historical landmarks and cultural attractions. Some notable facts about Paris include:\n\n1. The Louvre Museum: One of the most famous museums in the world, the Louvre houses an extensive collection of art from various periods, including the Mona Lisa and Venus de Milo.\n\n2. Notre-Dame Cathedral: A masterpiece of Gothic architecture, Notre-Dame Cathedral is located on the Île de la Cité in Paris. The cathedral has been a major landmark of Paris since it was completed in 1345.\n\n3. Montmartre: A historic neighborhood in Paris that is known for its artistic heritage, charming streets, and numerous iconic landmarks, including the Sacré-Cœur Basilica and Place des Vosges.\n\nThese are just a few notable facts about Paris, but they provide a good starting point for learning more about this iconic city.'

In [42]:
# The same logic can be used in RAG. Suppose that we have two chunks.

!pip install -q docarray
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

# Initialize the Ollama embedding model
embedding = OllamaEmbeddings(model="nomic-embed-text")

# Create the vector store with two chunks of text (for simplicity. Feel free to load a full pdf or video transcript as we did above in in the previous lessons if you prefer).
vectorstore = DocArrayInMemorySearch.from_texts(
    ["The Martians landed in the UK", "The river that flows in Paris is La Seine"],
    embedding=embedding
)

# Use the vector store as a retriever
retriever = vectorstore.as_retriever()

In [43]:
# Let's see what the retriever does when we ask a simple question - it should retrieve the available chunks, here both chunks irrespective of the question, as we do not run similarity search, just memory search (what is the the memory)
retriever.get_relevant_documents("where did the Martians land?")

  retriever.get_relevant_documents("where did the Martians land?")


[Document(metadata={}, page_content='The Martians landed in the UK'),
 Document(metadata={}, page_content='The river that flows in Paris is La Seine')]

In [44]:
# Now let's create a prompt template as above, that tells the LLM to use the context (i.e. the best chunk) to answer the question
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

In [45]:
# We want to pass the context (the 2 sentences retrieved from the vector store) and the question to the LLM. We use RunnableMap to create a dictionary with 2 elements, the context and the question
from langchain.schema.runnable import RunnableMap
inputs = RunnableMap({
    "context": lambda x: retriever.get_relevant_documents(x["question"]), # the context is retrieved by sending the question to the retriever, as we did manually 2 blocks above; the context is the first part of the dictionary
    "question": lambda x: x["question"] # the second element of the dictionary is the question itself 
})

In [46]:
# Let's see what the dictionary looks like
inputs.invoke({"question": "where did the Martians land?"})

{'context': [Document(metadata={}, page_content='The Martians landed in the UK'),
  Document(metadata={}, page_content='The river that flows in Paris is La Seine')],
 'question': 'where did the Martians land?'}

In [47]:
# We then want to pass the content of the dictionary to the prompt template (making a prompt of it), that is sent to the model, which output is sent to the parser, that makes a string from the answer. 
# Let's define the dictionary again, this time piping it into the prompt, then to the model, then to the parser, and we call the whole chain 'chain'
chain = RunnableMap({
    "context": lambda x: retriever.get_relevant_documents(x["question"]), 
    "question": lambda x: x["question"]  
}) | prompt | model | output_parser 

In [48]:
# What happens when we call the chain?
chain.invoke({"question": "where did the Martians land?"})

'Based on the given context, the Martians landed in the UK. The context mentions two documents: one containing information about the Martians landing in the UK and another containing information about the river that flows in Paris. Both of these documents suggest that the Martians landed in the UK.'

# Chains and Chatbot UI

Let's go back to our War of Worlds example, and explore how we can expand our RAG system to query multiple sources, but also interface with humans through a nice UI

In [49]:
#First, let's bring back our RAG example, with the War of worlds loaded, chopped in chunks and 
# loaded into a Chroma database

# In case of need, cleaning and resetting the chromadb so you can do this exercize multiple times
import os
import shutil
import time
import logging
import warnings

from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# === Configuration ===
FILE_PATH = "docs/War-of-the-Worlds.pdf"
CHROMA_PERSIST_DIR = "chroma_store_chatbot"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
EMBEDDING_MODEL = "nomic-embed-text"

# === Setup ===
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)

# === Utility Functions ===

def clear_chroma_database(persist_dir):
    """Delete the Chroma database directory to start from scratch."""
    if os.path.exists(persist_dir):
        shutil.rmtree(persist_dir)
        logging.info(f"Chroma database at '{persist_dir}' has been cleared.")
    else:
        logging.info(f"No Chroma database found at '{persist_dir}' to clear.")

def load_and_split_document(filepath, chunk_size=1000, chunk_overlap=100):
    """Load the PDF file, split it into chunks, and return them with metadata."""
    start = time.time()
    loader = PyPDFLoader(filepath)
    documents = loader.load()
    logging.info(f"Document loaded in {time.time() - start:.2f} seconds.")
    
    splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    split_documents = splitter.split_documents(documents)

    for doc in split_documents:
        doc.metadata["source"] = filepath
    
    logging.info(f"Document split into {len(split_documents)} chunks.")
    return split_documents

def embed_and_store_document(filepath, persist_dir, embedding_model):
    """Load, embed, and store the document in Chroma."""
    logging.info(f"Processing document: {filepath}")
    embeddings = OllamaEmbeddings(model=embedding_model)

    start = time.time()
    vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
    logging.info(f"Chroma vectorstore initialized in {time.time() - start:.2f} seconds.")

    # Check if already processed (optional safeguard)
    existing_sources = set(m.get("source") for m in vectorstore.get()["metadatas"])
    if filepath in existing_sources:
        logging.info(f"Document already processed: {filepath}")
        return

    chunks = load_and_split_document(filepath, CHUNK_SIZE, CHUNK_OVERLAP)

    start = time.time()
    vectorstore.add_documents(chunks)
    vectorstore.persist()
    logging.info(f"Document embedded and stored in {time.time() - start:.2f} seconds.")

# === Main Workflow ===
if __name__ == "__main__":
    clear_chroma_database(CHROMA_PERSIST_DIR)
    embed_and_store_document(FILE_PATH, CHROMA_PERSIST_DIR, EMBEDDING_MODEL)


INFO:root:Chroma database at 'chroma_store_chatbot' has been cleared.
INFO:root:Processing document: docs/War-of-the-Worlds.pdf
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:root:Chroma vectorstore initialized in 0.04 seconds.
INFO:root:Document loaded in 0.32 seconds.
INFO:root:Document split into 441 chunks.
INFO:root:Document embedded and stored in 19.34 seconds.


In [50]:
# In our RAG, we had 440+ chunks, let's verify that it is the case

# Load the persisted vectorstore
embedding = OllamaEmbeddings(model="nomic-embed-text")
vectordb = Chroma(persist_directory="chroma_store_chatbot", embedding_function=embedding)

# Count the number of documents (chunks)
print(f"Number of documents stored: {len(vectordb.get()['documents'])}")


Number of documents stored: 441


In [51]:
 # And le'ts bring back the context from the previous notebook.
question = "Did the spaceship come from the planet Mars?"
docs = vectordb.max_marginal_relevance_search(question,k=2, fetch_k=3)
#Using Qwen as the LLM, and Ollama as the wrapper to interact with Qwen
from langchain_community.llms import Ollama
llm = Ollama(model = "qwen:1.8b")

In [52]:
# In case you run this part indpendently, don't forget:
#!pip install -q ollama
#!ollama serve & ollama pull qwen:1.8b & ollama pull nomic-embed-text
#!pip install -q ollama langchain beautifulsoup4 chromadb gradio -q

In [53]:
# Let's wrap the RAG into a simple UI. 
import gradio as gr
import ollama
from bs4 import BeautifulSoup as bs
from langchain_community.embeddings import OllamaEmbeddings

# Create Ollama embeddings and vector store
#embeddings = OllamaEmbeddings(model="nomic-embed-text")
#vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Define the function to call the Ollama Llama3 model
def ollama_llm(question, context):
    # Explicitly create a new conversation with only the current prompt
    formatted_prompt = f"Question: {question}\n\nContext: {context}"
    response = ollama.chat(model='qwen:1.8b', messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']


# Define the RAG setup
retriever = vectordb.as_retriever()

def rag_chain(question):
    retrieved_docs = retriever.invoke(question)
    formatted_context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return ollama_llm(question, formatted_context)

# Define the Gradio interface
def get_important_facts(question):
    return rag_chain(question)

# Create a Gradio app interface
iface = gr.Interface(
  fn=get_important_facts,
  inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
  outputs="text",
  title="RAG with Qwen",
  description="Ask questions",
)

# Launch the Gradio app
iface.launch()
# example q: did the aliens eventually go on to land on Venus?

INFO:httpx:HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://127.0.0.1:7864/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7864/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


In [54]:
# Now, Ollama re-injects the previous question and asnwer into the model with the next question. But other LLMs would forget the previous question (remember Dolly?) You can add memory with a memory module.
# Let's also add a debug function to show what was passed to the LLM and the retriever.
import gradio as gr
import ollama
from bs4 import BeautifulSoup as bs
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_core.runnables import Runnable

# Define the prompt template
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. 

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Define memory to store the previous exchanges
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)

# Custom Runnable LLM class with debugging
class OllamaLLM(Runnable):
    def __init__(self, llm_fn):
        self.llm_fn = llm_fn

    def invoke(self, input, config=None, **kwargs):
        # If the input is a StringPromptValue or similar object, treat it as a string
        question = str(input)  # Convert the input to a string if necessary
        context = kwargs.get("context", "")  # Retrieve context from kwargs if available

        # Print what was passed to the LLM
        print(f"Question passed to LLM: {question}")
        print(f"Context passed to LLM: {context}")

        # Handle additional kwargs such as stop, if needed
        stop = kwargs.get("stop", None)

        # If 'stop' or other arguments need to be passed to the LLM function, handle them here
        response = self.llm_fn(question, context)
        
        # Print the response from the LLM
        print(f"Response from LLM: {response}")

        return response

    def predict(self, input, **kwargs):
        return self.invoke(input, **kwargs)

    def __call__(self, *args, **kwargs):
        return self.invoke(*args, **kwargs)

# Instantiate the custom LLM class
ollama_llm_instance = OllamaLLM(ollama_llm)

# Define the conversational retrieval chain
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=ollama_llm_instance,
    retriever=retriever,
    memory=memory # add the memory module, to pass the previous exchange to the LLM as well
)

# Define the function to get important facts with debugging
def get_important_facts(question):
    # Print what is passed to the retriever
    print(f"Question passed to retriever: {question}")
    
    # Run the chain and capture the memory state
    response = qa_chain.run({"question": question})
    
    # Print what is in memory after the retrieval
    print(f"Memory state: {memory.buffer}")

    return response

# Create a Gradio app interface
iface = gr.Interface(
    fn=get_important_facts,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="RAG with Qwen",
    description="Ask questions",
)

# Launch the Gradio app
iface.launch()

# example q: did the aliens eventually go on to land on Venus?
# then: why do you say that?

INFO:httpx:HTTP Request: GET http://127.0.0.1:7865/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7865/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


Question passed to retriever: did the aliens eventually go on to land on Venus?
Question passed to LLM: text="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nLessing has advanced excellent reasons fo r supposing that the Martians have actually \nsucceeded in effecting a landing on the planet  Venus. Seven months ago now, Venus and \nMars were in alignment with the sun; that  is to say, Mars was in opposition from the \npoint of view of an observer on Venus. Subsequently a peculiar luminous and sinuous \nmark- ing appeared on the unillumined half of the inner planet, and almost \nsimultaneously a faint dark mark of a similar sinuous character was detected upon a \nphotograph of the Martian disk. One needs to see the drawings of these ap- pearances in \norder to appreciate fully their remarkable resemblance in character.  \nAt any rate, whether we expect another inva sion

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Response from LLM: According to the information provided, it is stated that "The Martians have what appears to have been an auditory organ, a single round drum at  the back of the head- body, and eyes with a visual range not very different from ours  except that, according to Philips, blue and violet were as black to them." Therefore, it can be inferred that the aliens eventually went on to land on Venus because they possess an auditory organ similar to our own, which enables them to perceive colors and emit sounds in a manner that is similar to humans.
Memory state: [HumanMessage(content='did the aliens eventually go on to land on Venus?', additional_kwargs={}, response_metadata={}), AIMessage(content='According to the information provided, it is stated that "The Martians have what appears to have been an auditory organ, a single round drum at  the back of the head- body, and eyes with a visual range not very different from ours  except that, according to Philips, blue and violet were

Now let's work on adding new sources of information, as you may ask about other things than just the content of the book

In [55]:
# Let's define a simple function that returns the weather for a location
!pip install -q openai
import sys
import os
import openai
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv(dotenv_path="keys.env")

# Retrieve the OpenWeatherMap API key from the environment variables
OPENWEATHER_API_KEY = os.environ.get('OPENWEATHER_API_KEY') # set a keys.env file in the parent directory, and set there your openweather API key

from langchain.agents import tool
import requests
from pydantic import BaseModel, Field

# Define the input schema
class CityInput(BaseModel):
    city: str = Field(..., description="City name to fetch weather data for")

# Tool to get the current weather
@tool(args_schema=CityInput)
def get_current_weather(city: str) -> dict:
    """Fetch current weather for a given city."""

    BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
    
    # Parameters for the weather request
    params = {
        'q': city,
        'appid': OPENWEATHER_API_KEY,
        'units': 'metric',  # To get the temperature in Celsius
    }

    # Make the request
    response = requests.get(BASE_URL, params=params)
    
    if response.status_code == 200:
        results = response.json()
        weather_data = {
            "City": city,
            "Temperature": f"{results['main']['temp']}°C",
            "Weather Description": results['weather'][0]['description'],
            "Humidity": f"{results['main']['humidity']}%",
            "Wind Speed": f"{results['wind']['speed']} m/s",
            "Pressure": f"{results['main']['pressure']} hPa"
        }
    else:
        raise Exception(f"Weather API Request failed with status code: {response.status_code}")
    
    return weather_data



In [56]:
get_current_weather("Richmond")

{'City': 'Richmond',
 'Temperature': '21.86°C',
 'Weather Description': 'overcast clouds',
 'Humidity': '77%',
 'Wind Speed': '6.69 m/s',
 'Pressure': '1010 hPa'}

In [57]:
# Even without an LLM, we can of course build a nice sentence for the response
from langchain.agents import tool
import requests
from pydantic import BaseModel, Field

# Define the input schema
class CityInput(BaseModel):
    city: str = Field(..., description="City name to fetch weather data for")

# Tool to get the current weather
@tool(args_schema=CityInput)
def get_current_weather(city: str) -> dict:
    """Fetch current weather for a given city."""
    
    BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
    
    # Parameters for the weather request
    params = {
        'q': city,
        'appid': OPENWEATHER_API_KEY,
        'units': 'metric',  # To get the temperature in Celsius
    }

    # Make the request
    response = requests.get(BASE_URL, params=params)
    
    if response.status_code == 200:
        results = response.json()
        temperature = results['main']['temp']
        weather_description = results['weather'][0]['description']
        humidity = results['main']['humidity']
        wind_speed = results['wind']['speed']
        pressure = results['main']['pressure']
    else:
        raise Exception(f"Weather API Request failed with status code: {response.status_code}")
    
    return (
        f"The current temperature in {city} is {temperature}°C with {weather_description}.\n"
        f"Humidity: {humidity}%, Wind Speed: {wind_speed} m/s, Pressure: {pressure} hPa."
    )



In [58]:
get_current_weather("Richmond")

'The current temperature in Richmond is 21.86°C with overcast clouds.\nHumidity: 77%, Wind Speed: 6.69 m/s, Pressure: 1010 hPa.'

Let's insert the weather code into the UI code, first with a cue on keyword to decide on weather vs book as the source 

In [59]:
import os
import re
import time
import requests
import logging
import warnings

import gradio as gr
import ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import Runnable

# === Configuration ===
CHROMA_PERSIST_DIR = "chroma_store_chatbot"
FILE_PATH = "docs/War-of-the-Worlds.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
EMBEDDING_MODEL = "nomic-embed-text"
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")  # Secure way to load API key

# === Setup ===
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)

# === Weather Tool ===
def get_current_weather(city: str) -> str:
    BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
    params = {
        'q': city,
        'appid': OPENWEATHER_API_KEY,
        'units': 'metric',
    }
    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        results = response.json()
        return (
            f"The current temperature in {city} is {results['main']['temp']}°C with {results['weather'][0]['description']}. "
            f"Humidity: {results['main']['humidity']}%, Wind Speed: {results['wind']['speed']} m/s, Pressure: {results['main']['pressure']} hPa."
        )
    else:
        raise Exception(f"Weather API Request failed: {response.status_code}")

# === Text Processing and Chroma Setup ===
def load_and_split_document(filepath):
    loader = PyPDFLoader(filepath)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    split_documents = splitter.split_documents(documents)
    for doc in split_documents:
        doc.metadata["source"] = filepath
    return split_documents

def prepare_chroma_vectorstore():
    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
    vectorstore = Chroma(persist_directory=CHROMA_PERSIST_DIR, embedding_function=embeddings)
    if not vectorstore.get()["documents"]:
        logging.info("Populating Chroma vectorstore...")
        chunks = load_and_split_document(FILE_PATH)
        vectorstore.add_documents(chunks)
        vectorstore.persist()
    return vectorstore.as_retriever()

# === Prompt Template ===
template = """Use the following context to answer the question.
If you don't know the answer, say so. Limit to three sentences. End with 'Thanks for asking!'

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# === Memory ===
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# === Custom Runnable LLM Wrapper ===
class OllamaLLM(Runnable):
    def __init__(self, llm_fn):
        self.llm_fn = llm_fn

    def invoke(self, input, config=None, **kwargs):
        question = str(input)
        context = kwargs.get("context", "")
        print(f"Question to LLM: {question}")
        print(f"Context: {context}")
        return self.llm_fn(question, context)

    def predict(self, input, **kwargs):
        return self.invoke(input, **kwargs)

    def __call__(self, *args, **kwargs):
        return self.invoke(*args, **kwargs)

# === Helper: Extract city from LLM answer ===
def extract_city_from_llm_answer(answer: str) -> str:
    match = re.search(r"\b(?:city|town|in|at)\s+([A-Z][a-zA-Z\s\-]+)\b", answer)
    if match:
        return match.group(1).strip()
    return ""

# === Instantiate Retrieval Chain ===
retriever = prepare_chroma_vectorstore()
ollama_llm_instance = OllamaLLM(ollama_llm)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=ollama_llm_instance,
    retriever=retriever,
    memory=memory
)

# === Central Function ===
def get_important_facts(question):
    if "weather in" in question.lower():
        city = question.lower().split("weather in")[-1].strip().split()[0].capitalize()
        try:
            return get_current_weather(city)
        except Exception as e:
            return f"Weather lookup failed: {e}"
    else:
        return qa_chain.run({"question": question})

# === Gradio App ===
iface = gr.Interface(
    fn=get_important_facts,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="RAG Chatbot with Weather",
    description="Ask about Martians or weather!"
)

if __name__ == "__main__":
    iface.launch()

# eg. Find the name of  a large city in the book
# then What is the weather in London, UK?

INFO:httpx:HTTP Request: GET http://127.0.0.1:7866/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7866/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


Question to LLM: text='Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\ncarriages.  \n   Sheen, it seemed, had escaped destruction, but the place was silent and deserted. Here \nwe happened on no dead, though th e night was too dark for us to see into the side roads \nof the place. In Sheen my companion suddenl y com- plained of fain tness and thirst, and \nwe decided to try one of the houses.  \n   The first house we entered, after a little difficulty with the window, was a small semi-\ndetached villa, and I found nothing eatable left in the place but some mouldy cheese. \nThere was, however, water to drink; and I took a hatchet, whic h promised to be useful in \nour next house- breaking.  \n   We then crossed to a place where the road turns towards Mortlake. Here there stood a \nwhite house within a walled garden, and in the pantry of this domicile we found a store 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


We can also integrate the city extraction directly from the LLM answer... but we have to be careful, for example we can try this:

In [60]:
!pip install -q spacy
import os
import re
import time
import requests
import logging
import warnings
import spacy

import gradio as gr
import ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import Runnable

# === Configuration ===
CHROMA_PERSIST_DIR = "chroma_store_chatbot"
FILE_PATH = "docs/War-of-the-Worlds.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
EMBEDDING_MODEL = "nomic-embed-text"
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")

# === Setup ===
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)
nlp = spacy.load("en_core_web_sm")

# === Weather Tool ===
def get_current_weather(city: str) -> str:
    BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
    params = {
        'q': city,
        'appid': OPENWEATHER_API_KEY,
        'units': 'metric',
    }
    print(f"Trying weather lookup for: {city}")
    response = requests.get(BASE_URL, params=params)
    if response.status_code == 200:
        results = response.json()
        return (
            f"The current temperature in {city} is {results['main']['temp']}°C with {results['weather'][0]['description']}. "
            f"Humidity: {results['main']['humidity']}%, Wind Speed: {results['wind']['speed']} m/s, Pressure: {results['main']['pressure']} hPa."
        )
    elif response.status_code == 404 and ',' not in city:
        params['q'] = f"{city},GB"
        print(f"Retrying with country code: {params['q']}")
        response = requests.get(BASE_URL, params=params)
        if response.status_code == 200:
            results = response.json()
            return (
                f"The current temperature in {city} is {results['main']['temp']}°C with {results['weather'][0]['description']}. "
                f"Humidity: {results['main']['humidity']}%, Wind Speed: {results['wind']['speed']} m/s, Pressure: {results['main']['pressure']} hPa."
            )
    raise Exception(f"Weather API Request failed: {response.status_code} - {response.text}")

# === Text Processing and Chroma Setup ===
def load_and_split_document(filepath):
    loader = PyPDFLoader(filepath)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    split_documents = splitter.split_documents(documents)
    for doc in split_documents:
        doc.metadata["source"] = filepath
    return split_documents

def prepare_chroma_vectorstore():
    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
    vectorstore = Chroma(persist_directory=CHROMA_PERSIST_DIR, embedding_function=embeddings)
    if not vectorstore.get()["documents"]:
        logging.info("Populating Chroma vectorstore...")
        chunks = load_and_split_document(FILE_PATH)
        vectorstore.add_documents(chunks)
        vectorstore.persist()
    return vectorstore.as_retriever()

# === Prompt Template ===
template = """Use the following context to answer the question.
If you don't know the answer, say so. Limit to three sentences. End with 'Thanks for asking!'

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# === Memory ===
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# === Custom Runnable LLM Wrapper ===
class OllamaLLM(Runnable):
    def __init__(self, llm_fn):
        self.llm_fn = llm_fn

    def invoke(self, input, config=None, **kwargs):
        question = str(input)
        context = kwargs.get("context", "")
        print(f"Question to LLM: {question}")
        print(f"Context: {context}")
        return self.llm_fn(question, context)

    def predict(self, input, **kwargs):
        return self.invoke(input, **kwargs)

    def __call__(self, *args, **kwargs):
        return self.invoke(*args, **kwargs)

# === Enhanced City Extraction (NER + Regex) ===
def extract_city_from_llm_answer(answer: str) -> str:
    # First try NER
    doc = nlp(answer)
    for ent in doc.ents:
        if ent.label_ == "GPE":
            return ent.text.strip()
    # Fallback regex if NER fails
    match = re.search(r"\b(?:city|town|in|at)\s+([A-Z][a-zA-Z\s\-]+)\b", answer)
    if match:
        return match.group(1).strip()
    # Final fallback: return first title-cased word
    words = answer.split()
    for word in words:
        if word.istitle():
            return word.strip()
    return ""

# === Instantiate Retrieval Chain ===
retriever = prepare_chroma_vectorstore()
ollama_llm_instance = OllamaLLM(ollama_llm)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=ollama_llm_instance,
    retriever=retriever,
    memory=memory
)

# === Central Function ===
def get_important_facts(question):
    if "weather" in question.lower() and "martian" in question.lower():
        city_question = "What is the first large city mentioned in the book?"
        city_answer = qa_chain.run({"question": city_question})
        city = extract_city_from_llm_answer(city_answer)
        if not city:
            return "Could not determine the city from the document."
        try:
            weather_info = get_current_weather(city)
        except Exception as e:
            return f"City found: {city}, but weather retrieval failed: {e}"
        return f"The first large city mentioned in the book is {city}.\n\n{weather_info}\n\nThanks for asking!"
    elif "weather in" in question.lower():
        city = question.lower().split("weather in")[-1].strip().split()[0].capitalize()
        try:
            return get_current_weather(city)
        except Exception as e:
            return f"Weather lookup failed: {e}"
    else:
        return qa_chain.run({"question": question})

# === Gradio App ===
iface = gr.Interface(
    fn=get_important_facts,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs="text",
    title="RAG Chatbot with Weather",
    description="Ask about Martians or weather!"
)

if __name__ == "__main__":
    iface.launch()
#e.g. From the uploaded book about Martians, what is the first large city mentioned? Give the weather in that city.

INFO:httpx:HTTP Request: GET http://127.0.0.1:7867/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7867/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7867

To create a public link, set `share=True` in `launch()`.


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


Question to LLM: text='Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\n"A MESSAGE RECEIVED FROM MARS."  \n             "REMARKABLE STORY FROM WOKING,"  \nand so forth. In addition, Ogilvy\'s wire to the Astronomical Exchange had roused every \nobservatory in the three kingdoms.  \n   There were half a dozen flies or more fr om the Woking station st anding in the road by \nthe sand pits, a basket- chaise from Chobham, and a rather lordly ca rriage. Besides that, \nthere was quite a heap of bicycles. In addition, a large number of people must have \nwalked, in spite of the heat  of the day, from Woking and Chertsey, so that there was \naltogether quite a considerable crowd--one or two gaily dressed ladies among the others. \n  It was glaringly hot, not a cloud in the sky nor a breath of wind, and the only shadow \nwas that of the few scattered pine trees. The burning h

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Question to LLM: text='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n\nHuman: What is a large city mentioned in the book and what is the weather there?\nAssistant: In the book "The History of London", the context is given as follows:\n\nPage 25:\n\n> **The History of London, Volume 1****\n>\n* * *\nIt is mentioned in the book that London is a large city with a rich history. The weather in London varies depending on the time of year and location within the city.\nIn general, London experiences hot summers with high humidity levels, and mild winters with moderate temperatures and low rainfall. However, the specific weather conditions in London can vary depending on factors such as the type of building (e.g., residential vs commercial), the location within the city (e.g., inner London vs outer London), and local events or weather patterns that may impact the weather in Lon

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Question to LLM: text='Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nand so forth. Yet it was a little  too large for assurance on this idea. I felt an impatience to \nsee it opened. About eleven, as nothing seem ed happening, I walked  back, full of such \nthought, to my home in Maybury. But I found it difficult to get to work upon my abstract \ninvestigations.  \n   In the afternoon the appearance of the common had a ltered very much. The early \neditions of the evening papers had startled London with enormous headlines:\n\nand caused no more disturbance than dr unkards might have done. People rattling \nLondonwards peered into the darkness outside the carriage windows, and saw only a rare, \nflickering, vanishing spark dance up from the di rection of Horsell, a red glow and a thin \nveil of smoke driving across the stars, and thought that nothing more serious th

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Question to LLM: text='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n\nHuman: What is a large city mentioned in the book and what is the weather there?\nAssistant: In the book "The History of London", the context is given as follows:\n\nPage 25:\n\n> **The History of London, Volume 1****\n>\n* * *\nIt is mentioned in the book that London is a large city with a rich history. The weather in London varies depending on the time of year and location within the city.\nIn general, London experiences hot summers with high humidity levels, and mild winters with moderate temperatures and low rainfall. However, the specific weather conditions in London can vary depending on factors such as the type of building (e.g., residential vs commercial), the location within the city (e.g., inner London vs outer London), and local events or weather patterns that may impact the weather in Lon

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Question to LLM: text='Use the following pieces of context to answer the question at the end. If you don\'t know the answer, just say that you don\'t know, don\'t try to make up an answer.\n\nand so forth. Yet it was a little  too large for assurance on this idea. I felt an impatience to \nsee it opened. About eleven, as nothing seem ed happening, I walked  back, full of such \nthought, to my home in Maybury. But I found it difficult to get to work upon my abstract \ninvestigations.  \n   In the afternoon the appearance of the common had a ltered very much. The early \neditions of the evening papers had startled London with enormous headlines:\n\nI go to London and see the busy multitudes in Fleet Street and the Strand, and it comes \nacross my mind that they are but the ghosts of  the past, haunting the streets that I have \nseen silent and wretched, going to and fro, phan- tasms in a dead city, the mockery of life \nin a galvanised body. And strange, too, it is to stand on Primrose H

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Trying weather lookup for: London
Question to LLM: text='Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.\n\nChat History:\n\nHuman: What is a large city mentioned in the book and what is the weather there?\nAssistant: In the book "The History of London", the context is given as follows:\n\nPage 25:\n\n> **The History of London, Volume 1****\n>\n* * *\nIt is mentioned in the book that London is a large city with a rich history. The weather in London varies depending on the time of year and location within the city.\nIn general, London experiences hot summers with high humidity levels, and mild winters with moderate temperatures and low rainfall. However, the specific weather conditions in London can vary depending on factors such as the type of building (e.g., residential vs commercial), the location within the city (e.g., inner London vs outer London), and local events or weather patterns 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Question to LLM: text="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nof London. Yet a kind of eddy of people drove into its mouth; weaklings elbowed out of \nthe stream, who for the most part rested bu t a moment before pl unging into it again. A \nlittle way down the lane, with two friends be nding over him, lay a man with a bare leg, \nwrapped about with bloody rags. He was a lucky man to have friends.\n\nand so forth. Yet it was a little  too large for assurance on this idea. I felt an impatience to \nsee it opened. About eleven, as nothing seem ed happening, I walked  back, full of such \nthought, to my home in Maybury. But I found it difficult to get to work upon my abstract \ninvestigations.  \n   In the afternoon the appearance of the common had a ltered very much. The early \neditions of the evening papers had startled London with enormous headlines:\n\nDublin

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Trying weather lookup for: London


You can add multiple other elements to your chain, for example linking to OpenStreet map to get coordinates, or test the LLM ability to recognize semantic elements like 'after', 'later' etc to surface a chronology of the events...


In [68]:

import os
import re
import requests
import spacy
import logging
import warnings
import gradio as gr

from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.runnables import Runnable

# === Configuration ===
CHROMA_PERSIST_DIR = "chroma_store_chatbot"
FILE_PATH = "docs/War-of-the-Worlds.pdf"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100
EMBEDDING_MODEL = "nomic-embed-text"
OPENWEATHER_API_KEY = os.getenv("OPENWEATHER_API_KEY")

# === Setup ===
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)
nlp = spacy.load("en_core_web_sm")

# === Weather Tool ===
def get_current_weather(city: str) -> str:
    BASE_URL = "http://api.openweathermap.org/data/2.5/weather"
    params = {'q': city, 'appid': OPENWEATHER_API_KEY, 'units': 'metric'}
    response = requests.get(BASE_URL, params=params)
    if response.ok:
        data = response.json()
        return (
            f"The current temperature in {city} is {data['main']['temp']}\u00b0C with {data['weather'][0]['description']}. "
            f"Humidity: {data['main']['humidity']}%, Wind: {data['wind']['speed']} m/s, Pressure: {data['main']['pressure']} hPa."
        )
    return f"Could not get weather for {city}: {response.status_code}"

# === Chroma + RAG Setup ===
def load_and_split_document(filepath):
    loader = PyPDFLoader(filepath)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    return splitter.split_documents(documents)

def prepare_chroma_vectorstore():
    embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
    vectorstore = Chroma(persist_directory=CHROMA_PERSIST_DIR, embedding_function=embeddings)
    if not vectorstore.get()["documents"]:
        logging.info("Populating Chroma vectorstore...")
        chunks = load_and_split_document(FILE_PATH)
        vectorstore.add_documents(chunks)
        vectorstore.persist()
    return vectorstore.as_retriever()

retriever = prepare_chroma_vectorstore()

# === Prompt Template for RAG ===
template = """Use the following context to answer the question.
If you don't know the answer, say so. Be concise and end with 'Thanks for asking!'.

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# === Custom Ollama LLM Wrapper ===
class OllamaLLM(Runnable):
    def __init__(self, llm_fn): self.llm_fn = llm_fn
    def invoke(self, input, config=None, **kwargs):
        question = str(input)
        context = kwargs.get("context", "")
        return self.llm_fn(question, context)
    def predict(self, input, **kwargs): return self.invoke(input, **kwargs)
    def __call__(self, *args, **kwargs): return self.invoke(*args, **kwargs)

def ollama_llm(question, context=""):
    import ollama
    prompt = f"{context}\n\nQuestion: {question}\nHelpful Answer:"
    response = ollama.chat(model="qwen:1.8b", messages=[{"role": "user", "content": prompt}])
    return response['message']['content']

ollama_llm_instance = OllamaLLM(ollama_llm)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=ollama_llm_instance,
    retriever=retriever,
    memory=memory
)

# === Utility Functions ===
def extract_event_timeline():
    docs = retriever.get_relevant_documents("What are the key events of the Martian invasion?")
    context = "\n\n".join([doc.page_content for doc in docs])
    prompt = f"From the following context, list the major events of the Martian invasion in chronological order:\n\n{context}"
    return ollama_llm_instance.invoke(prompt)

def extract_sentiment_for_place(place: str):
    question = f"What is the emotional sentiment in {place} after the Martian landing?"
    return qa_chain.run({"question": question})

def get_location_coordinates(city: str):
    url = "https://nominatim.openstreetmap.org/search"
    params = {'q': city, 'format': 'json'}
    response = requests.get(url, params=params)
    if response.ok and response.json():
        loc = response.json()[0]
        return f"{city} is located at {loc['lat']}, {loc['lon']} (map: https://www.openstreetmap.org/?mlat={loc['lat']}&mlon={loc['lon']})"
    return f"Could not find coordinates for {city}."

def detect_cities_in_document(query="Where did the Martians land?"):
    docs = retriever.get_relevant_documents(query)
    full_text = "\n".join([doc.page_content for doc in docs])
    doc = nlp(full_text)
    cities = [ent.text.strip() for ent in doc.ents if ent.label_ == "GPE"]
    if not cities:
        matches = re.findall(r"\\b([A-Z][a-z]{2,}(?:\\s[A-Z][a-z]{2,})?)\\b", full_text)
        fallback = [m for m in matches if m.lower() not in {"martians", "england"}]
        cities = fallback
    return "Detected cities:\n" + "\n".join(sorted(set(cities)))

# === Gradio Interface ===
def answer_with_routing(question: str) -> str:
    detected = detect_cities_in_document(question)
    city_lines = [line.strip() for line in detected.splitlines() if line and "Detected cities" not in line]
    cities = [c for c in city_lines if c and not c.lower().startswith("us")]

    try:
        if "weather" in question.lower() and cities:
            weather_info = get_current_weather(cities[0])
            return f"City: {cities[0]}\n\n{weather_info}"

        elif "where is" in question.lower() and cities:
            return get_location_coordinates(cities[0])

        elif "emotion" in question.lower() or "feeling" in question.lower():
            return extract_sentiment_for_place(cities[0] if cities else "London")

        elif "timeline" in question.lower():
            return extract_event_timeline()

        else:
            return qa_chain.run({"question": question})

    except Exception as e:
        return f"❌ Failed while handling input:\n{e}"

iface = gr.Interface(
    fn=answer_with_routing,
    inputs=gr.Textbox(lines=2, placeholder="Ask about Martians, cities, maps, weather, or emotions..."),
    outputs="text",
    title="Multi-Tool Martian Assistant",
    description="Ask questions about the story and get real-time insights using routing logic."
)

if __name__ == "__main__":
    iface.launch()



# Show me a timeline of the Martian attack events.
# Show me cities mentioned and give me their coordinates


INFO:httpx:HTTP Request: GET http://127.0.0.1:7875/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7875/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7875

To create a public link, set `share=True` in `launch()`.


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


[32;1m[1;3mThe JSON blob contains the following information about a question related to Martians' location and weather conditions:

```json
{
   "action": "Final Answer",
   "action_input": {
     "location_martians": "Mars",
     "weather_martians": "extremely dry with minimal precipitation."
    }
}
```

Explanation:
1. The JSON blob starts with a `{}` curly bracket, which encloses the data structure.
2. Inside the curly bracket, there are multiple key-value pairs.
3. The first key-value pair is enclosed in double quotes (`"`) and has two values separated by commas (`,`) as follows:

```json
{
   "action": "Final Answer",
   "action_input": {
     "location_martians": "Mars",
     "weather_martians": "extremely dry with minimal precipitation."
    }
}
```

4. The second key-value pair is enclosed in single quotes (``) and has a single value separated by commas (`,`) as follows:

```json
{
   "action": "Final Answer",
   "action_input": {
     "location_martians": "Mars",
     "weat