In [1]:
 # Llama-index psql vector store installs
!pip install --upgrade pip
%pip install llama-index llama-index-vector-stores-postgres llama-cpp-python
# Installing the BM25 retriever
%pip install llama-index-retrievers-bm25
# Python connection to postgres
%pip install psycopg2
# Helpe modules
%pip install python-dotenv ipython-async
# Generating embeddings with SBERT 
%pip install langchain langchain-community sentence-transformers
%pip install llama-index-embeddings-langchain
# For a subsequent investigation into the generated embedding space
%pip install matplotlib seaborn scikit-learn
# postgres stuff
%pip install pgvector
%pip install psycopg2-binary

Collecting llama-index
  Using cached llama_index-0.12.35-py3-none-any.whl.metadata (12 kB)
Collecting llama-index-vector-stores-postgres
  Using cached llama_index_vector_stores_postgres-0.5.3-py3-none-any.whl.metadata (555 bytes)
Collecting llama-cpp-python
  Using cached llama_cpp_python-0.3.9.tar.gz (67.9 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting llama-index-agent-openai<0.5,>=0.4.0 (from llama-index)
  Using cached llama_index_agent_openai-0.4.7-py3-none-any.whl.metadata (438 bytes)
Collecting llama-index-cli<0.5,>=0.4.1 (from llama-index)
  Using cached llama_index_cli-0.4.1-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-embeddings-openai<0.4,>=0.3.0 (from llama-index)
  

In [23]:
%pip install docker
%pip install -U langchain-huggingface

Note: you may need to restart the kernel to use updated packages.
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.2.0-py3-none-any.whl.metadata (941 bytes)
Downloading langchain_huggingface-0.2.0-py3-none-any.whl (27 kB)
Installing collected packages: langchain-huggingface
Successfully installed langchain-huggingface-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Load the `ipython_async` IPython extension into this kernel
# (presumably provided by the `ipython-async` package installed in the first cell; confirm).
%load_ext ipython_async 

In [12]:
from pathlib import Path

# Create the local data directory. `exist_ok=True` keeps the cell re-runnable:
# the shell `mkdir` it replaces errors out once the directory already exists.
# NOTE(review): the container cell mounts "postgres-data" with type="volume",
# i.e. a Docker named volume, so this host directory may not be used at all; confirm.
Path("postgres-data").mkdir(exist_ok=True)

In [15]:
import time

import docker
from docker.types import Mount
from docker.errors import NotFound, APIError

client = docker.from_env()
container_name = "pgvector-db"

# Upper bound (seconds) on how long to wait for the database to become usable.
STARTUP_TIMEOUT_S = 60

try:
    # Reuse the container if one with this name already exists
    container = client.containers.get(container_name)
    if container.status != "running":
        print(f"Starting existing container: {container.name}")
        container.start()
    else:
        print(f"Container '{container.name}' is already running.")
except NotFound:
    print(f"Creating and starting new container: {container_name}")
    container = client.containers.run(
        image="ankane/pgvector:v0.5.1",
        name=container_name,
        detach=True,
        restart_policy={"Name": "unless-stopped"},
        # NOTE: default demo credentials; do not reuse outside a local sandbox.
        environment={
            "POSTGRES_USER": "postgres",
            "POSTGRES_DB": "postgres",
            "POSTGRES_PASSWORD": "postgres"
        },
        mounts=[
            Mount(target="/var/lib/postgresql/data", source="postgres-data", type="volume")
        ],
        healthcheck={
            "test": ["CMD", "pg_isready", "-U", "postgres"],
            "interval": 5_000_000_000,  # nanoseconds
            "retries": 5,
        },
        ports={"5432/tcp": 5432}
    )

# `run(detach=True)` and `start()` return before Postgres accepts connections,
# so poll the container state (and its healthcheck, when one is configured)
# instead of assuming it is ready. This keeps the following connection cell
# from racing the server start-up under "Run All".
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    container.reload()  # refresh the cached attributes from the Docker daemon
    state = container.attrs.get("State") or {}
    run_status = state.get("Status")
    # The "Health" entry is absent when the container has no healthcheck.
    health_status = (state.get("Health") or {}).get("Status")

    if run_status == "running" and health_status in (None, "none", "healthy"):
        break
    if run_status in ("exited", "dead") or health_status == "unhealthy":
        raise RuntimeError(
            f"Container '{container.name}' failed to start "
            f"(status={run_status!r}, health={health_status!r})"
        )
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Container '{container.name}' was not ready after {STARTUP_TIMEOUT_S}s "
            f"(status={run_status!r}, health={health_status!r})"
        )
    time.sleep(1)

print(f"Container is running: {container.name}")


Creating and starting new container: pgvector-db
Container is running: pgvector-db


In [17]:
# List sockets that mention port 5432 to check that the published Postgres port is listening.
# NOTE(review): `netstat -ano` piped into `findstr` is Windows shell syntax; this
# cell will not work as written on Linux/macOS.
!netstat -ano | findstr :5432

  TCP    0.0.0.0:5432           0.0.0.0:0              LISTENING       38592
  TCP    [::]:5432              [::]:0                 LISTENING       38592
  TCP    [::1]:5432             [::]:0                 LISTENING       3872


In [18]:
import os
import psycopg2
from dotenv import load_dotenv
# Load environment variables via python-dotenv. As the last expression of the
# cell, the call's return value is displayed (False in the recorded run,
# presumably meaning no .env values were loaded; confirm).
load_dotenv()

False

In [19]:
# Name of the database that will hold the vectors, and the role used to connect
db_name = "vector_db"
user = "postgres"

# Use the "db" host name when the DEVCONTAINER environment variable is exactly
# "true"; otherwise talk to the port published on localhost.
in_devcontainer = os.getenv("DEVCONTAINER") == "true"
if in_devcontainer:
    host = "db"
else:
    host = "localhost"
host

'localhost'

In [20]:
from contextlib import closing

from psycopg2 import sql

# Administrative connection to the default "postgres" database. Autocommit is
# enabled because CREATE/DROP DATABASE cannot run inside a transaction block.
# The connection is deliberately left open under the name `conn`, as before.
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password="postgres",
    port=5432,
    user=user,
)
conn.set_client_encoding('UTF8')
conn.autocommit = True

# Compose the statements with a quoted identifier instead of f-string interpolation.
db_identifier = sql.Identifier(db_name)
with conn.cursor() as c:
    # Start from a clean slate, then (re)create the vector database
    c.execute(sql.SQL("DROP DATABASE IF EXISTS {}").format(db_identifier))
    c.execute(sql.SQL("CREATE DATABASE {}").format(db_identifier))

# Extensions are installed per database, so `vector` has to be created while
# connected to the new database. Running it on `conn` would install it into
# "postgres" instead. `closing` guarantees this short-lived connection is released.
with closing(
    psycopg2.connect(
        dbname=db_name,
        host=host,
        password="postgres",
        port=5432,
        user=user,
    )
) as target_conn:
    target_conn.autocommit = True
    with target_conn.cursor() as c:
        c.execute("CREATE EXTENSION IF NOT EXISTS vector")

In [21]:
from llama_index.vector_stores.postgres import PGVectorStore

# Connection and table settings for the pgvector-backed store, gathered in one mapping.
pg_store_params = {
    "database": db_name,
    "host": host,
    "password": "postgres",
    "port": 5432,
    "user": user,
    "table_name": "physics_docs",
    # Vector width of the table; presumably has to equal the embedding model's
    # output size (the stored embeddings inspected later have length 384).
    "embed_dim": 384,
    # Also enable text search so sparse / hybrid query modes can be used.
    "hybrid_search": True,
}

vector_store = PGVectorStore.from_params(**pg_store_params)

In [24]:
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_huggingface import HuggingFaceEmbeddings

# Sentence-transformers checkpoint used to embed the documents (MiniLM, small and fast)
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# LangChain-side embedding model
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Adapter that lets LlamaIndex use the LangChain embedding model
embed_model = LangchainEmbedding(embedding_model)

In [25]:
import subprocess
from pathlib import Path

DATA_REPO_URL = "https://github.com/amadou-6e/ai-data-zoo.git"
data_repo_dir = Path("ai-data-zoo")

# Clone only when the checkout is missing: `git clone` refuses to write into an
# existing non-empty directory, so an unguarded clone fails on every re-run.
# `check=True` surfaces a failed clone immediately instead of letting later
# cells trip over missing files.
if not data_repo_dir.exists():
    subprocess.run(["git", "clone", DATA_REPO_URL, str(data_repo_dir)], check=True)

Cloning into 'ai-data-zoo'...


In [28]:
import zipfile

# The archive is a .zip file, so extract it with the standard library rather
# than `tar -xzf` (gzip-compressed tar flags), which only works with tar builds
# that happen to auto-detect zip archives. Files are extracted into the current
# working directory, exactly where the tar command placed them.
with zipfile.ZipFile("ai-data-zoo/markdown.zip") as archive:
    archive.extractall()

In [30]:
from pathlib import Path
from llama_index.core import SimpleDirectoryReader

# Folder holding the extracted markdown corpus
source_dir = Path("markdown")

# Walk the folder tree, keeping only .md files, and load them as documents
markdown_reader = SimpleDirectoryReader(
    source_dir,
    recursive=True,
    required_exts=[".md"],
)
documents = markdown_reader.load_data()
print(f"Loaded {len(documents)} documents")

Loaded 16 documents


In [31]:
from llama_index.core import StorageContext, VectorStoreIndex

# Route index storage through the Postgres-backed vector store
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index from the loaded documents: nodes are parsed, embedded with
# `embed_model`, and written to the vector store (progress bars enabled).
index_build_options = {
    "storage_context": storage_context,
    "embed_model": embed_model,
    "show_progress": True,
}
index = VectorStoreIndex.from_documents(documents, **index_build_options)

Parsing nodes:   0%|          | 0/16 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/297 [00:00<?, ?it/s]

In [32]:
from IPython.display import display, Markdown

In [33]:
# Query used by the retrieval cells below.
# The original cell assigned `query` twice in a row, so the first value was
# never used; it is kept here as an alternative to try, with its spelling fixed:
#   "Who developed / discovered the AdS/CFT correspondence?"
query = "Who is Richard Feynman?"

In [34]:
# Dense (embedding similarity) retrieval: fetch the five closest chunks
retriever = index.as_retriever(similarity_top_k=5)
results = retriever.retrieve(query)

for idx, node in enumerate(results):
    # Each hit wraps the stored node together with its score. Read the character
    # offsets straight from the wrapped node instead of serialising the whole
    # hit with `.dict()` twice per result.
    source_node = node.node
    start_char_idx = getattr(source_node, "start_char_idx", None)
    end_char_idx = getattr(source_node, "end_char_idx", None)

    print(f"Result {idx+1}\n")
    print(f"Score: {node.score}")
    print(f"Start char idx: {start_char_idx}; End char idx: {end_char_idx}")
    # `.name` keeps the full folder name; `.stem` would cut it at the last dot.
    print(f"Location: {Path(node.metadata['file_path']).parent.name}")
    # Show the first 600 characters as a markdown block quote
    text_sample = "\n".join(f"> {line}" for line in node.text[:600].split("\n"))
    display(Markdown(text_sample))
    print('-'*50)

Result 1

Score: 0.6395445251193371
Start char idx: 0; End char idx: 3147
Location: de-feynman


> # WIKIPEDIA
> 
> # Richard Feynman
> 
> Richard Phillips Feynman [ˈfammən] (\* 11. Mai 1918 in Queens, New York; + 15. Februar 1988 in Los Angeles) war ein US-amerikanischer Physiker und Nobelpreisträger des Jahres 1965.
> 
> Feynman gilt als einer der großen Physiker des 20. Jahrhunderts, der wesentliche Beiträge zum Verständnis der Quantenfeldtheorien geliefert hat. Zusammen mit Shin'ichirō Tomonaga und Julian Schwinger erhielt er 1965 den Nobelpreis für seine Arbeit zur Quantenelektrodynamik (QED). Seine anschauliche Darstellung quantenfeldtheoretischer elementarer Wechselwirkungen durch Feynman-

--------------------------------------------------
Result 2

Score: 0.5868013850928189
Start char idx: 31884; End char idx: 33934
Location: de-feynman


> 1979 (berichtet über die Zusammenarbeit mit Feynman Ende der 1940er Jahre)
> - Jörg Resag: Feynman und die Physik. Leben und Forschung eines außergewöhnlichen Menschen. Springer 2018, ISBN 978-3-662-54796-0.
> - = Silvan S. Schweber: Quantum Electrodynamics and the men who made it. Princeton University Press, 1994
> - = Leonard Mlodinow: Feynman's Rainbow. A Search for Beauty in Physics and in Life. 2003 (dt. Feynmans Regenbogen. Die Suche nach Schönheit in der Physik und im Leben. Reclam, 2005, ISBN 3-379-00826-5) – beschreibt autobiografisch die Zeit des Autors Anfang der 1980er Jahre am Caltec

--------------------------------------------------
Result 3

Score: 0.5666405479144437
Start char idx: 35673; End char idx: 38436
Location: de-feynman


> The dispersion of Feynman diagrams in post-war physics, University of Chicago Press 2005.
> - 2. Richard Feynman: The 1979 The Sir Douglas Robb Lectures (http://www.vega.org.uk/video/su bseries/8) Teile 1-4, University of Auckland, Grundlage des populärwissenschaftlichen Buches: QED: Die seltsame Theorie des Lichts und der Materie.
> - 3. Richard P. Feynman: Cargo Cult Science: Some remarks on science, pseudoscience, and learning how to not fool yourself. Caltech's 1974 commencement address. Ihttp://calteches.libr ary.caltech.edu/51/2/CargoCult.htm) In: caltech.edu. Abgerufen am 11. Mai 2018 (en

--------------------------------------------------
Result 4

Score: 0.5212680852791098
Start char idx: 33277; End char idx: 36287
Location: de-feynman


> Folge der 9. Staffel leihen sie sich Feynmans Van für einen Ausflug. In der 2. Folge der 11. Staffel treffen sie sich an Feynmans Grab, um ihn mit einer Runde romulanischem Ale (ein Getränk aus der fiktionalen Fernsehserie Raumschiff Enterprise) zu würdigen.
> - Im Film Oppenheimer, welcher die Ereignisse rund um das Manhattan Project in Los Alamos zeigt, wird Feynman von dem Schauspieler Jack Quaid verkörpert
> 
> # Videos
> 
> ▪ The 1979 The Sir Douglas Robb Lectures (http://www.vega.org.uk/video/subseries/8) University of Auckland, vierteiliges Video, Grundlage des populärwissenschaftlichen Buch

--------------------------------------------------
Result 5

Score: 0.5211869788877365
Start char idx: 22323; End char idx: 25175
Location: de-feynman


> – RICHARD P. FEYNMAN[40]
> 
> # Werke
> 
> ### Bücher von Feynman über Physik
> 
> - · mit Robert B. Leighton und Matthew Sands: The Feynman Lectures on Physics. 3 Bände, ISBN 0-201-02115-3 (dt. Feynman-Vorlesungen über Physik. 5. Bände, 6. Aufl. De Gruyter 2015), zuerst 1963/1965 bei Addison/Wesley (in Band 3 Quantenmechanik über Diracs Bra-Ket-Notation behandelt, als Anwendung Maser, Transistor, Josephson-Effekt), Die englische Originalausgabe ist online verfügbar (http://www.feynmanlectures.caltech.edu/)
> - QED: The Strange Theory of Light and Matter 1985 (dt. QED: Die seltsame Theorie des Lichts

--------------------------------------------------


In [36]:
from llama_index.core.vector_stores.types import VectorStoreQueryMode

# Sparse (text search) retrieval against the same index: top three hits
retriever = index.as_retriever(similarity_top_k=3, vector_store_query_mode=VectorStoreQueryMode.SPARSE)
results = retriever.retrieve(query)

for idx, node in enumerate(results):
    # Each hit wraps the stored node together with its score. Read the character
    # offsets straight from the wrapped node instead of serialising the whole
    # hit with `.dict()` twice per result.
    source_node = node.node
    start_char_idx = getattr(source_node, "start_char_idx", None)
    end_char_idx = getattr(source_node, "end_char_idx", None)

    print(f"Result {idx+1}\n")
    print(f"Score: {node.score}")
    print(f"Start char idx: {start_char_idx}; End char idx: {end_char_idx}")
    # `.name` keeps the full folder name; `.stem` would cut it at the last dot.
    print(f"Location: {Path(node.metadata['file_path']).parent.name}")
    # Show the first 600 characters as a markdown block quote
    text_sample = "\n".join(f"> {line}" for line in node.text[:600].split("\n"))
    display(Markdown(text_sample))
    print('-'*50)

Result 1

Score: 0.09625329
Start char idx: 29321; End char idx: 32283
Location: de-feynman


> ISBN 3-492-22166-1) (u. a. über die Challenger-Katastrophe, sein Hobby Zeichnen, seine erste Frau Arlene, seinen Vater; mit dem Vortrag The value of science)
> 
> ### Bücher von Feynman über anderes
> 
> - The Art of Richard P. Feynman. Images by a Curious Character. ISBN 2-88449-047-7.
> - The Meaning of it all (dt. Was soll das alles? Gedanken eines Physikers. ISBN 3-492-23316-3) (drei populäre Vorlesungen Anfang der 60er Jahre an University of Washington, u. a. value of science)
> - The Pleasure of Finding Things out. The Best Short Works of Richard P. Feynman. Penguin 2001 (dt. Es ist so einfach

--------------------------------------------------
Result 2

Score: 0.0946916
Start char idx: 33277; End char idx: 36287
Location: de-feynman


> Folge der 9. Staffel leihen sie sich Feynmans Van für einen Ausflug. In der 2. Folge der 11. Staffel treffen sie sich an Feynmans Grab, um ihn mit einer Runde romulanischem Ale (ein Getränk aus der fiktionalen Fernsehserie Raumschiff Enterprise) zu würdigen.
> - Im Film Oppenheimer, welcher die Ereignisse rund um das Manhattan Project in Los Alamos zeigt, wird Feynman von dem Schauspieler Jack Quaid verkörpert
> 
> # Videos
> 
> ▪ The 1979 The Sir Douglas Robb Lectures (http://www.vega.org.uk/video/subseries/8) University of Auckland, vierteiliges Video, Grundlage des populärwissenschaftlichen Buch

--------------------------------------------------
Result 3

Score: 0.09284579
Start char idx: 0; End char idx: 3147
Location: de-feynman


> # WIKIPEDIA
> 
> # Richard Feynman
> 
> Richard Phillips Feynman [ˈfammən] (\* 11. Mai 1918 in Queens, New York; + 15. Februar 1988 in Los Angeles) war ein US-amerikanischer Physiker und Nobelpreisträger des Jahres 1965.
> 
> Feynman gilt als einer der großen Physiker des 20. Jahrhunderts, der wesentliche Beiträge zum Verständnis der Quantenfeldtheorien geliefert hat. Zusammen mit Shin'ichirō Tomonaga und Julian Schwinger erhielt er 1965 den Nobelpreis für seine Arbeit zur Quantenelektrodynamik (QED). Seine anschauliche Darstellung quantenfeldtheoretischer elementarer Wechselwirkungen durch Feynman-

--------------------------------------------------


In [37]:
from llama_index.core.vector_stores.types import MetadataFilters

# An empty filter list (not a dictionary) is intended to match every stored node
filters = MetadataFilters(filters=[])

# Pull the nodes back out of the vector store and collect their embeddings
all_nodes = vector_store.get_nodes(filters=filters)
embeddings = [stored_node.embedding for stored_node in all_nodes]

# Length of the first stored embedding vector
first_embedding = embeddings[0]
len(first_embedding)

384

In [38]:
from llama_index.retrievers.bm25 import BM25Retriever
# Keyword (BM25) retriever built over the nodes read back from the vector store
bm25_retriever = BM25Retriever.from_defaults(
    nodes=all_nodes,
    similarity_top_k=3,
)

# Query
query = "When did Witten get his Nobel Prize?"
# NOTE: despite the name, these results come from the BM25 retriever alone; the
# variable name is kept so that any later cell referring to it still works.
hybrid_results = bm25_retriever.retrieve(query)

# Print results
for idx, node in enumerate(hybrid_results):
    # Each hit wraps the stored node together with its score. Read the character
    # offsets straight from the wrapped node instead of serialising the whole
    # hit with `.dict()` twice per result.
    source_node = node.node
    start_char_idx = getattr(source_node, "start_char_idx", None)
    end_char_idx = getattr(source_node, "end_char_idx", None)

    print(f"Result {idx+1}\n")
    print(f"Score: {node.score}")
    print(f"Start char idx: {start_char_idx}; End char idx: {end_char_idx}")
    # `.name` keeps the full folder name; `.stem` would cut it at the last dot.
    print(f"Location: {Path(node.metadata['file_path']).parent.name}")
    # Show the first 600 characters as a markdown block quote
    text_sample = "\n".join(f"> {line}" for line in node.text[:600].split("\n"))
    display(Markdown(text_sample))
    print('-' * 50)

resource module not available on Windows
Result 1

Score: 6.016958713531494
Start char idx: 0; End char idx: 4485
Location: eng-feynman-the-development-of-the-qed-space-time-view


> # THE DEVELOPMENT OF THE SPACE-TIME VIEW OF QUANTUM ELECTRODYNAMICS<sup>∗</sup>
> 
> by
> 
> ### Richard P. Feynman
> 
> California Institute of Technology, Pasadena, California
> 
> Nobel Lecture, December 11, 1965.
> 
> We have a habit in writing articles published in scientific journals to make the work as finished as possible, to cover all the tracks, to not worry about the blind alleys or to describe how you had the wrong idea first, and so on. So there isn't any place to publish, in a dignified manner, what you actually did in order to get to do the work, although, there has been in these days, so

--------------------------------------------------
Result 2

Score: 5.186384201049805
Start char idx: 52707; End char idx: 57128
Location: eng-feynman-the-development-of-the-qed-space-time-view


> I didn't have the knowledge to understand the way these were defined in the conventional papers because they were expressed at that time in terms of creation and annihilation operators, and so on, which, I had not successfully learned. I remember that when someone had started to teach me about creation and annihilation operators, that this operator creates an electron, I said, "how do you create an electron? It disagrees with the conservation of charge," and in that way, I blocked my mind from learning a very practical scheme of calculation. Therefore, I had to find as many opportunities as po

--------------------------------------------------
Result 3

Score: 4.586615562438965
Start char idx: 0; End char idx: 5599
Location: de-juan-maldacena


> ![](_page_0_Picture_2.jpeg)
> 
> # Juan Maldacena
> 
> Juan Martín Maldacena (born 10 September 1968) is an Argentine theoretical physicist and the Carl P. Feinberg Professor in the School of Natural Sciences at the Institute for Advanced Study, Princeton. 3 He has made significant contributions to the foundations of string theory and quantum gravity. His most famous discovery is the AdS/CFT correspondence, a realization of the holographic principle in string theory.
> 
> # Biography
> 
> ![](_page_0_Picture_6.jpeg)
> 
> Instituto Balseiro at which Maldacena obtained his Physics licenciatura
> 
> Maldacen

--------------------------------------------------


In [39]:
# Packages for the LLM part of the notebook.
# llama-cpp-python is also installed by the first cell, so this line is a repeat.
%pip install llama-cpp-python
# OpenAI Python client
%pip install openai
# LlamaIndex integration for llama.cpp models
# NOTE(review): not used by the cells visible in this part of the notebook; confirm it is needed.
%pip install llama-index-llms-llama-cpp
# ColBERT-based reranking postprocessor for LlamaIndex
# NOTE(review): not used by the cells visible in this part of the notebook; confirm it is needed.
%pip install llama-index-postprocessor-colbert-rerank

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting llama-index-llms-llama-cpp
  Using cached llama_index_llms_llama_cpp-0.4.0-py3-none-any.whl.metadata (4.5 kB)
Using cached llama_index_llms_llama_cpp-0.4.0-py3-none-any.whl (7.5 kB)
Installing collected packages: llama-index-llms-llama-cpp
Successfully installed llama-index-llms-llama-cpp-0.4.0
Note: you may need to restart the kernel to use updated packages.
Collecting llama-index-postprocessor-colbert-rerank
  Using cached llama_index_postprocessor_colbert_rerank-0.3.0-py3-none-any.whl.metadata (1.1 kB)
Using cached llama_index_postprocessor_colbert_rerank-0.3.0-py3-none-any.whl (3.5 kB)
Installing collected packages: llama-index-postprocessor-colbert-rerank
Successfully installed llama-index-postprocessor-colbert-rerank-0.3.0
Note: you may need to restart the kernel to use updated packages.


In [40]:
# Numerical and plotting stack. matplotlib, seaborn and scikit-learn are also
# installed by the first cell, so only numpy is new on this line.
%pip install numpy matplotlib seaborn scikit-learn 

Note: you may need to restart the kernel to use updated packages.


In [42]:
import os
from utils import display_citation
from llama_index.llms.openai import OpenAI

In [44]:
from dotenv import load_dotenv
# Pull variables from a local .env file (if present) into the process environment
load_dotenv()

# The OpenAI key is read from the API_KEY environment variable
api_key = os.getenv("API_KEY")

# Fail early with an actionable message instead of a bare AssertionError
assert api_key, (
    "API_KEY is not set: add it to your .env file or export it in the "
    "environment before running this cell"
)

In [46]:
# OpenAI chat model client, authenticated with the key read from the environment earlier
llm_openai = OpenAI(model="gpt-4o", api_key=api_key)

# Smoke test: send one prompt and hand the completion text to display_citation
prompt = "Explain how RAG systems work."
completion = llm_openai.complete(prompt)
display_citation(str(completion))

> RAG systems, which stand for Retrieval-Augmented Generation systems, are a type of architecture used in natural language processing (NLP) to enhance the capabilities of language models by combining retrieval mechanisms with generative models. The primary goal of RAG systems is to improve the accuracy and relevance of generated responses by incorporating external information. Here's how they generally work:
> 
> 1. **Retrieval Component**: 
>    - The system first uses a retrieval mechanism to search a large corpus of documents or a database to find relevant information based on the input query or context.
>    - This retrieval process typically involves using techniques like TF-IDF, BM25, or more advanced neural retrieval models such as Dense Passage Retrieval (DPR) to identify the most pertinent documents or passages.
> 
> 2. **Augmentation**:
>    - The retrieved information is then used to augment the input to the generative model. This can involve concatenating the retrieved text with the original query or using it to modify the input in some other way.
>    - The idea is to provide the generative model with additional context or facts that it might not have been able to generate on its own due to the limitations of its training data.
> 
> 3. **Generation Component**:
>    - A generative model, often based on architectures like GPT (Generative Pre-trained Transformer) or BART (Bidirectional and Auto-Regressive Transformers), takes the augmented input and generates a response.
>    - The generative model uses both the original input and the retrieved information to produce a more informed and contextually relevant output.
> 
> 4. **Integration and Output**:
>    - The final output is a combination of the generative model's capabilities and the external knowledge provided by the retrieval component.
>    - This approach allows the system to generate responses that are not only coherent and contextually appropriate but also factually accurate and up-to-date, as it can leverage the latest information from the retrieval corpus.
> 
> RAG systems are particularly useful in applications where the knowledge base is too large to be fully encoded in a generative model's parameters, or where the information changes frequently, such as in customer support, question answering, and conversational agents. By integrating retrieval with generation, RAG systems can provide more accurate and reliable responses, making them a powerful tool in the field of NLP.

In [49]:
from llama_index.core import Settings
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.query_engine import RetrieverQueryEngine

# Chat model used both for answer synthesis and for LLM-based reranking
llm_openai = OpenAI(
    api_key=api_key,
    model="gpt-4o",
)

# Make it the default LLM for LlamaIndex components that are not given one explicitly
Settings.llm = llm_openai

# Retrieve a wider candidate pool than the reranker keeps. With 3 candidates
# and top_n=3 the reranker could only reorder (or drop) the same three chunks
# and never promote a better one.
RERANK_CANDIDATES = 10
RERANK_TOP_N = 3

# Configure reranker
postprocessor = LLMRerank(top_n=RERANK_TOP_N, llm=llm_openai)

# The stray `llm=None` retriever kwarg was dropped: the reranker and
# `Settings.llm` already carry the model.
query_engine = RetrieverQueryEngine(
    retriever=index.as_retriever(similarity_top_k=RERANK_CANDIDATES),
    node_postprocessors=[postprocessor]
)
# The query itself is issued in the next cell; running it here as well only
# duplicated a paid LLM call whose result was immediately overwritten.

In [50]:
# Ask the reranking query engine, then show at most the first 1000 characters of the answer
question = "What is m-theory"
response = query_engine.query(question)
answer_preview = response.response[:1000]
display_citation(answer_preview)

> M-theory is a theoretical framework in physics that aims to unify the various versions of superstring theory. It is considered a candidate for a fundamental theory of everything, combining general relativity and quantum mechanics. M-theory suggests that the fundamental building blocks of the universe are not one-dimensional strings, as in string theory, but rather two-dimensional membranes, or "branes." The theory is still not fully understood, and its complete formulation remains an open question in theoretical physics.

In [51]:
from llama_index.core.agent import StructuredPlannerAgent, FunctionCallingAgentWorker
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata
import nest_asyncio
# Patch asyncio via nest_asyncio before the agent is used inside the notebook kernel
nest_asyncio.apply()

# Default query engine over the index.
# NOTE: this rebinds `query_engine`, replacing the reranking engine built earlier.
query_engine = index.as_query_engine()

# Name and description under which the agent sees the query engine
tool_metadata = ToolMetadata(
    name="vector_index",
    description="A tool for retrieving information from a physics document index.",
)

# Expose the query engine to the agent as a tool
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=tool_metadata,
)

# Worker that executes the individual steps through function calling, one tool call at a time
worker = FunctionCallingAgentWorker.from_tools(
    [query_engine_tool],
    llm=llm_openai,
    verbose=True,
    allow_parallel_tool_calls=False,
)

# Planner agent that drafts a plan of sub-tasks and runs them through the worker
structured_planner = StructuredPlannerAgent(
    worker,
    tools=[query_engine_tool],
    verbose=True,
)

# Try it on a question that needs several retrieval steps
planner_question = "How did research at Los Alamos and Bell Labs overlap?"
response = structured_planner.chat(planner_question)
print(response)


=== Initial plan ===
Retrieve Los Alamos Research Information:
Use the vector_index tool to retrieve information about research conducted at Los Alamos. -> A summary of research topics and projects conducted at Los Alamos.
deps: []


Retrieve Bell Labs Research Information:
Use the vector_index tool to retrieve information about research conducted at Bell Labs. -> A summary of research topics and projects conducted at Bell Labs.
deps: []


Identify Overlapping Research Areas:
Compare the research topics and projects from Los Alamos and Bell Labs to identify any overlaps. -> A list of overlapping research areas between Los Alamos and Bell Labs.
deps: ['Retrieve Los Alamos Research Information', 'Retrieve Bell Labs Research Information']


Summarize Overlapping Research:
Summarize the overlapping research areas identified between Los Alamos and Bell Labs. -> A comprehensive summary of how research at Los Alamos and Bell Labs overlapped.
deps: ['Identify Overlapping Research Areas']


> R