# Tutorial by AI MakerSpace

https://www.youtube.com/watch?v=Dv1SJ9_LmaE&ab_channel=AIMakerspace

In [3]:
import nest_asyncio

nest_asyncio.apply()

In [None]:
import os
import getpass
import sys
import logging

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [1]:
import llama_index

llama_index.__version__

'0.9.13'

In [None]:
import openai


os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

In [10]:
os.environ["WANDB_NOTEBOOK_NAME"] = "intro_wiki.ipynb"
os.environ["WANDB_API_KEY"] = getpass.getpass("WandB API Key: ")

In [11]:
from llama_index import set_global_handler

set_global_handler("wandb", run_args={"project": "llamaindex-demo-v1"})
wandb_callback = llama_index.global_handler

In [20]:
from re import split
from llama_index import ServiceContext
from llama_index.node_parser import NodeParser, SimpleNodeParser
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from llama_index.llms import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

embed_model = OpenAIEmbedding()
chunk_size = 500
llm = OpenAI(
    temperature=0,
    model="gpt-4-0613",
    streaming=True
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=chunk_size,
    embed_model=embed_model,
)

text_splitter = TokenTextSplitter(
    chunk_size=chunk_size
)

# node_parser = SimpleNodeParser(
#     text_splitter=text_splitter
# )

node_parser = SimpleNodeParser(
    chunk_size=chunk_size, 
)

In [21]:
from llama_index import GPTVectorStoreIndex

index = GPTVectorStoreIndex.from_documents([], service_context=service_context)

[34m[1mwandb[0m: Logged trace tree to W&B.


In [22]:
from llama_index.readers.wikipedia import WikipediaReader

movie_list = ["Barbie (film)", "Oppenheimer (film)"]

wiki_docs = WikipediaReader().load_data(pages=movie_list, auto_suggest=False)

In [23]:
for movie, wiki_doc in zip(movie_list, wiki_docs):
    nodes = node_parser.get_nodes_from_documents([wiki_doc])
    for node in nodes:
        node.metadata = {"title" : movie}
    index.insert_nodes(nodes)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/e

In [37]:
index.summary

'None'

In [24]:
wandb_callback.persist_index(index, index_name="wiki-index")

[34m[1mwandb[0m: Adding directory to artifact (/home/nauman/repos/private/experiments/llamaindex/wandb/run-20231210_165602-6yyzck1w/files/storage)... Done. 0.0s


In [27]:
from llama_index import load_index_from_storage

storage_context = wandb_callback.load_storage_context(
    artifact_url="fnauman/llamaindex-demo-v1/wiki-index:v0"
)

index = load_index_from_storage(storage_context, service_context=service_context)

[34m[1mwandb[0m:   5 of 5 files downloaded.  


INFO:llama_index.indices.loading:Loading all indices.
Loading all indices.
Failed to log trace tree to W&B: list index out of range
Loading all indices.
Failed to log trace tree to W&B: list index out of range


In [28]:
from llama_index.tools import FunctionTool
from llama_index.vector_stores.types import (
    VectorStoreInfo,
    MetadataInfo,
    ExactMatchFilter,
    MetadataFilters,
)
from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine

from typing import List, Tuple, Any
from pydantic import BaseModel, Field

top_k = 3

vector_store_info = VectorStoreInfo(
    content_info="semantic information about movies",
    metadata_info=[MetadataInfo(
        name="title",
        type="str",
        description="title of the movie, one of [Barbie (film), Oppenheimer (film)]",
    )]
)

In [29]:
class AutoRetrieveModel(BaseModel):
    query: str = Field(..., description="natural language query string")
    filter_key_list: List[str] = Field(
        ..., description="List of metadata filter field names"
    )
    filter_value_list: List[str] = Field(
        ...,
        description=(
            "List of metadata filter field values (corresponding to names specified in filter_key_list)"
        )
    )

In [30]:
def auto_retrieve_fn(
    query: str, filter_key_list: List[str], filter_value_list: List[str]
):
    """Auto retrieval function.

    Performs auto-retrieval from a vector database, and then applies a set of filters.

    """
    query = query or "Query"

    exact_match_filters = [
        ExactMatchFilter(key=k, value=v)
        for k, v in zip(filter_key_list, filter_value_list)
    ]
    retriever = VectorIndexRetriever(
        index, filters=MetadataFilters(filters=exact_match_filters), top_k=top_k
    )
    query_engine = RetrieverQueryEngine.from_args(retriever, service_context=service_context)

    response = query_engine.query(query)
    return str(response)

In [31]:
description = f"""\
Use this tool to look up semantic information about films.
The vector database schema is given below:
{vector_store_info.json()}
"""

auto_retrieve_tool = FunctionTool.from_defaults(
    fn=auto_retrieve_fn,
    name="semantic-film-info",
    description=description,
    fn_schema=AutoRetrieveModel
)

In [32]:
from llama_index.agent import OpenAIAgent

agent = OpenAIAgent.from_tools(
    tools=[auto_retrieve_tool],
)

In [33]:
response = agent.chat("Tell me what happens (briefly) in the Barbie movie.")
print(str(response))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[34m[1mwandb[0m: Logged trace tree to W&B.


I apologize for the confusion. It seems that there is no specific information available about the plot of the Barbie movie. However, I can provide you with some general information about the film. "Barbie" is a 2023 fantasy comedy film directed by Greta Gerwig and written by Gerwig and Noah Baumbach. It is based on the Barbie fashion dolls by Mattel and is the first live-action Barbie film after numerous computer-animated films and specials. The film stars Margot Robbie as Barbie and Ryan Gosling as Ken, and it follows their journey of self-discovery after an existential crisis. The film received positive reviews and was a commercial success, grossing over $1.44 billion worldwide.


In [34]:
wandb_callback.finish()

