In [4]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [1]:
import os
from dotenv import load_dotenv

env_available = load_dotenv() # loads the environment variables from .env file
assert env_available, "No .env file found"

# Broadly still following the tutorial from Educative, but the llama index api is now completely different, so we have to adapt. 

In [2]:
from pydantic import BaseModel
from typing import Optional, List

class WikiPageList(BaseModel):
    """Data model for list of wiki pages"""
    pages: List[str]

In [3]:
from llama_index.program.openai import OpenAIPydanticProgram
def wikipage_list(query):
    """This function extracts the Wikipedia pages from the query using OpenAI's GPT-3 API and returns them as a list (but stored in a WikiPageList object). 
    Args: 
        query (str): The query string containing the Wikipedia pages to extract. It must start with `please index:` followed by the Wikipedia pages.
    Returns:
        WikiPageList: A data model object containing the list of Wikipedia pages extracted from the query.
    """
    openai_key = os.getenv("OPENAI_API_KEY")
    if not openai_key:
        raise ValueError("No OpenAI API key found")
    
    prompt_template_str = """
    Given the input {query}, extract the Wikipedia pages mentioned after "please index:" and return them as a list.
    If only one page is mentioned, return a single element list.
    """

    program = OpenAIPydanticProgram.from_defaults(
        output_cls=WikiPageList,
        prompt_template_str=prompt_template_str,
        verbose=True,
    )

    wikipage_requests = program(query=query)
    return wikipage_requests

wikipage_list("please index: Jesus Christ, Buddha, and Muhammad.")


Function call: WikiPageList with args: {"pages":["Jesus Christ","Buddha","Muhammad"]}


WikiPageList(pages=['Jesus Christ', 'Buddha', 'Muhammad'])

In [17]:
mathematician_requests = wikipage_list("please index: Alexander Grothendieck, Gottlieb Frege, and David Hilbert.")

Function call: WikiPageList with args: {"pages":["Alexander Grothendieck","Gottlieb Frege","David Hilbert"]}


# Create the documents function

In [7]:
# There are dedicate readers on LlamaHub. 
!pip install llama-index-readers-wikipedia
!pip install wikipedia

Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25ldone
[?25h  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11680 sha256=4c179c967466df3a152ed542c6deaa52936b813b3c93e69f2b07e2eec3a912f9
  Stored in directory: /Users/dominicculver/Library/Caches/pip/wheels/63/47/7c/a9688349aa74d228ce0a9023229c6c0ac52ca2a40fe87679b8
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0


In [9]:
from llama_index.readers.wikipedia import WikipediaReader

def create_wikidocs(wikipage_requests: WikiPageList):
    """This function creates a list of Wikipedia documents from the list of Wikipedia pages.
    Args:
        wikipage_requests (WikiPageList): A data model object containing the list of Wikipedia pages.
    Returns:
        List[str]: A list of Wikipedia documents extracted from the Wikipedia pages.
    """

    reader = WikipediaReader()
    documents = reader.load_data(wikipage_requests.pages)

    return documents
    

In [13]:
mathematician_requests.pages

['Alexander Grothendieck', 'Alan Turing', 'Gottlieb Frege', 'David Hilbert']

In [22]:
mathematician_docs = create_wikidocs(mathematician_requests)
# Somehow Alan Turing gets converted to alan tuning by the wikipedia module. 

In [23]:
mathematician_docs

[Document(id_='2042', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='Alexander Grothendieck (; German pronunciation: [ˌalɛˈksandɐ ˈɡʁoːtn̩ˌdiːk] ; French: [ɡʁɔtɛndik]; 28 March 1928 – 13 November 2014) was a German-born mathematician who became the leading figure in the creation of modern algebraic geometry. His research extended the scope of the field and added elements of commutative algebra, homological algebra, sheaf theory, and category theory to its foundations, while his so-called "relative" perspective led to revolutionary advances in many areas of pure mathematics. He is considered by many to be the greatest mathematician of the twentieth century.\nGrothendieck began his productive and public career as a mathematician in 1949. In 1958, he was appointed a research professor at the Institut des hautes études scientifiques (IHÉS) and remained there until 1970, when, driven by personal and political convictions,

# Create an index

In [26]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter

Settings.text_splitter = SentenceSplitter(chunk_size=150, chunk_overlap=50)


def create_index(query):
    global index # why this?
    wikipage_requests = wikipage_list(query)
    docs = create_wikidocs(wikipage_requests)

    index = VectorStoreIndex.from_documents(
        documents=docs, 
        transformations=[Settings.text_splitter]
    )

    return index




In [28]:
doc_index = create_index("please index: Alexander Grothendieck, Gottlieb Frege, and David Hilbert.")

Function call: WikiPageList with args: {"pages":["Alexander Grothendieck","Gottlieb Frege","David Hilbert"]}


In [47]:
type(doc_index)

llama_index.core.indices.vector_store.base.VectorStoreIndex

# Create chat agent

In [48]:
# import libraries

import openai
import asyncio

# llama index libraries
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.core.callbacks.base import CallbackManager

# chain lit libraries
import chainlit as cl
from chainlit.input_widget import Select, TextInput

# your libraries
from index_wikipages import create_index


index: Optional[VectorStoreIndex] = None

@cl.on_chat_start
async def on_chat_start():
    global index
    # Settings
    settings = await cl.ChatSettings(
        [
            Select(
                id= "MODEL",
                label= "Choose which model to use.",
                values=["gpt-3.5-turbo", "gpt-4"],
                initial_index=0,
            ),
            
            TextInput(
                id="WikiPageRequest", 
                label="Request Wikipage"
            ),
        ]
    ).send()

def wikisearch_engine(index):
    query_engine = index.as_query_engine()
    return query_engine