In [1]:
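# !pip install llama-index  # NOTE(review): the cells below use the legacy top-level imports (`from llama_index import ...`); newer llama-index releases may require pinning an older version - TODO confirm the exact version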
# !pip install openai
# !pip install pypdf
# !pip install torch
# !pip install transformers
# !pip install accelerate # requires runtime restart

In [2]:
# LLM / PROMPT SETTINGS
# Tunable values for the (currently disabled) custom LLM setup. In the cells
# shown here they are only referenced by the two commented-out constructors at
# the bottom of this cell.
chunk_size_limit = 600     # upper bound on the size of a single chunk
chunk_overlap_ratio = 0.5  # replaces the removed `max_chunk_overlap = 20` setting
num_outputs = 256          # number of output tokens
max_input_size = 4096      # maximum input size

# Per the original notes:
#   - llm_predictor identifies chunks in the prompt to be queried from the index
#   - prompt_helper builds a prompt from the list of chunks queried from the index
# Before enabling these lines, make sure LLMPredictor, OpenAI and PromptHelper are
# imported first (their imports live in a later cell of this notebook).
# llm_predictor = LLMPredictor(llm=OpenAI(temperature=0.8, model_name="text-davinci-003", max_tokens=num_outputs))
# prompt_helper = PromptHelper(max_input_size, num_outputs, chunk_overlap_ratio, chunk_size_limit=chunk_size_limit)

**Notes**
* RAG Pipeline: Retrieval Augmented Generation -> Index-ing + Query-ing
    * The Index stage follows a step in which documents are parsed into small chunks
* LlamaIndex supports building a pipeline to query multiple indexes, with underlying logic connecting the indexes
* ServiceContext: a supporting module that holds the attributes used for indexing and querying, such as the LLM used and the number of chunks used as context in the prompt
* It is possible to expand from an existing index by adding new documents (chunks) or by adding metadata.
    * https://gpt-index.readthedocs.io/en/latest/end_to_end_tutorials/usage_pattern.html#index-construction

In [3]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper
from llama_index import StorageContext, load_index_from_storage
from langchain import OpenAI
import os
from transformers import pipeline

# SET PATHS
# The roots are derived from the current user's home directory instead of one
# machine's hardcoded absolute path, so the notebook can run under another
# account. Each root keeps its trailing separator (the final '' passed to
# os.path.join) so existing `root_dir + 'sub/path'` concatenations keep working.
root_dir_local = os.path.join(os.path.expanduser('~'), '')
root_dir_cloud = os.path.join(root_dir_local, 'brauliopf@gmail.com - Google Drive', 'My Drive', '')
root_dir_cloudUM = os.path.join(root_dir_local, 'braulio@umich.edu - Google Drive', 'My Drive', '')
# os.listdir(root_dir_local) # list all files in the directory

# LOAD DOCS
# The folder to index defaults to the original location under the home
# directory; set the RAG_DOCS_DIR environment variable to point elsewhere.
docs_dir = os.environ.get(
    'RAG_DOCS_DIR',
    os.path.join(root_dir_local, 'Documents', 'Dev', 'old', '100-MLBook', 'Test'),
)
documents = SimpleDirectoryReader(docs_dir).load_data()

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# # IF TRYING THIS... remember to import ServiceContext (it is not imported anywhere in the cells shown) and to pass the service_context when building the index

# # USE HUGGINGFACE LLM
# from llama_index.prompts.prompts import SimpleInputPrompt


# system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
# - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
# - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
# - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
# - StableLM will refuse to participate in anything that could harm a human.
# """

# # This will wrap the default prompts that are internal to llama-index
# query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

# import torch
# from llama_index.llms import HuggingFaceLLM
# llm = HuggingFaceLLM(
#     context_window=4096, 
#     max_new_tokens=256,
#     generate_kwargs={"temperature": 0.7, "do_sample": False},
#     system_prompt=system_prompt,
#     query_wrapper_prompt=query_wrapper_prompt,
#     tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
#     model_name="StabilityAI/stablelm-tuned-alpha-3b",
#     device_map="auto",
#     stopping_ids=[50278, 50279, 50277, 1, 0],
#     tokenizer_kwargs={"max_length": 4096},
#     # uncomment this if using CUDA to reduce memory usage
#     # model_kwargs={"torch_dtype": torch.float16}
# )
# service_context = ServiceContext.from_defaults(
#     chunk_size=1024, 
#     llm=llm,
# )

In [5]:
# LOAD/BUILD INDEX
# Reuse a previously persisted index when one exists; otherwise build it from
# `documents` and persist it so the next run can skip the rebuild.
# NOTE: a loaded index is used as-is; it is not compared against `documents`,
# so delete the storage folder to force a rebuild after the documents change.
persist_dir = "./storage"  # single location used for both loading and saving
try:
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    index = load_index_from_storage(storage_context)
    print("Index loaded from storage")
except (FileNotFoundError, ValueError):
    # Only a missing or unreadable store triggers a rebuild. The previous bare
    # `except:` also caught KeyboardInterrupt and programming errors (e.g. a
    # NameError from a cell that was not run) and silently rebuilt the index.
    # NOTE(review): exception types assumed from llama-index's storage loaders -
    # confirm against the installed version.
    index = VectorStoreIndex.from_documents(documents,
                                            # service_context=service_context # use if custom LLM and not using global
                                            )
    index.storage_context.persist(persist_dir=persist_dir) # save the index under persist_dir (created or replaced)
    print("No index found in storage. Created a new one.")

Index loaded from storage


In [6]:
# QUERY THE INDEX

# Question-answering engine: the 5 most similar chunks are used as context in
# the prompt (per the original note, the default is 2).
query_engine = index.as_query_engine(similarity_top_k=5)
# Chat-style alternative:
# query_engine = index.as_chat_engine(similarity_top_k=5)

question = "List the types of Machine Learning algorithms and their use cases."
response = query_engine.query(question)
print(response)


The types of Machine Learning algorithms are:
1. Supervised Learning: Used to build models that take a feature vector as input and output information that allows deducing the label for this feature vector.
2. Unsupervised Learning: Used to create a model that takes a feature vector as input and either transforms it into another vector or into a value that can be used to solve a practical problem.
3. Semi-Supervised Learning: Used to find a better model by using many unlabeled examples.
4. Reinforcement Learning: Used to learn a policy that takes the feature vector of a state as input and outputs an optimal action to execute in that state.
