In [2]:
# %%
# Install necessary libraries for Llama-Index and related embeddings and LLMs.
%pip install llama-index
%pip install llama-index-vector-stores-faiss
%pip install -U sentence-transformers==2.2.2 
%pip install llama-index-embeddings-huggingface
%pip install llama-index-embeddings-instructor
%pip install llama-index-llms-huggingface
%pip install llama-index-llms-huggingface-api
%pip install llama-index-embeddings-huggingface-api
%pip install nest-asyncio

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[0mCollecting sentence-transformers==2.2.2
  Using cached sentence_transformers-2.2.2-py3-none-any.whl
[0mInstalling collected packages: sentence-transformers
  Attempting uninstall: sentence-transformers
    Found existing installation: sentence-transformers 2.7.0
    Uninstalling sentence-transformers-2.7.0:
      Successfully uninstalled sentence-tran

In [11]:
# %%
# Import logging module to configure logging settings.
import logging
import sys

# Set up basic logging configuration to output debug information to the console.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [12]:
# %%
# Define the data for indexing. These are titles of various AI and Machine Learning topics.
data = [
    'Introduction to Artificial Intelligence and Machine Learning',
    'Supervised Learning Algorithms: Linear Regression and Logistic Regression',
    'Unsupervised Learning: Clustering Algorithms - KMeans and DBSCAN',
    'Neural Networks and Deep Learning: Introduction to TensorFlow and Keras',
    'Natural Language Processing (NLP) Techniques: Tokenization and Word Embeddings',
    'Computer Vision and Image Processing: Convolutional Neural Networks (CNNs)',
    'Reinforcement Learning and Decision Making: Q-Learning and Markov Decision Processes',
    'Time Series Forecasting: ARIMA and Exponential Smoothing Methods',
    'Model Evaluation and Performance Metrics: ROC Curves and Confusion Matrices',
    'Feature Engineering and Data Preprocessing: Handling Missing Data and Outliers'
]


In [13]:
# %%
# Import HuggingFaceEmbedding for embedding generation using a specific Hugging Face model.
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [14]:
from llama_index.embeddings.huggingface_api import HuggingFaceInferenceAPIEmbedding

remote_embed_api = HuggingFaceInferenceAPIEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

In [15]:
# %%
# Set up the HuggingFaceLLM to load the phi-1_5 model for local inference. (Commented out as it's not executed)

"""
from llama_index.llms.huggingface import HuggingFaceLLM

llm = HuggingFaceLLM(
    model_name="microsoft/phi-1_5",
    tokenizer_name="microsoft/phi-1_5",
    max_new_tokens=100,
    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95, "do_sample":True},
    device_map="auto",
)
"""

'\nfrom llama_index.llms.huggingface import HuggingFaceLLM\n\nllm = HuggingFaceLLM(\n    model_name="microsoft/phi-1_5",\n    tokenizer_name="microsoft/phi-1_5",\n    max_new_tokens=100,\n    generate_kwargs={"temperature": 0.7, "top_k": 50, "top_p": 0.95, "do_sample":True},\n    device_map="auto",\n)\n'

In [16]:
# %%
# Alternatively, set up the HuggingFaceInferenceAPI to use a remote model for inference.
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

remote_llm_api = HuggingFaceInferenceAPI(
    model_name="HuggingFaceH4/zephyr-7b-alpha"
)

In [17]:
# %%
# Import necessary components for setting up and managing the Llama-Index with FAISS vector store.
from llama_index.core import (
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
    Document,
    Settings,
)
from llama_index.vector_stores.faiss import FaissVectorStore
from IPython.display import Markdown, display

# Set the LLM and embedding model to use for the ServiceContext.
Settings.llm = remote_llm_api
Settings.embed_model = remote_embed_api

import faiss

# Configure FAISS vector store with the appropriate dimensions for embeddings.
d = 384
faiss_index = faiss.IndexFlatL2(d)

vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)


In [19]:
# %%
# Create documents from the data and build the vector store index from these documents.
import nest_asyncio

nest_asyncio.apply()
documents = [Document(text=t) for t in data]
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Introduction to Artificial Intelligence and Mac...
> Adding chunk: Introduction to Artificial Intelligence and Mac...
> Adding chunk: Introduction to Artificial Intelligence and Mac...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Supervised Learning Algorithms: Linear Regressi...
> Adding chunk: Supervised Learning Algorithms: Linear Regressi...
> Adding chunk: Supervised Learning Algorithms: Linear Regressi...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Unsupervised Learning: Clustering Algorithms - ...
> Adding chunk: Unsupervised Learning: Clustering Algorithms - ...
> Adding chunk: Unsupervised Learning: Clustering Algorithms - ...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Neural Networks and Deep Learning: Introduction...
> Adding chunk: Neural Networks and Deep Learning: Introduction...
> Adding chunk: Neural Networks and Deep Learning: Introduction...
DEBUG:llama_

In [20]:
# %%
# Save the created index to disk for later use.
index.storage_context.persist()

DEBUG:fsspec.local:open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
DEBUG:fsspec.local:open file: /Users/ishaansehgal/Documents/rag_demo/storage/index_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/index_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/index_store.json
DEBUG:fsspec.local:open file: /Users/ishaansehgal/Documents/rag_demo/storage/graph_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/graph_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/graph_store.json
DEBUG:fsspec.local:open file: /Users/ishaansehgal/Documents/rag_demo/storage/image__vector_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/image__vector_store.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/image__vector_store.json


In [21]:
# %%
# Load the index from disk to enable querying.
vector_store = FaissVectorStore.from_persist_dir("./storage")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir="./storage"
)
index = load_index_from_storage(storage_context=storage_context)

INFO:root:Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/docstore.json.
DEBUG:fsspec.local:open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
open file: /Users/ishaansehgal/Documents/rag_demo/storage/docstore.json
DEBUG:llama_index.core.storage.kvstore.simple_kvstore:Loading llama_index.core.storage.kvstore.simple_kvstore from ./storage/index_store.json.
Loading llama_index.core.storage.kvs

In [24]:
# %%
# Create a query engine from the loaded index and perform a query to retrieve relevant information.
query_engine = index.as_query_engine()
response = query_engine.query("Teach me about AI")

DEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node 0] [Similarity score:             0.557331] Introduction to Artificial Intelligence and Machine Learning
> [Node 6] [Similarity score:             0.65251] Reinforcement Learning and Decision Making: Q-Learning and Markov Decision Processes
> Top 2 nodes:
> [Node 0] [Similarity score:             0.557331] Introduction to Artificial Intelligence and Machine Learning
> [Node 6] [Similarity score:             0.65251] Reinforcement Learning and Decision Making: Q-Learning and Markov Decision Processes
> Top 2 nodes:
> [Node 0] [Similarity score:             0.557331] Introduction to Artificial Intelligence and Machine Learning
> [Node 6] [Similarity score:             0.65251] Reinforcement Learning and Decision Making: Q-Learning and Markov Decision Processes
DEBUG:urllib3.connectionpool:https://api-inference.huggingface.co:443 "POST /models/HuggingFaceH4/zephyr-7b-alpha HTTP/1.1" 200 None
https://api-inference.huggingface.co:4

In [26]:
# %%
# Display the query response in Markdown format.
display(Markdown(f"<b>{response}</b>"))


<b>

Artificial Intelligence (AI) is a branch of computer science that focuses on creating intelligent machines that can learn, reason, and make decisions like humans do. AI involves the use of algorithms and statistical models to enable computers to perform tasks that would normally require human intelligence, such as speech recognition, image processing, and decision-making.

AI is a rapidly growing field that has the potential to transform many industries, including healthcare, finance, and transportation. In healthcare, AI is being used to develop personalized treatment plans for patients, while in finance, it is being used to predict stock prices and identify fraud. In transportation, AI is being used to develop self-driving cars that can navigate roads safely and efficiently.

There are several types of AI, including:

1. Rule-based systems: These systems use a set of predefined rules to make decisions.

2. Neural networks: These systems are modeled after the human brain and can learn and make decisions based on patterns in data.

3. Bayesian networks: These systems use probability theory to make decisions based on uncertain data.

4. Reinforcement learning: This type of AI involves learning through trial and error, and is often used in decision</b>