# LlamaIndex Introduction

In [21]:
import os

from llamaindex_custom_utils.helper import (
    get_activeloop_api_key,
    get_openai_api_key,
    print_response,
)

# Credentials are obtained through the project's helper module and kept in
# module-level constants for the rest of the notebook.
# NOTE(review): nothing here exports them to os.environ; presumably the helper
# takes care of that -- confirm against llamaindex_custom_utils.helper.
OPENAI_API_KEY = get_openai_api_key()
ACTIVELOOP_TOKEN = get_activeloop_api_key()

In [2]:
import logging
import sys

# Route log records to stdout so they appear in the notebook output.
# DEBUG is the most verbose level; use level=logging.INFO for less detail.
#
# One basicConfig call installs exactly one stdout handler on the root logger.
# force=True (Python 3.8+) removes any handlers that are already attached, so
# the configuration always takes effect and re-running this cell never stacks
# up additional handlers that would print every message more than once.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)

## LlamaHub Wikipedia Integration

In [None]:
from llama_index import download_loader

# Look up the Wikipedia reader class by its loader name, then create the
# reader instance used by the following cells.
WikipediaReader = download_loader("WikipediaReader")
loader = WikipediaReader()

In [None]:
# Load the listed Wikipedia pages through the reader created above.
documents = loader.load_data(
    pages=["Natural Language Processing", "Artificial Intelligence"],
)

In [None]:
# Number of loaded documents (shown as the cell's output).
len(documents)

## Create index from Documents

In [None]:
from llama_index import VectorStoreIndex

# Build a vector index over the loaded documents. No storage context is
# passed here, so the library's default storage is used.
# VectorStoreIndex is used instead of the legacy GPTVectorStoreIndex alias so
# this cell matches the Deep Lake-backed index built later in the notebook.
index = VectorStoreIndex.from_documents(documents)

# Ask one question through the index's default query engine and print the
# text of the answer with the project's print helper.
query_engine = index.as_query_engine()
response = query_engine.query("What does NLP stands for?")
print_response(response.response)

## Create Nodes

In [None]:
from llama_index.node_parser import SimpleNodeParser

# `documents` must already hold the pages loaded earlier in the notebook.

# Configure the parser with the chunk size and overlap used for splitting.
parser = SimpleNodeParser.from_defaults(
    chunk_size=512,
    chunk_overlap=20,
)

# Split the documents into nodes and report how many were produced.
nodes = parser.get_nodes_from_documents(documents)
print(f"Number of chunks = {len(nodes)}")

## Save to DeepLake

In [None]:
from llama_index.vector_stores import DeepLakeVectorStore

# Target dataset, addressed as hub://<organization id>/<dataset name>.
my_activeloop_org_id = "jdhernandez"
my_activeloop_dataset_name = "llamaindex-intro"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Create the vector store for that dataset path.
# NOTE(review): overwrite=True presumably replaces any dataset that already
# exists at this path -- confirm against the Deep Lake documentation.
vector_store = DeepLakeVectorStore(
    dataset_path=dataset_path,
    overwrite=True,
)

In [None]:
from llama_index.storage.storage_context import StorageContext

# Wrap the vector store in a storage context so it can be handed to an index.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
)

In [None]:
from llama_index import VectorStoreIndex

# Build the index from the loaded documents, passing the storage context that
# wraps the vector store created above.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)