In [3]:
import os
import textwrap
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from deeplake.core.vectorstore import DeepLakeVectorStore


# The .env file should contain the following entries. DATA_FOLDER_PATH and
# VECTORSTORE_PATH are read with os.getenv in the cells below, so the names
# must match exactly:
## OPENAI_API_KEY=your_openai_api_key
## DATA_FOLDER_PATH=your_data_dir
## VECTORSTORE_PATH=your_vectorstore_path, can be any path where vectors will be stored

from dotenv import load_dotenv
load_dotenv('../.env') # path to the .env file; replace with the location of your own

True

### Load documents
`DATA_FOLDER_PATH` is the path where I have all my files. You can point this to a folder with your own documents.

In [4]:
# Resolve the documents folder from the environment and fail fast with an
# actionable message if the variable is missing, instead of handing None to
# the reader.
data_folder_path = os.getenv('DATA_FOLDER_PATH')
if not data_folder_path:
    raise ValueError("DATA_FOLDER_PATH is not set; add it to your .env file.")

# Read every file in the folder into a list of llama_index documents.
documents = SimpleDirectoryReader(data_folder_path).load_data()

### Creating Vector Store
We will use [deeplake](https://github.com/activeloopai/deeplake?utm_source=deeplakeweb&utm_medium=web&utm_campaign=navbar&utm_id=deeplake) from Activeloop to create our vector store. The `DeepLakeVectorStore` is then wrapped in a llama_index `StorageContext`. Finally, we use llama_index's `VectorStoreIndex` to read the documents and store them in our vector store through that storage context.

In [11]:
from llama_index.core import StorageContext

# Location where the vectors will be stored, taken from the environment.
dataset_path = os.getenv('VECTORSTORE_PATH')
if not dataset_path:
    # Without this guard the os.path / os.makedirs calls would raise an
    # opaque TypeError on None.
    raise ValueError("VECTORSTORE_PATH is not set; add it to your .env file.")

# exist_ok=True creates the folder when needed and is a no-op when it already
# exists, so the cell can be re-run safely.
os.makedirs(dataset_path, exist_ok=True)

# Create an index over the documents.
# NOTE: overwrite=True is expected to replace any dataset already stored at
# dataset_path (per the DeepLake docs), so re-running this cell rebuilds the
# vector store from scratch -- set it to False to keep existing vectors.
vector_store = DeepLakeVectorStore(path=dataset_path, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)



### Query Time
Now that our documents are indexed and stored in DeepLake's vector store, we can run queries on them.

In [14]:
# Build a query engine on top of the index and ask it a question.
query_engine = index.as_query_engine()
response = query_engine.query("Who is Awais?")

# Wrap the textual answer at 100 characters so it reads well in the cell output.
wrapped_answer = textwrap.fill(str(response), width=100)
print(wrapped_answer)

Muhammad Awais Kaleem is Awais.
