## CSV Vector DB

In [23]:
import os
import shutil
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from dotenv import load_dotenv
import openai
from llama_index.embeddings.openai import (
    OpenAIEmbedding,
    OpenAIEmbeddingModelType
)
# Load environment variables (python-dotenv reads a local .env file, if any)
load_dotenv()

# Fail fast with an actionable message when the API key is missing.
# The previous `os.environ[...] = os.getenv(...)` round-trip was a no-op when
# the key was set and raised an opaque `TypeError: str expected, not NoneType`
# when it was not. An empty value is rejected as well, since it cannot
# authenticate the embedding requests made below.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# Source folder and persistence target, named once so that the makedirs() and
# persist() calls below cannot drift apart.
CSV_INPUT_DIR = 'data/preprocessed-1k-samples/CSV'
CSV_PERSIST_DIR = 'Health-RAG/RAG/VDB-1k-samples/CSV'

# Read documents from the organized CSV folder
reader = SimpleDirectoryReader(input_dir=CSV_INPUT_DIR)
docs = reader.load_data()

# Build the VectorStoreIndex directly from the documents, embedding them with
# OpenAI's text-embedding-3-large model.
embed_model = OpenAIEmbedding(model=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE)
index = VectorStoreIndex.from_documents(docs, embed_model=embed_model, show_progress=True)
index.set_index_id('health')

# Create the storage path if it doesn't exist
os.makedirs(CSV_PERSIST_DIR, exist_ok=True)

# Persist the vector store
index.storage_context.persist(CSV_PERSIST_DIR)

Parsing nodes: 100%|██████████| 143/143 [00:00<00:00, 1518.40it/s]
Generating embeddings: 100%|██████████| 143/143 [00:07<00:00, 18.19it/s]


## PDF Vector DB


In [17]:
import os
import shutil
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from dotenv import load_dotenv
import openai
from llama_index.embeddings.openai import (
    OpenAIEmbedding,
    OpenAIEmbeddingModelType
)
# Load environment variables (python-dotenv reads a local .env file, if any)
load_dotenv()

# Fail fast with an actionable message when the API key is missing.
# The previous `os.environ[...] = os.getenv(...)` round-trip was a no-op when
# the key was set and raised an opaque `TypeError: str expected, not NoneType`
# when it was not. An empty value is rejected as well, since it cannot
# authenticate the embedding requests made below.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# Source folder and persistence target, named once so that the makedirs() and
# persist() calls below cannot drift apart.
PDF_INPUT_DIR = 'data/preprocessed-1k-samples/PDF'
PDF_PERSIST_DIR = 'Health-RAG/RAG/VDB-1k-samples/PDF'

# Read documents from the organized PDF folder
reader = SimpleDirectoryReader(input_dir=PDF_INPUT_DIR)
docs = reader.load_data()

# Build the VectorStoreIndex directly from the documents, embedding them with
# OpenAI's text-embedding-3-large model.
embed_model = OpenAIEmbedding(model=OpenAIEmbeddingModelType.TEXT_EMBED_3_LARGE)
index = VectorStoreIndex.from_documents(docs, embed_model=embed_model, show_progress=True)
index.set_index_id('health')

# Create the storage path if it doesn't exist
os.makedirs(PDF_PERSIST_DIR, exist_ok=True)

# Persist the vector store
index.storage_context.persist(PDF_PERSIST_DIR)

Parsing nodes: 100%|██████████| 15/15 [00:00<00:00, 266.01it/s]
Generating embeddings: 100%|██████████| 15/15 [00:02<00:00,  5.33it/s]
