In [52]:
import logging
import sys
from dotenv import load_dotenv
import os
from pinecone import Pinecone, ServerlessSpec
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
)
from llama_index.vector_stores.pinecone import PineconeVectorStore

# Load variables from a local .env file (if present) into the process
# environment, then read the Pinecone key from there so it is never hard-coded.
load_dotenv()

pinecone_api_key = os.environ.get('PINECONE_API_KEY')

if not pinecone_api_key:
    # Stop here with a message that names the variable actually being read,
    # instead of continuing and building a client without a usable key.
    raise RuntimeError("PINECONE_API_KEY environment variable is not set.")
print("PINECONE_API: found")

# Client shared by every later cell in this notebook.
pc = Pinecone(api_key=pinecone_api_key)

# Optional: uncomment to echo INFO-level log records to the cell output.
# NOTE(review): addHandler attaches one more handler every time it runs, so
# re-executing this cell with the second line enabled repeats each log record;
# presumably that is why the saved output shows duplicated lines - confirm.
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


def check_index_exists(index_name, client=None):
    """Report whether the Pinecone index ``index_name`` can be reached.

    The check opens a handle to the index and requests its statistics. Any
    exception raised by either step is treated as "the index does not exist".

    Args:
        index_name: Name of the Pinecone index to look up.
        client: Object exposing ``Index(name)``. Defaults to the notebook-level
            ``pc`` client when omitted.

    Returns:
        True if the statistics request succeeded, False otherwise.
    """
    pinecone_client = pc if client is None else client
    try:
        # Query the index that was asked about (not a hard-coded name) and do
        # it before announcing success, so the message reflects a real reply.
        index_stats = pinecone_client.Index(index_name).describe_index_stats()
    except Exception as exc:
        # Still deliberately broad so the notebook keeps running, but the
        # reason is logged: an auth or network failure would otherwise be
        # indistinguishable from a missing index.
        logging.info("Lookup of index '%s' failed: %r", index_name, exc)
        print(f"Index '{index_name}' does not exist.")
        return False

    print(f"Index '{index_name}' already exists.")
    logging.info(index_stats)
    return True

# Single place to change which Pinecone index this notebook works against.
INDEX_NAME = "gael"

if not check_index_exists(INDEX_NAME):
    # First run: create the Pinecone index, then embed and upload the
    # documents found in the local data directory.
    logging.info("Creating index %s", INDEX_NAME)

    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # Replace with your model dimensions
        metric="euclidean",  # Replace with your model metric
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )

    documents = SimpleDirectoryReader("../data/test").load_data()

    # Route LlamaIndex storage through the Pinecone-backed vector store.
    storage_context = StorageContext.from_defaults(
        vector_store=PineconeVectorStore(pc.Index(INDEX_NAME))
    )

    index = VectorStoreIndex.from_documents(
        documents, storage_context=storage_context
    )
else:
    # The Pinecone index already exists: attach to it without re-ingesting.
    # Without this branch `index` was never bound on this path, so later cells
    # that use it failed with NameError on a fresh kernel.
    index = VectorStoreIndex.from_vector_store(
        PineconeVectorStore(pc.Index(INDEX_NAME))
    )

PINECONE_API: found
Index 'gael' already exists.
INFO:root:{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 11}},
 'total_vector_count': 11}
{'dimension': 1536,
 'index_f

In [53]:
# Ask the vector index a question through its default query engine; the
# answer object is kept in `response`.
question = "What is attention and why do I need it?"
query_engine = index.as_query_engine()
response = query_engine.query(question)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://