# In [None]:
import argparse
import logging
import os

import dotenv
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def load_environment(env_file=".env"):
    """Load a dotenv file and verify the required OpenAI settings are present.

    Args:
        env_file: Path to the dotenv file; it is made absolute before use.

    Returns:
        True when the file exists and OPENAI_API_KEY, OPENAI_BASE_URL and
        OPENAI_MODEL all have non-empty values after loading. False when
        the file is missing, a variable is unset or empty, or any exception
        is raised along the way; the reason is logged in each case.
    """
    try:
        env_path = os.path.abspath(env_file)
        if not os.path.exists(env_path):
            logger.error(f"Error: {env_path} does not exist")
            return False

        dotenv.load_dotenv(dotenv_path=env_path)

        for name in ("OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_MODEL"):
            is_present = bool(os.getenv(name))
            # Only whether the variable is set is logged, never its value.
            status = "is set" if is_present else "is not set"
            logger.info(f"{name}: {status}")
            if is_present:
                continue
            # Stop at the first missing variable.
            logger.error(f"Error: {name} is not set")
            return False
    except Exception as exc:
        logger.error(f"Error loading environment: {exc}")
        return False

    return True

def configure_models():
    """Build the OpenAI LLM and embedding model from environment settings.

    Both objects are given the API key from OPENAI_API_KEY and the base URL
    from OPENAI_BASE_URL. The LLM model name is read from OPENAI_MODEL and
    falls back to "gpt-4o-mini"; the embedding model is always
    "text-embedding-3-small".

    Returns:
        A ``(llm, embed_model)`` tuple.
    """
    # Credentials and endpoint shared by both constructors.
    connection = {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "api_base": os.getenv("OPENAI_BASE_URL"),
    }
    model_name = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
    return (
        OpenAI(model=model_name, **connection),
        OpenAIEmbedding(model="text-embedding-3-small", **connection),
    )

def create_index(embed_model, documents_dir="documents", persist_dir="local_vector_store", batch_size=128):
    """Load a persisted vector index, or build one from documents and persist it.

    Args:
        embed_model: Embedding model installed as the global
            ``Settings.embed_model`` before any index is loaded or built.
        documents_dir: Directory read with ``SimpleDirectoryReader`` when no
            persisted index exists.
        persist_dir: If this path exists, the index is loaded from it;
            otherwise a newly built index is persisted to it.
        batch_size: Passed as ``insert_batch_size`` when building a new
            index; not used when an existing index is loaded.

    Returns:
        The loaded or newly built index, or None when there is no persisted
        index and ``documents_dir`` does not exist.
    """
    logger.info("Setting global embed model...")
    # Install the caller-supplied model globally before touching any index.
    Settings.embed_model = embed_model

    if os.path.exists(persist_dir):
        logger.info(f"Loading existing index from: {persist_dir}")
        storage_context = StorageContext.from_persist_dir(persist_dir)
        return load_index_from_storage(storage_context)

    if not os.path.exists(documents_dir):
        logger.error(f"Error: {documents_dir} does not exist")
        return None

    logger.info(f"Creating index from documents in: {documents_dir}")
    documents = SimpleDirectoryReader(documents_dir).load_data()
    # VectorStoreIndex's batching keyword is `insert_batch_size`.
    index = VectorStoreIndex.from_documents(documents, insert_batch_size=batch_size)
    # Persist so the next call takes the load branch above.
    index.storage_context.persist(persist_dir=persist_dir)
    return index

def query_index(index, llm, query):
    """Answer ``query`` against ``index`` using ``llm``.

    The matching nodes are first fetched through a retriever so they can be
    logged; the answer itself comes from a query engine built on the same
    index.

    Args:
        index: Index object providing ``as_retriever`` and ``as_query_engine``.
        llm: Language model handed to the query engine.
        query: Question text; must be a non-empty string.

    Returns:
        The query engine's response, or None when ``query`` is not a string
        or contains only whitespace (an error is logged in both cases).
    """
    if not isinstance(query, str):
        logger.error(f"Error: query must be a string, got {type(query)}")
        return None
    if not query.strip():
        # Nothing meaningful to retrieve or ask the LLM about.
        logger.error("Error: query must not be empty")
        return None

    # This retrieval is for logging only; the query engine below does its own.
    retriever = index.as_retriever()
    retrieved_docs = retriever.retrieve(query)
    logger.info(f"Retrieved documents: {retrieved_docs}")
    logger.info(f"Retreived document length: {len(retrieved_docs)}")

    query_engine = index.as_query_engine(llm=llm)
    return query_engine.query(query)

def main(query="What is the capital of Italy?"):
    """Run the demo end to end for a single query.

    Steps: load the environment, build the LLM and embedding model, send
    ``query`` directly to the LLM, load or create the vector index, then
    answer ``query`` through the index. Each result is logged.

    Args:
        query: Question sent both to the bare LLM and to the index.

    Returns:
        None. Failures are logged rather than raised: the function returns
        early if the environment cannot be loaded or the index cannot be
        created, and any exception from the later steps is caught and logged.
    """
    if not load_environment():
        logger.error("Environment variables not loaded. Exiting.")
        return

    try:
        logger.info("Configuring LLM and Embedding Model...")
        llm, openai_embed_model = configure_models()
        logger.info("LLM and Embedding Model initialized successfully")

        # Direct completion, without any retrieved context.
        logger.info(f"Making API request with query: {query}")
        completion = llm.complete(query)
        logger.info(f"API response: {completion}")

        index = create_index(
            openai_embed_model,
            documents_dir="documents",
            persist_dir="local_vector_store",
            batch_size=128,
        )
        if index is None:
            logger.error("Error creating index. Exiting.")
            return
        logger.info("Index created successfully")

        response = query_index(index, llm, query)
        if response:
            logger.info(f"Query engine response: {response}")

    except Exception as e:
        # Top-level boundary: logger.exception keeps the traceback, which a
        # plain logger.error call would drop.
        logger.exception(f"Error in main: {e}, type: {type(e)}")
    
def is_notebook():
    """Report whether the code is running under a ZMQ-based IPython shell.

    Returns:
        True when ``get_ipython()`` is available and the class of the shell
        it returns is named 'ZMQInteractiveShell'; False when that name
        differs or when ``get_ipython`` is not defined at all.
    """
    try:
        # `get_ipython` is not imported in this function; if the name is
        # undefined the lookup raises NameError.
        shell_name = get_ipython().__class__.__name__
    except NameError:
        return False
    return shell_name == 'ZMQInteractiveShell'

if __name__ == "__main__":
    # NOTE(review): only the notebook path is handled in the code visible
    # here; `argparse` is imported at the top of the file but not used in
    # this span. Confirm whether a command-line path exists elsewhere.
    default_query = "What is so cool about capital of Italy?"
    if is_notebook():
        # Use the module logger, as the rest of the file does, rather than
        # the root-level `logging.info`.
        logger.info(f"Running in notebook mode with default query: {default_query}")
        main(query=default_query)