<img heigth="8" src="https://i.imgur.com/BcT6v4W.png" alt="llmix">

<h1 align="left">Getting Started</h1>
<h2 align="left"><i>From Inception to Fruition</i></h2>

<p align="left">
  <h3><a href="https://joefaver.dev">Joseph F. Vergel-Becerra</a></h3>
  <br>
  <b>Last updated:</b> <i>03/02/2024</i>
  <br><br>
  <a href="#introduction">Introduction</a> •
  <a href="#references">References</a>
  <br><br>
</p>
<table align="left">
  <td>
      <a href="https://img.shields.io/badge/version-0.1.0-blue.svg?cacheSeconds=2592000">
        <img src="https://img.shields.io/badge/version-0.1.0-blue.svg?cacheSeconds=2592000" alt="Version" height="18">
      </a>
  </td>
  <td>
    <a href="https://colab.research.google.com/github/joefavergel/pbs-enae-python-applications-course/blob/main/2-retail-store-item-detection-preparation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
      </a>
  </td>
  <td>
    <a href="https://github.com/joefavergel/pbs-enae-python-applications-course" target="_parent"><img src="https://img.shields.io/github/forks/joefavergel/pbs-enae-python-beginners-course?style=social" alt="Fork"/>
    </a>
  </td>
</table>
<br>
<br>

---

A compilation of notes.

<a id="introduction"></a>
## Introduction

In [1]:
import os
import os.path
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

from pprint import pprint

In [2]:
try:
    import importlib

    __package__ = "llmix"

    found = False
    while not found:
        os.chdir(os.path.join(os.getcwd(), os.pardir))
        found = bool(importlib.util.find_spec(__package__))
        if found:
            print(f"[INFO] WORKDIR: {os.path.join(os.getcwd(), os.pardir)}")
        continue
except ModuleNotFoundError as e:
    print("[ERROR] Verify WORKDIR execution: ", e)

[INFO] WORKDIR: /Users/joefaver/Codes/self/llm-information-retrieval/..


In [6]:
import llmix
from llmix.config import settings
from llmix.logging import get_logger

logger = get_logger(type_='stream', level='DEBUG')

<a id="1-starter-tutorial"></a>
## 1. Starter tutorial

In [7]:
# check if storage already exists
PERSIST_DIR = "./storage"
if not os.path.exists(PERSIST_DIR) or not os.path.exists(os.path.join(PERSIST_DIR, 'docstore.json')):
    # load the documents and create the index
    documents = SimpleDirectoryReader("datalake").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=PERSIST_DIR)
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

2024-02-04 11:15:22,472 [36;20m [llama_index.storage.kvstore.simple_kvstore] [DEBUG] [0m Loading llama_index.storage.kvstore.simple_kvstore from ./storage/docstore.json.
2024-02-04 11:15:22,473 [36;20m [fsspec.local] [DEBUG] [0m open file: /Users/joefaver/Codes/self/llm-information-retrieval/storage/docstore.json
2024-02-04 11:15:22,474 [36;20m [llama_index.storage.kvstore.simple_kvstore] [DEBUG] [0m Loading llama_index.storage.kvstore.simple_kvstore from ./storage/index_store.json.
2024-02-04 11:15:22,474 [36;20m [fsspec.local] [DEBUG] [0m open file: /Users/joefaver/Codes/self/llm-information-retrieval/storage/index_store.json
2024-02-04 11:15:22,475 [36;20m [llama_index.graph_stores.simple] [DEBUG] [0m Loading llama_index.graph_stores.simple from ./storage/graph_store.json.
2024-02-04 11:15:22,475 [36;20m [fsspec.local] [DEBUG] [0m open file: /Users/joefaver/Codes/self/llm-information-retrieval/storage/graph_store.json
2024-02-04 11:15:22,476 [36;20m [llama_index.vector_

In [8]:
# either way we can now query the index
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print('\n')
pprint(response)

2024-02-04 11:15:23,244 [36;20m [httpx] [DEBUG] [0m load_ssl_context verify=True cert=None trust_env=True http2=False
2024-02-04 11:15:23,245 [36;20m [httpx] [DEBUG] [0m load_verify_locations cafile='/Users/joefaver/.local/share/virtualenvs/llm-information-retrieval-YlmZYzqu/lib/python3.11/site-packages/certifi/cacert.pem'
2024-02-04 11:15:23,252 [36;20m [openai._base_client] [DEBUG] [0m Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x16aac1da0>, 'json_data': {'input': ['What did the author do growing up?'], 'model': <OpenAIEmbeddingModeModel.TEXT_EMBED_ADA_002: 'text-embedding-ada-002'>, 'encoding_format': 'base64'}}
2024-02-04 11:15:23,253 [36;20m [httpcore.connection] [DEBUG] [0m connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=60.0 socket_options=None
2024-02-04 11:15:23,318 [36;20m [httpcore.connection] [DEBUG] [0m connect_tcp.complete return_value=<httpc

In [9]:
pprint(response.response)

('The author wrote short stories and also worked on programming, specifically '
 'on an IBM 1401 computer in 9th grade. They later transitioned to working on '
 'microcomputers, starting with a kit-built computer and eventually getting a '
 'TRS-80. They wrote simple games, a program to predict rocket heights, and '
 'even a word processor.')


<a id="2-high-level-concepts"></a>
## 2. High-Level Concepts