### Using LlamaIndex basic example
In this example, we will be exploring using LlamaIndex to extract data from a .txt file which contains a paul graham essay

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from openai import OpenAI
import openai

client = OpenAI(base_url="https://api.together.xyz/v1")

def get_embeddings(texts, model="togethercomputer/m2-bert-80M-32k-retrieval"):
   texts = [text.replace("\n", " ") for text in texts]
   outputs = client.embeddings.create(input = texts, model=model)
   return [outputs.data[i].embedding for i in range(len(texts))]

texts = ["hello", "world"]
get_embeddings(texts)
# openai.__version__ 

[[-0.018421432,
  -0.019084418,
  -0.022481924,
  -0.012206526,
  0.04544988,
  -0.03758842,
  0.01616773,
  -0.040046185,
  -0.012309955,
  -0.10606388,
  0.011725801,
  -0.03656705,
  -0.03325098,
  -0.022664215,
  -0.0150395185,
  -0.020276722,
  -0.027741978,
  0.02952751,
  -0.03955564,
  0.03120004,
  -0.008580741,
  -0.038433682,
  0.056282096,
  -0.07844958,
  -0.0007861646,
  0.04249225,
  0.0035240615,
  0.0064159124,
  0.061222803,
  0.016325343,
  0.03959146,
  0.044059187,
  0.0058826143,
  -0.029554319,
  -0.087901,
  0.03433164,
  0.014571912,
  0.03457016,
  0.04434679,
  0.067285895,
  0.0975733,
  -0.009107015,
  -0.015283383,
  -0.032538287,
  -0.032946747,
  0.09507155,
  -0.043471448,
  -0.030898176,
  0.03682256,
  0.04754958,
  0.013275966,
  0.014277457,
  0.05521932,
  -0.03294326,
  0.039179143,
  0.0068665873,
  0.01700919,
  0.06094223,
  -0.03586325,
  -0.023898413,
  0.018643524,
  0.019730164,
  -0.010233859,
  -0.066563904,
  0.008678666,
  0.066014744,


In [3]:
client.base_url

URL('https://api.together.xyz/v1/')

In [2]:
from llama_index.indices.vector_store import VectorStoreIndex
from llama_index.readers.file.base import SimpleDirectoryReader

In [3]:
documents = SimpleDirectoryReader("data").load_data()

In [4]:
len(documents)

1

In [5]:
documents[0].text[:10]

'\n\nWhat I W'

#### Testing together AI embedding model

In [5]:
from openai import OpenAI

In [7]:
client = OpenAI()

In [8]:
client.base_url

URL('https://api.together.xyz/v1/')

In [10]:
list_of_text = ["hello", "world"]
kwargs = {}

In [11]:
data = client.embeddings.create(input=list_of_text, model="togethercomputer/m2-bert-80M-8k-retrieval", **kwargs).data

In [6]:
index = VectorStoreIndex.from_documents(documents)

In [7]:
index

<llama_index.indices.vector_store.base.VectorStoreIndex at 0x294589590>

In [8]:
query_engine = index.as_query_engine()

In [11]:
response = query_engine.query("What did the author write while growing up?")

In [13]:
from pprint import pprint

In [14]:
pprint(response)

Response(response='The author mentioned working on spam filters and doing some '
                  'painting while growing up. He also mentioned hosting dinner '
                  'parties every Thursday night, which taught him how to cook '
                  'for groups. However, there is no information provided about '
                  'what he wrote specifically while growing up.',
         source_nodes=[NodeWithScore(node=TextNode(id_='a6869362-edd2-426c-af68-5adcd544da4d', embedding=None, metadata={'file_path': 'data/graham.txt', 'file_name': 'graham.txt', 'file_type': 'text/plain', 'file_size': 75041, 'creation_date': '2024-02-08', 'last_modified_date': '2024-02-08', 'last_accessed_date': '2024-02-08'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<Node

In [18]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [16]:
response = query_engine.query("what were the authors hobbies growing up")

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x295dcb880>, 'json_data': {'input': ['what were the authors hobbies growing up'], 'model': 'togethercomputer/m2-bert-80M-32k-retrieval'}}
Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x295dcb880>, 'json_data': {'input': ['what were the authors hobbies growing up'], 'model': 'togethercomputer/m2-bert-80M-32k-retrieval'}}
DEBUG:httpcore.connection:close.started
close.started
DEBUG:httpcore.connection:close.complete
close.complete
DEBUG:httpcore.connection:connect_tcp.started host='api.together.xyz' port=443 local_address=None timeout=60.0 socket_options=None
connect_tcp.started host='api.together.xyz' port=443 local_address=None timeout=60.0 socket_options=None
DEBUG:httpcore.connection:connect_tcp.complete return_value=<httpcore._backends.

In [17]:
pprint(response)

Response(response="The author's hobby growing up was painting. He had a strong "
                  'passion for it and had even given up his software company, '
                  'Viaweb, to pursue it full-time after it was bought by '
                  'Yahoo. However, he found it difficult to gain energy and '
                  'ambition for painting in his new surroundings in '
                  'California. He eventually returned to New York to resume '
                  'his old life, but this time as a rich man. He continued to '
                  'paint and experimented with new techniques. Additionally, '
                  'he was an excellent cook and used to host dinner parties '
                  'every Thursday night.',
         source_nodes=[NodeWithScore(node=TextNode(id_='f60aecf6-79b7-483e-bca8-2521328af169', embedding=None, metadata={'file_path': 'data/graham.txt', 'file_name': 'graham.txt', 'file_type': 'text/plain', 'file_size': 75041, 'creation_date': '2024-02-08', 

In [19]:
index.storage_context.persist()

DEBUG:fsspec.local:open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/docstore.json
open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/docstore.json
open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/docstore.json
DEBUG:fsspec.local:open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/index_store.json
open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/index_store.json
open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/index_store.json
DEBUG:fsspec.local:open file: /Users/kosisochukwuasuzu/Developer/ai-startups/test-demos/pdfchat/src/llamaindex/gettingstarted/storage/graph_store.json
open file: /Users/kosisochuk