In [1]:
import os
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI
from llama_index.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index import set_global_service_context

In [2]:
# Read the OpenAI API key from the environment; raises KeyError if the variable is unset.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [3]:
from llama_index import SimpleDirectoryReader

# Point a directory reader at the textbook folder, then load its contents
# into `documents` for the rest of the notebook.
textbook_reader = SimpleDirectoryReader("./ML-DS-TEXTBOOKS")
documents = textbook_reader.load_data()

In [6]:
import tiktoken

In [4]:
from llama_index.node_parser import TokenTextSplitter

In [8]:
# Use the tiktoken encoder for gpt-3.5-turbo as the splitter's tokenizer.
gpt35_encode = tiktoken.encoding_for_model("gpt-3.5-turbo").encode

# Splitter settings, gathered in one place before construction.
splitter_settings = {
    "separator": " ",
    "chunk_size": 1024,
    "chunk_overlap": 20,
    "backup_separators": ["\n"],
    "tokenizer": gpt35_encode,
}
text_splitter = TokenTextSplitter(**splitter_settings)

# Run the splitter over the loaded documents to obtain the nodes.
nodes = text_splitter.get_nodes_from_documents(documents)


In [23]:
# Display the number of items in `nodes`.
len(nodes)

510

In [9]:
import together

# Set the together module's API key from the environment; raises KeyError if the variable is unset.
together.api_key = os.environ["TOGETHER_API_KEY"]

In [10]:
import logging
from typing import Any, Dict, List, Mapping, Optional

from pydantic import Extra, Field, root_validator

from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM
from langchain.llms.utils import enforce_stop_tokens
from langchain.utils import get_from_dict_or_env

In [11]:
class TogetherLLM(LLM):
    """LangChain LLM wrapper around the Together completion endpoint.

    Each call copies this instance's API key onto the ``together`` module and
    sends the prompt to ``together.Complete.create`` with the configured
    model, token limit and temperature.
    """

    model: str = "togethercomputer/llama-2-70b-chat"
    """Model endpoint to use."""

    # Use .get() so a missing variable does not raise KeyError while the class
    # is being defined; validate_environment reports the missing key instead.
    together_api_key: str = os.environ.get("TOGETHER_API_KEY", "")
    """Together API key."""

    temperature: float = 0.7
    """Sampling temperature to use."""

    max_tokens: int = 512
    """The maximum number of tokens to generate in the completion."""

    class Config:
        extra = Extra.forbid

    # NOTE(review): written against the pydantic v1-style decorator that the
    # file's `from pydantic import root_validator` import implies - confirm
    # against the installed pydantic/langchain versions.
    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the API key is set.

        Resolves the key from the supplied values first and falls back to the
        TOGETHER_API_KEY environment variable.

        Args:
            values: Field values collected for the model.

        Returns:
            The same mapping with "together_api_key" filled in.
        """
        api_key = get_from_dict_or_env(
            values, "together_api_key", "TOGETHER_API_KEY"
        )
        values["together_api_key"] = api_key
        return values

    @property
    def _llm_type(self) -> str:
        """Return type of LLM."""
        return "together"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call the Together endpoint and return the generated text.

        Args:
            prompt: Text sent to the completion endpoint.
            stop: Optional stop sequences; when given, the returned text is
                cut at the first occurrence of any of them.
            run_manager: Accepted for compatibility with the LangChain
                ``_call`` signature; not used here.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            The text of the first choice in the endpoint's response.
        """
        together.api_key = self.together_api_key
        output = together.Complete.create(
            prompt,
            model=self.model,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
        )
        text = output['output']['choices'][0]['text']
        if stop is not None:
            # The request itself carries no stop sequences, so trim afterwards.
            text = enforce_stop_tokens(text, stop)
        return text

In [12]:
# Settings for the Together wrapper instance used by the rest of the notebook.
together_llm_settings = {
    "model": "togethercomputer/llama-2-70b-chat",
    "temperature": 0.1,
    "max_tokens": 512,
}
test_llms = TogetherLLM(**together_llm_settings)

In [21]:
from llama_index.embeddings import TogetherEmbedding

In [22]:
# Build a TogetherEmbedding for the m2-bert retrieval model, reusing the API
# key stored on the `together` module.
retrieval_model_name = "togethercomputer/m2-bert-80M-8k-retrieval"
embed_model = TogetherEmbedding(
    api_key=together.api_key,
    model_name=retrieval_model_name,
)

In [36]:
from InstructorEmbedding import INSTRUCTOR
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

  from tqdm.autonotebook import trange


In [37]:
# Load the hkunlp/instructor-xl embedding model with its device set to "cpu".
instructor_model_kwargs = {"device": "cpu"}
instructor_embedding = HuggingFaceInstructEmbeddings(
    model_name='hkunlp/instructor-xl',
    model_kwargs=instructor_model_kwargs,
)

load INSTRUCTOR_Transformer
max_seq_length  512


In [27]:
# Use the Together wrapper instance as the LLM.
llm = test_llms

# Rebinds `embed_model` to the string "local"; any object previously stored
# under this name is no longer reachable through it.
embed_model = "local"

# Prompt sizing settings, collected before building the helper.
prompt_helper_settings = {
    "context_window": 4096,
    "num_output": 256,
    "chunk_overlap_ratio": 0.1,
    "chunk_size_limit": None,
}
prompt_helper = PromptHelper(**prompt_helper_settings)

In [29]:
# Bundle the LLM, embedding setting, splitter and prompt helper into a single
# service context.
service_context_parts = dict(
    llm=llm,
    embed_model=embed_model,
    node_parser=text_splitter,
    prompt_helper=prompt_helper,
)
service_context = ServiceContext.from_defaults(**service_context_parts)

config.json: 100%|██████████| 684/684 [00:00<00:00, 34.7kB/s]
model.safetensors: 100%|██████████| 133M/133M [00:37<00:00, 3.55MB/s] 
tokenizer_config.json: 100%|██████████| 366/366 [00:00<00:00, 85.5kB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 543kB/s]
tokenizer.json: 100%|██████████| 711k/711k [00:00<00:00, 1.08MB/s]
special_tokens_map.json: 100%|██████████| 125/125 [00:00<00:00, 31.2kB/s]


In [13]:
import chromadb
from llama_index.vector_stores import ChromaVectorStore
from llama_index.storage.storage_context import StorageContext

In [14]:
# Open a Chroma persistent client using the ./chroma_db path.
db = chromadb.PersistentClient(path="./chroma_db")

In [15]:
# Get the "forML" collection from the client, creating it if it does not exist yet.
chroma_collection = db.get_or_create_collection("forML")

In [16]:
# Wrap the Chroma collection in a ChromaVectorStore, then build a storage
# context that uses it as the vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [17]:
# Build an index object on top of the existing vector store.
# NOTE(review): no service_context is passed here, so presumably the library's
# default LLM/embedding settings apply to this index - confirm that is intended.
index = VectorStoreIndex.from_vector_store(
    vector_store, storage_context=storage_context
)

In [32]:
# Ask the current index a sample question and print the answer.
question = "What are decision trees?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)

Decision Trees are versatile Machine Learning algorithms that can perform both classification and regression tasks, and even multioutput tasks. They are very powerful algorithms, capable of fitting complex datasets. They are simple to understand and interpret, easy to use, versatile, and powerful. However, they do have a few limitations. First, they are sensitive to training set rotation. Second, they are very sensitive to small variations in the training data.


In [30]:
# Build the index directly from the loaded documents with the custom service
# context. storage_context is not passed to this call.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [None]:
from llama_index.ingestion import IngestionPipeline
from llama_index.node_parser import TokenTextSplitter

# Run the loaded documents through an ingestion pipeline whose only
# transformation is a default-configured TokenTextSplitter.
# The literal `...` placeholder was dropped from the list: it is the Ellipsis
# object, not a transformation, so it cannot be applied to the documents.
pipeline = IngestionPipeline(transformations=[TokenTextSplitter()])

# Rebinds `nodes` to the pipeline's output.
nodes = pipeline.run(documents=documents)