In [2]:
pip install openai langchain langchain-community pinecone-client python-dotenv langchain-pinecone tiktoken sentence_transformers chromadb

Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting chromadb
  Downloading chromadb-0.5.4-py3-none-any.whl (581 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.4/581.4 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Collecting chroma-hnswlib==0.7.5 (from chromadb)
  Downloading chroma_hnswlib-0.7.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m63.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.111.1-py3-none-any.whl (92 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/92.2 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting uvicorn[standard]>=0.18.3 (from chromadb)
  Downloading uvic

In [5]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain_community.chat_models import ChatOpenAI
from langchain_community.llms import OpenAI, HuggingFaceHub
from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeVectorStore

# ------------- Retrieval-Augmented Generation  ------------- #

def get_docs():
    """
    Loads each file into one document, like knowledge base
    :return: docs
    """

    loader = DirectoryLoader("docs", "*.txt", loader_cls=TextLoader)  # Reads custom data from local files

    docs = loader.load()
    return docs

def split_text(docs):
    """
    Get chunks from docs. Our loaded doc may be too long for most models, and even if it fits is can struggle to find relevant context. So we generate chunks
    :param docs: docs to be split
    :return: chunks
    """

    text_splitter = RecursiveCharacterTextSplitter( # recommended splitter for generic text
        chunk_size=2000,
        chunk_overlap=200,
        add_start_index=True
    )
    chunks = text_splitter.split_documents(docs)

    return chunks

def get_data_store(chunks):
    """
    Store chunks into a db. ChromaDB uses vector embeddings as the key, creates a new DB from the documents
    :param docs:
    :param chunks:
    :return: database
    """
    embeddings = HuggingFaceEmbeddings( #  embedding=OpenAIEmbeddings() rate limit
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'}
    )

    db = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings
    )
    return db

def generate_response(db, prompt):
    """
    Generate a response with a LLM based on previous custom context
    :return: chatbot response
    """

    hf_llm = HuggingFaceHub(
        repo_id="HuggingFaceH4/zephyr-7b-beta",  # Model id
        task="text-generation",                  # Specific task the model is intended to perform
        model_kwargs={
            "max_new_tokens": 512,               # The maximum number of tokens to generate in the response.  Limits the length of the generated text to ensure responses are concise or fit within certain constraints.
            "top_k": 30,                         # Limits the sampling pool to the top k tokens, increasing focus on more likely tokens
            "temperature": 0.3,                  # Controls the randomness of predictions, with lower values making the output more deterministic. : Produces more focused and less random text by making the model more confident in its choices.
            "repetition_penalty": 1.03,          # Penalizes repeated tokens to avoid repetitive output.  Discourages the model from repeating the same token sequences, resulting in more varied and natural text.
        },
    )

    chain = RetrievalQA.from_chain_type( # Generate chat model based on previous llm
        llm=hf_llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 1}),
        verbose=False
    )

    response = chain.run(prompt)
    return response


In [6]:
import os, sys, warnings
import openai
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

docs = get_docs()           # Load custom files
chunks = split_text(docs)   # Split into chunks
db = get_data_store(chunks) # Generate vectorstore

#print(f"[LOG] {db.similarity_search(USER_PROMPT)}\n\n")



  warn_deprecated(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
user_input = "what is ai?"
response = generate_response(db, user_input)
response

"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nContenidos:\nUnidad 1: Introducción a Transformers\nIntroducción al concepto de Gen AI, LLMs y Transformers. Historia. Arquitectura. Mecanismo de Atención. Embeddings y Positional Encoding. Aplicaciones en la industria.\n\nUnidad 2: Algoritmos de Embedding y Positional Encoding\nAlgoritmos de Embedding y Positional Encoding. Transformer basando en N-grama\n\nUnidad 3: Fine Tuning\nReinforcement Learning. RLHF y sus security issues. Fine tuning. Pipeline productivo. \n\nUnidad 4: Responsible AI\nConsideraciones éticas en AI: biases en training data, fairness, impacto social, detección de contenido generado de forma artificial. Narrow AI vs. AGI. AGI como agente. Foundation models. Emergent capabilities. Security vulnerabilities. Interpretability. Alignment.\n\nUnidad 5: Retrieval Augmented Generation (RAG)\nIntroducción Re

In [10]:
def postprocess_response(response):
    answer_start = response.find("Helpful Answer: ")
    if answer_start != -1:
        answer = response[answer_start + len("Helpful Answer: "):].strip()
    else:
        answer = response.strip()

    return answer

In [14]:
print("Chatbot: Hola! ¿Qué deseas saber sobre 73.64 Temas Avanzandos de Deep Learning?")

user_input = ""
while user_input != "bye":
    user_input = input("You: ")
    response = generate_response(db, user_input) # + "Solo responde en español. únicamente podes hablar en el contexto de la materia llamada Temas Avanzados de Deep Learning") # Get chatbot response to user prompt
    print(f"Chatbot: {postprocess_response(response)}")

Chatbot: Hola! ¿Qué deseas saber sobre 73.64 Temas Avanzandos de Deep Learning?
You: quien es einstein?
Chatbot: En el contexto de la materia llamada Temas Avanzados de Deep Learning, Einstein no tiene relevancia directa. Sin embargo, si se considera la aplicación de Deep Learning en la ciencia, Einstein puede ser un tema de interés en el campo de la física teórica, ya que sus teorías han sido objeto de análisis y predicción utilizando métodos de Deep Learning. En este caso, la respuesta podría ser: "Einstein es un científico famoso por sus contribuciones en el campo de la física teórica, especialmente en la formulación de la teoría de la relatividad. Su trabajo ha sido objeto de análisis y predicción utilizando métodos de Deep Learning en la actualidad."


KeyboardInterrupt: Interrupted by user