# Load libraries

In [None]:
# Use this initial code to work in the notebook as if it were a module, that
# is, to be able to import classes and functions from other subpackages.

import os
import sys

# Project root = the part of the working directory that precedes the first
# "<sep>notebooks" component (the whole path when that component is absent).
package_path = os.path.abspath(os.curdir).partition(os.sep + 'notebooks')[0]
# Register the root on sys.path only once so re-running the cell is harmless.
if sys.path.count(package_path) == 0:
    sys.path.append(package_path)

%load_ext autoreload
%autoreload 2

In [None]:
import os
import torch
from dotenv import load_dotenv
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from tuyabot_llm import AbsolutePaths

In [None]:
# Load environment variables from .env
# NOTE(review): none of the visible cells reads these variables directly —
# confirm which downstream component needs them.
load_dotenv()

# Get information from web pages

In [None]:


# Step 1: Scrape the content of the Tuya (tuya.com.co) pages using WebBaseLoader
urls = [
        "https://www.tuya.com.co/como-pago-mi-tarjeta-o-credicompras", 
        "https://www.tuya.com.co/tarjetas-de-credito", 
        "https://www.tuya.com.co/credicompras", 
        "https://www.tuya.com.co/otras-soluciones-financieras", 
        "https://www.tuya.com.co/nuestra-compania", 
        "https://www.tuya.com.co/activacion-tarjeta"
]

# Create a loader for web content
loader = WebBaseLoader(urls)
# Fetch the pages; later cells read and rewrite each item's `page_content`.
# Presumably one document is returned per URL — TODO confirm.
documents = loader.load()

In [None]:
# Inspect the raw text of the first loaded page.
documents[0].page_content

In [None]:
# Print the second page's raw text so its line breaks are rendered.
print(documents[1].page_content)

In [None]:
# import nltk
# from nltk.corpus import stopwords
# from nltk.tokenize import word_tokenize

# nltk.download('punkt')
# nltk.download('stopwords')

# for document in documents:
#     tokens = word_tokenize(document.page_content)
#     stop_words = set(stopwords.words('spanish'))
#     tokens = [token for token in tokens if token.lower() not in stop_words]
#     document.page_content = ' '.join(tokens)

In [None]:
# Re-inspect the first page's text; the stop-word removal cell above is fully
# commented out, so the text is still the raw scrape at this point.
documents[0].page_content

In [None]:
# Lower-case every page's text in place.
# NOTE(review): the chunks are embedded from this lower-cased text, while the
# queries below are sent to the retriever exactly as typed — confirm that
# mismatch is acceptable for the embedding model.
for document in documents:
    document.page_content = document.page_content.lower()

In [None]:
# Print the whole second document object after lower-casing.
print(documents[1])

In [None]:
# Collapse every run of whitespace (spaces, tabs, newlines, carriage returns)
# into a single space and trim both ends of each page's text, in place.
# str.split() without a separator already splits on any whitespace and drops
# leading/trailing blanks, so separate strip()/replace() passes are not needed;
# the resulting text is identical.
for document in documents:
    document.page_content = ' '.join(document.page_content.split())

In [None]:
# Inspect the second page's text after whitespace normalisation.
documents[1].page_content

In [None]:
# Print the whole second document object after cleaning.
print(documents[1])

# Chunk files and create embeddings

## Chunk files

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Splitter settings; lengths are measured with len(), i.e. in characters.
CHUNK_SIZE = 700
CHUNK_OVERLAP = 5

text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

# Split the cleaned pages into the chunks that get embedded further down.
docs = text_splitter.split_documents(documents)

In [None]:
# Display every chunk produced by the splitter.
# NOTE(review): this dumps the full list; a slice such as docs[:3] would keep
# the cell output short.
docs

In [None]:
# Number of chunks produced by the splitter.
len(docs)

## Embeddings

In [None]:
# NOTE(review): newer LangChain releases expose this class from
# langchain_community / langchain_huggingface rather than langchain.embeddings —
# confirm against the installed version.
from langchain.embeddings import SentenceTransformerEmbeddings

# Embedding model used for both the stored chunks and the queries.
# NOTE(review): the scraped pages and the questions are in Spanish; verify that
# "all-MiniLM-L6-v2" retrieves well for Spanish text or evaluate a multilingual model.
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Create vector database

In [None]:
# Define the persistent directory
current_dir = AbsolutePaths().get_abs_path_folder('raw')
db_dir = os.path.join(current_dir, "tuya_collection")
persistent_directory = os.path.join(db_dir, "chroma_tuya_collection")

# Step 4: Create the vector store once and reuse it on later runs.
# Calling Chroma.from_documents() against a directory that already holds the
# collection adds the chunks again, so re-running this cell would store another
# copy of every chunk and the retriever would start returning duplicates.
# To rebuild the index (e.g. after changing the URLs, the cleaning steps or the
# embedding model), delete `persistent_directory` and re-run this cell.
if not os.path.exists(persistent_directory):
    print(f"\n--- Creating vector store in {persistent_directory} ---")
    db = Chroma.from_documents(docs, embeddings, persist_directory=persistent_directory)
    print(f"--- Finished creating vector store in {persistent_directory} ---")
else:
    print(f"Vector store {persistent_directory} already exists. No need to initialize.")
    db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)

# Retrieve documents

In [None]:
# Step 5: Query the vector store
# Similarity search that returns the 10 closest chunks for each query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 10},
)

# Define the user's question
query = "¿Cuáles son los valores la tasa de interés y póliza del producto credicompras?"

# Retrieve relevant documents based on the query
relevant_docs = retriever.invoke(query)

# Display the relevant results with metadata
print("\n--- Relevant Documents ---")
# Number the results from 1 for readability.
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")
    # Show the originating URL when the chunk carries metadata.
    if doc.metadata:
        print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")

In [None]:
# Display the retrieved documents exactly as the retriever returned them.
relevant_docs

In [None]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values, in order, separated by one blank line.
    """
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)

context = format_docs(relevant_docs)  # Apply the formatting to the retrieved documents
print("Context for LLM:", context)  # Show the final context that will be passed to the model

# Load LLM

In [None]:
import torch

# Report whether the model will be able to run on a GPU or only on the CPU.
print(
    "CUDA está disponible. El modelo puede ejecutarse en GPU."
    if torch.cuda.is_available()
    else "CUDA no está disponible. El modelo se ejecutará en CPU."
)

In [None]:
# Make CUDA float32 the default tensor type only when a GPU is actually present.
# Requesting the CUDA tensor type unconditionally fails on CPU-only machines,
# which contradicts the CPU fallback announced by the CUDA availability check.
# NOTE(review): torch.set_default_tensor_type is deprecated in recent PyTorch
# releases in favour of torch.set_default_dtype / torch.set_default_device.
if torch.cuda.is_available():
    torch.set_default_tensor_type("torch.cuda.FloatTensor")

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline 
from langchain_huggingface import HuggingFacePipeline

# Hugging Face model used as the local LLM.
model_id = 'unsloth/Llama-3.2-1B-Instruct' 
# NOTE(review): `device` and `truncation` are passed to the tokenizer loader here;
# confirm they have any effect — the model below is loaded without an explicit device.
tokenizer = AutoTokenizer.from_pretrained(model_id, device="cuda:0", truncation=True)
model = AutoModelForCausalLM.from_pretrained(model_id) 


# Text-generation pipeline around the model and tokenizer loaded above.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # NOTE(review): presumably max_length counts the prompt tokens as well; the RAG
    # prompt embeds up to 10 retrieved chunks, so verify this leaves room for the answer.
    max_length=2000,
    # truncate=True,
    # NOTE(review): temperature/top_k presumably only apply when sampling is enabled — confirm.
    temperature=0.1,
    top_k=10,
    # repetition_penalty=1.5,
    # no_repeat_ngram_size=4,  # Size of the n-grams that may not be repeated
)

# Wrap the pipeline so it can be used through the LangChain API
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Verifica la salida generada
# prompt = "Dime un chiste"
# response = local_llm(prompt)
# print(response)

# Use RAG architecture

In [None]:
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Prompt text (in Spanish) with two placeholders: {context} and {question}.
template = """
Eres un agente de servicio al cliente que trabaja para TUYA SA, una empresa que se
dedica a ser la solución financiera del retail y que busca apoyar a los sectores
vulnerables de la sociedad. 

Como agente de servicio al cliente, debes suministrar respuestas amigables y
claras a los clientes.

Emplea el contexto que te ofrece la empresa TUYA delimitado por triple comillas invertidas, para responder
la pregunta que se encuentra al final delimitada por comillas simples.

Siempre que puedas responder con una serie de items hazlo, tu respuesta es máximo de 15 palabras.

Contexto: ```{context}```

Pregunta: '{question}'
"""

# Build a LangChain prompt from the template string.
prompt = PromptTemplate.from_template(template)

# RAG chain: the input question goes to the retriever, whose results are turned
# into the {context} text by format_docs, and is also passed through unchanged
# as {question}; the filled prompt is sent to the local LLM and the result is
# parsed into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | local_llm
    | StrOutputParser()
)

In [None]:
# Ask a sample question; the chain's answer is shown as the cell output.
question = "Cuáles son los productos financieros de Tuya?"
rag_chain.invoke(question)

In [None]:
# Stream the answer and print each chunk as it arrives, without extra newlines.
q = """
¿Cuáles son los valores de la tasa de interés y póliza del producto credicompras?
"""
for chunk in rag_chain.stream(q):
    print(chunk, end="", flush=True)

In [None]:
# Stream the answer to a second sample question, printing chunks as they arrive.
q = """
¿Cuáles son los nombres de las tarjetas de crédito que tiene disponible Tuya?
"""
for chunk in rag_chain.stream(q):
    print(chunk, end="", flush=True)

# Demo to deploy using Gradio

In [None]:
# Define the function that generates the answer
def generate_response(question):
    """Answer a customer question with the RAG chain, in TuyaBot format.

    The prompt and chain are built on every call from the notebook-level
    ``retriever``, ``format_docs`` and ``local_llm``, so re-running the cells
    that define them takes effect without redefining this function.

    Args:
        question: Customer question as plain text (e.g. the Gradio textbox value).

    Returns:
        The text produced by the chain, or a short message asking for a
        question when ``question`` is empty, ``None`` or only whitespace.
    """
    # The UI textbox can be submitted blank; skip retrieval and generation
    # instead of prompting the model with an empty question.
    if not question or not question.strip():
        return "TuyaBot: Por favor, escribe tu pregunta sobre TUYA."

    template = """
        Eres un agente de servicio al cliente que trabaja para TUYA SA, una empresa que se
        dedica a ser la solución financiera del retail y que busca apoyar a los sectores
        vulnerables de la sociedad. 

        Como agente de servicio al cliente, debes suministrar respuestas amigables y
        claras a los clientes.

        Emplea el contexto que te ofrece la empresa TUYA delimitado por triple comillas invertidas, para responder
        la pregunta que se encuentra al final delimitada por comillas simples.

        Siempre que puedas responder con una serie de items hazlo, tu respuesta es máximo de 15 palabras.

        El formato de la respuesta corresponde a:

        TuyaBot: Tu respuesta va aquí

        Contexto: ```{context}```

        Pregunta: '{question}'
        """

    prompt = PromptTemplate.from_template(template)

    # Retrieved chunks fill {context}; the question itself fills {question}.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | local_llm
        | StrOutputParser()
    )

    return rag_chain.invoke(question)



In [None]:
# Try generate_response directly before wiring it into the Gradio interface.
question = "Cuáles son los productos financieros de Tuya?"
generate_response(question=question)

In [None]:
import gradio as gr

# Create the Gradio interface: one text input passed to generate_response and
# one text output showing its return value.
demo = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="question"),
    ],
    outputs=gr.Textbox(label="answer"),
    title="Agente de Servicio al Cliente TUYABOT",
    description="Pregúntame algo sobre TUYA SA",
)

# Launch the interface
# NOTE(review): with share=True the launch output also lists a temporary public
# URL for this locally hosted app — confirm that exposure is intended.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://9c6aa976169f91efcf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [41]:
# Shut down the Gradio server started by demo.launch().
demo.close()

Closing server running on port: 7860
