### **Introduction**

In [None]:
!pip install gradio --quiet
!pip install tiktoken --quiet
!pip install faiss-cpu --quiet
!pip install unstructured[pdf] --quiet
!pip install --upgrade openai --quiet
!pip install --upgrade langchain --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.1/20.1 MB[0m [31m81.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.2/298.2 kB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.7/138.7 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.5/59.5 kB[0m [31m7.4 MB/s[0

### **Required Libs**

In [None]:
import os
import openai
import gradio as gr

from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings

from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter

This is the API setup for the **embeddings** and **chat** models.

In [None]:
# Azure OpenAI connection settings, exported as environment variables for the
# chat and embedding clients created later in the notebook.
#
# The API key is a secret and must not be stored in the notebook: it is taken
# from an already-exported OPENAI_API_KEY, or asked for interactively (the
# typed value is not echoed and is not saved in the cell source).
# NOTE(review): a literal key was previously committed in this cell; treat it
# as leaked and rotate it.
from getpass import getpass

if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("Azure OpenAI API key: ")
os.environ['OPENAI_API_TYPE'] = "azure"
os.environ['OPENAI_API_VERSION'] = "2023-07-01-preview"
os.environ['OPENAI_API_BASE'] = "https://c-openai-demo.openai.azure.com/"

In [None]:
def create_service_context(
    # Constraint parameters
    max_input_size=4096,        # Context window for the LLM.
    num_outputs=256,            # Number of output tokens for the LLM.
    chunk_overlap_ratio=0.1,    # Chunk overlap as a ratio of chunk size.
    chunk_size_limit=None,      # Maximum chunk size to use.
    chunk_overlap=20,           # Chunk overlap handed to the text splitter.
    chunk_size=1024,            # Chunk size handed to the text splitter.
):
    """Build the service context that bundles parser, prompt limits, LLM and embeddings.

    The pieces are created in this order and then passed to
    ``ServiceContext.from_defaults``:

    * a node parser whose text splitter is a ``TokenTextSplitter`` configured
      with ``chunk_size`` / ``chunk_overlap``;
    * a ``PromptHelper`` receiving ``max_input_size``, ``num_outputs`` and
      ``chunk_overlap_ratio`` positionally plus ``chunk_size_limit``;
    * an ``LLMPredictor`` around an ``AzureChatOpenAI`` client for the
      ``"chagpt_model"`` deployment, capped at ``num_outputs`` tokens;
    * a ``LangchainEmbedding`` around ``OpenAIEmbeddings`` using
      ``"text-embedding-ada-002"``.

    Returns:
        Whatever ``ServiceContext.from_defaults`` returns for those components.

    NOTE(review): ``SimpleNodeParser``, ``PromptHelper``, ``LLMPredictor``,
    ``LangchainEmbedding`` and ``ServiceContext`` are not imported anywhere in
    the visible notebook (presumably they come from llama_index -- confirm),
    so calling this function as-is raises ``NameError``.
    """
    # Document -> node conversion, chunked by token count.
    splitter = TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    parser = SimpleNodeParser.from_defaults(text_splitter=splitter)

    # Explicit prompt-size constraints supplied by the caller.
    helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio,
        chunk_size_limit=chunk_size_limit,
    )

    # Chat model wrapped for use as the predictor (temperature left at its default).
    chat_llm = AzureChatOpenAI(deployment_name="chagpt_model", max_tokens=num_outputs)
    predictor = LLMPredictor(llm=chat_llm)

    # Embedding model that produces the vector representation of each chunk.
    ada_embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size=1)
    embedder = LangchainEmbedding(langchain_embeddings=ada_embeddings)

    return ServiceContext.from_defaults(
        llm_predictor=predictor,
        embed_model=embedder,
        node_parser=parser,
        prompt_helper=helper,
    )

# Helper that asks a text-generation pipeline for a reply to the given text.
def get_response(input_text):
    """Generate one continuation for ``input_text`` and return its text.

    Args:
        input_text: Prompt passed to the generation pipeline.

    Returns:
        The ``'generated_text'`` field of the first returned sequence.

    NOTE(review): ``pipeline`` is not imported in the visible notebook
    (presumably ``transformers.pipeline`` -- confirm). The model name
    "text-embedding-ada-002" is the same one used for embeddings elsewhere in
    this notebook; make sure it is really the model intended for text
    generation here.
    """
    # A new pipeline is built on every call, pinned to device 0 with a
    # 50-token length limit.
    generator = pipeline("text-generation", model="text-embedding-ada-002", device=0, max_length=50)

    # Request a single candidate sequence.
    candidates = generator(input_text, num_return_sequences=1)

    # The unreachable `return service_context` that used to follow this line
    # (it referenced a name that does not exist in this function) was removed.
    return candidates[0]['generated_text']

### **Getting Started**

At a high level, there are two components to setting up ChatGPT over your own data:
1. `Ingestion of the data`
2. `Chatbot over the data`

The upcoming sections walk through the steps of each at a high level.

#### **Data Ingestion**
This section dives into more detail on the steps necessary to ingest data.

Next, we can load up a bunch of text files, chunk them up and embed them. LangChain supports a lot of different [document loaders](https://python.langchain.com/docs/modules/data_connection/document_loaders.html), which makes it easy to adapt to other data sources and file formats. You can download the sample data here.



In [None]:
# Chat model served by the Azure deployment "chagpt_model", plus the embedding
# model used to vectorise document chunks and queries.
# NOTE(review): the chat client pins API version 2023-03-15-preview while the
# environment setup cell exports OPENAI_API_VERSION=2023-07-01-preview --
# confirm which one is intended.
llm = AzureChatOpenAI(deployment_name="chagpt_model", openai_api_version="2023-03-15-preview")

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", chunk_size=1)

In [None]:
# Create the folder the PDFs are loaded from. `exist_ok=True` keeps the cell
# idempotent, so re-running the notebook does not fail when it already exists.
os.makedirs("data", exist_ok=True)

In [None]:
# Load every file in data/ that matches *.pdf.
loader = DirectoryLoader('data', glob="*.pdf")
documents = loader.load()

# Split the loaded documents into 1000-token chunks with no overlap.
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Next, let's ingest documents into [Faiss](https://github.com/facebookresearch/faiss) so we can efficiently query our embeddings:

In [None]:
from langchain.vectorstores import FAISS

# Embed each chunk with `embeddings` and store the vectors in a FAISS index.
db = FAISS.from_documents(docs, embeddings)

#### **Data Querying**

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

# Template handed to the chain as its condense-question prompt; it is filled
# with the chat history and the follow-up input. Adapt if needed.
CONDENSE_TEMPLATE = """HIBuddy Bot ha sido diseñado específicamente para responder a las preguntas y brindar orientación a los empleados de Hiberus,
tanto en sus primeros días en la empresa como en las dudas que puedan surgir en su rutina diaria.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(CONDENSE_TEMPLATE)

# Conversational chain over the FAISS retriever; source documents are returned
# alongside each answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(),
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False,
)

In [None]:
# Single-turn question asked with an empty conversation history.
chat_history = list()
query = "Quiero cambiar tipo de IRPF, ¿es posible?"
result = qa(dict(question=query, chat_history=chat_history))

print("Question:", query)
print("Answer:")
result["answer"]

Question: Quiero cambiar tipo de IRPF, ¿es posible?
Answer:


'Sí, es posible cambiar el tipo de IRPF. Debes enviar una Solicitud de tipo de retención superior, cumplimentada y firmada, a RRHH para solicitar el cambio. Sin embargo, este cambio solo se puede realizar si el tipo solicitado es superior al que resulta del cálculo realizado por nuestro sistema.'

In [None]:
# Start a new conversation (history cleared) and ask a second question.
chat_history = list()
query = "Dónde puedo ver mi calendario laboral corporativo?"
result = qa(dict(question=query, chat_history=chat_history))

print("Question:", query)
print("Answer:")
result["answer"]

Question: Dónde puedo ver mi calendario laboral corporativo?
Answer:


'Puedes ver y descargar tu calendario laboral en Sommos, Área personal → Mi jornada conciliación.'

In [None]:
# Follow-up questions support: record the previous exchange (question and
# answer from the cell above) so the chain receives it as chat history.
# The history used to be reset to [] right after this append, which threw the
# recorded turn away and made this an ordinary single-turn query.
chat_history.append((query, result["answer"]))
query = "¿Cómo puedo descargar la app Sommos?"
result = qa({"question": query, "chat_history": chat_history})

print("Question:", query)
print("Answer:")

result["answer"]



Question: ¿Cómo puedo descargar la app Sommos?
Answer:


'Puede acceder a la app Sommos desde https://sommos.online/hiberus/ o descargarla en su móvil. Las instrucciones para descargarla se encuentran en el manual adjunto en la convocatoria del Onboarding.'

In [None]:
async def data_querying(input_text, follow_up_questions = True):
  """Answer ``input_text`` from the index persisted under ``./storage``.

  Args:
      input_text: The user's message.
      follow_up_questions: When truthy the index is queried through its chat
          engine, otherwise through its query engine. The Gradio traceback
          recorded in this notebook shows ``gr.ChatInterface`` calling the
          function as ``fn(message, history)``, so when used as the chat
          callback this parameter receives the history list, not a bool.

  Returns:
      The ``response`` attribute of the engine's reply.

  NOTE(review): ``StorageContext`` and ``load_index_from_storage`` are not
  imported anywhere in the visible notebook, and no cell persists an index to
  ``./storage``; the recorded Gradio traceback ends on the ``StorageContext``
  line. Confirm where these are supposed to come from before relying on this
  function.
  """
  # Rebuild the storage context from the persisted directory.
  storage_context = StorageContext.from_defaults(persist_dir="./storage")

  # Load the stored index. It was previously bound to `Index` while the code
  # below read `index`, which raised NameError on every call.
  index = load_index_from_storage(storage_context, service_context=create_service_context())

  # Chat engine for follow-up conversations, plain query engine otherwise.
  if follow_up_questions:
    response = index.as_chat_engine().chat(input_text)
  else:
    response = index.as_query_engine().query(input_text)
  return response.response

In [None]:
# Gradio chat interface wired to `data_querying`.
chat_window = gr.Chatbot(height=300)
message_box = gr.Textbox(placeholder="¿Qué quieres saber acerca de Hiberus?", container=False, scale=7)
example_questions = [
    "¿Cómo puedo descargar la app Sommos?",
    "Quiero cambiar tipo de IRPF, ¿es posible?",
    "¿Cuando debo solicitar mis vacaciones?",
]

iface = gr.ChatInterface(
    data_querying,
    chatbot=chat_window,
    textbox=message_box,
    title="HIBuddy Bot",
    description="Soy el Buddy de Hiberus, y estoy encantado de poder ayudarte en tus primeros días en la empresa",
    theme="soft",
    examples=example_questions,
    cache_examples=False,
    retry_btn="Repetir",
    undo_btn="Deshacer",
    clear_btn="Borrar",
    submit_btn="Enviar",
)

# share=True requests a public link; debug=True keeps the cell running so
# errors and logs stay visible.
iface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://1b85de3462dabed7bc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gradio/routes.py", line 507, in predict
    output = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/route_utils.py", line 219, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1437, in process_api
    result = await self.call_function(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1107, in call_function
    prediction = await fn(*processed_input)
  File "/usr/local/lib/python3.10/dist-packages/gradio/utils.py", line 616, in async_wrapper
    response = await f(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/gradio/chat_interface.py", line 415, in _submit_fn
    response = await self.fn(message, history, *args)
  File "<ipython-input-27-f408e7b3507b>", line 3, in data_querying
    storage_context = StorageContext.from_defaults(pe