<a href="https://colab.research.google.com/github/asanchezmh/iathon_vf/blob/main/VF_iathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
### **Installations**

!mkdir data

# from google.colab import drive
# drive.mount('/content/drive')

!pip install pypdf --quiet
!pip install gradio --quiet
!pip install langchain --quiet
!pip install llama_index --quiet
!pip install googletrans --quiet
!pip install transformers --quiet
!pip install docx2txt --quiet

### **Imports**

import getpass
import os
import sys

import gradio as gr
from transformers import pipeline

from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import AzureChatOpenAI

from llama_index import (
    SimpleDirectoryReader,
    LLMPredictor,
    PromptHelper,
    StorageContext,
    ServiceContext,
    GPTVectorStoreIndex,
    LangchainEmbedding,
    load_index_from_storage,
    set_global_service_context)

from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter
from llama_index.response.notebook_utils import display_response

### **Azure OpenAI**

# Azure OpenAI connection settings, exposed as environment variables.
# NOTE(review): presumably these are picked up by AzureChatOpenAI / OpenAIEmbeddings
# below, which are constructed without explicit credentials — confirm.
#
# The API key is a secret and must never be stored in the notebook: reuse the
# value already present in the environment, otherwise ask for it interactively
# (getpass does not echo the input). Any key that was previously committed
# here should be considered leaked and rotated.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass("Azure OpenAI API key: ")
os.environ['OPENAI_API_TYPE'] = "azure"
os.environ['OPENAI_API_VERSION'] = "2023-07-01-preview"
os.environ['OPENAI_API_BASE'] = "https://c-openai-demo.openai.azure.com/"

# **Service Context**

def create_service_context(
    # Constraint parameters
    max_input_size=4096,        # Context window for the LLM.
    num_outputs=256,            # Number of output tokens for the LLM.
    chunk_overlap_ratio=0.1,    # Chunk overlap as a ratio of chunk size.
    chunk_size_limit=None,      # Maximum chunk size to use.
    chunk_overlap=20,           # Chunk overlap handed to TokenTextSplitter.
    chunk_size=1024,            # Chunk size handed to TokenTextSplitter.
    # Model selection
    deployment_name="chagpt_model",             # Azure deployment used for chat.
    embedding_model="text-embedding-ada-002",   # Model name used for embeddings.
):
    """Build the llama_index ServiceContext used for indexing and querying.

    Args:
        max_input_size: First positional argument of PromptHelper.
        num_outputs: Second positional argument of PromptHelper; also used as
            ``max_tokens`` of the Azure chat model.
        chunk_overlap_ratio: Third positional argument of PromptHelper.
        chunk_size_limit: ``chunk_size_limit`` keyword of PromptHelper.
        chunk_overlap: ``chunk_overlap`` of the TokenTextSplitter.
        chunk_size: ``chunk_size`` of the TokenTextSplitter.
        deployment_name: ``deployment_name`` passed to AzureChatOpenAI.
        embedding_model: ``model`` passed to OpenAIEmbeddings.

    Returns:
        The object returned by ``ServiceContext.from_defaults`` configured with
        the LLM predictor, embedding model, node parser and prompt helper
        created here.
    """
    # Splits documents into nodes with the requested chunk size / overlap.
    node_parser = SimpleNodeParser.from_defaults(
        text_splitter=TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    )
    prompt_helper = PromptHelper(
        max_input_size,
        num_outputs,
        chunk_overlap_ratio,
        chunk_size_limit=chunk_size_limit)
    llm_predictor = LLMPredictor(
        llm=AzureChatOpenAI(
            #temperature=0.5,
            deployment_name=deployment_name,
            max_tokens=num_outputs))
    # NOTE(review): chunk_size=1 presumably limits each embedding request to a
    # single text (an Azure-side constraint?) — confirm before changing it.
    embedding_llm = LangchainEmbedding(
        langchain_embeddings=OpenAIEmbeddings(
            model=embedding_model,
            chunk_size=1)
    )
    return ServiceContext.from_defaults(
        llm_predictor=llm_predictor,
        embed_model=embedding_llm,
        node_parser=node_parser,
        prompt_helper=prompt_helper)

# Helper that generates text for a prompt with a `transformers` pipeline.
def get_response(input_text, model_name="text-embedding-ada-002", device=0, max_length=50):
    """Generate one continuation of ``input_text`` with a text-generation pipeline.

    Args:
        input_text: Prompt handed to the pipeline.
        model_name: ``model`` argument of ``transformers.pipeline``.
            NOTE(review): the default is the name of an OpenAI embedding model,
            which is presumably not a valid text-generation checkpoint for
            ``transformers`` — pass a real text-generation model here.
        device: ``device`` argument of the pipeline. The default of 0 is kept
            from the original code; NOTE(review): it presumably selects the
            first GPU, so pass another value on CPU-only machines — confirm.
        max_length: ``max_length`` argument of the pipeline.

    Returns:
        The first element of the pipeline's result for ``input_text``.
    """
    # A new pipeline is built on every call, exactly as before.
    generator = pipeline("text-generation", model=model_name, device=device, max_length=max_length)

    # Ask for a single generated sequence.
    response = generator(input_text, num_return_sequences=1)

    return response[0]


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.0/276.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.2/20.2 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.2/298.2 kB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.8/294.8 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.7/138.7 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.7/45.7 kB[0m [31m4.6 MB/s[

In [6]:
# Build a service context with the default settings and hand it to
# llama_index's set_global_service_context. The result is kept in the
# notebook-level name `service_context`.
service_context = create_service_context()
set_global_service_context(service_context)

In [8]:
def data_ingestion_indexing(data):
  """Load the documents found in ``data``, index them and persist the index.

  Args:
      data: Directory path handed to SimpleDirectoryReader. (It used to be
          ignored in favour of a hard-coded 'data' folder.)

  Returns:
      The GPTVectorStoreIndex built from the loaded documents.
  """
  # Load the documents from the requested directory.
  documents = SimpleDirectoryReader(data).load_data()

  # Build the index from scratch with a freshly created service context.
  index = GPTVectorStoreIndex.from_documents(
      documents, service_context=create_service_context()
  )

  # Persist the index to disk (the "storage" folder by default).
  index.storage_context.persist()

  return index

In [9]:
# Build and persist the index for the "data" folder; the result is kept in the
# notebook-level name `index`.
index = data_ingestion_indexing ("data")

In [10]:
async def data_querying(input_text, follow_up_questions = True):
  """Answer ``input_text`` using the index persisted in ``./storage``.

  Args:
      input_text: The user's question.
      follow_up_questions: When truthy the chat engine is used, otherwise the
          query engine.
          NOTE(review): this function is passed to gr.ChatInterface, which
          presumably supplies the chat history as the second positional
          argument — in that case an empty history selects the query engine
          and a non-empty one the chat engine. Confirm this is intended.

  Returns:
      The ``response`` attribute of the engine's answer.
  """
  # Rebuild the storage context from the persisted files.
  storage_context = StorageContext.from_defaults(persist_dir="./storage")

  # Load the stored index. It was previously bound to `Index` (capital I) and
  # never used, so the queries silently ran against the notebook-level `index`.
  index = load_index_from_storage(storage_context, service_context=create_service_context())

  # Pick the engine depending on whether this is a follow-up chat or not,
  # then query the index with the input text.
  if follow_up_questions:
    response = index.as_chat_engine().chat(input_text)
  else:
    response = index.as_query_engine().query(input_text)
  return response.response

In [11]:
# Helper that renders the answer history as a single text.
def mostrar_historial():
    """Return the entries of ``historial_respuestas`` joined by newlines.

    ``historial_respuestas`` is looked up as a global when the function is
    called; it is not defined in the visible part of this notebook.
    """
    return "\n".join(historial_respuestas)

# URL of the logo image.
# NOTE(review): `logo_url` is not referenced again in the visible cells.
logo_url = "https://www.hiberus.com/sites/default/files/2022-02/Logo_Hiberus_Azul_0.png"

# Chat UI whose messages are answered by `data_querying`.
iface = gr.ChatInterface(
    data_querying,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="¿Qué quieres saber acerca de Hiberus?", container=False, scale=7),
    title="HIBuddy Bot",
    description="Soy el Buddy de Hiberus, y estoy encantado de poder ayudarte en tus primeros días en la empresa",
    theme="soft",
    examples=["¿Cómo puedo descargar la app Sommos?", "Quiero cambiar tipo de IRPF, ¿es posible?", "¿Cuando debo solicitar mis vacaciones?"],
    # With caching enabled the three examples are run when the interface is
    # built (see the "Caching example 1/3 ..." lines in this cell's output).
    cache_examples=True,
    retry_btn="Repetir",
    undo_btn="Deshacer",
    clear_btn="Borrar",
    submit_btn="Enviar"
)

# share=True publishes the app on a temporary public gradio.live URL (see the
# "Running on public URL" line in this cell's output).
iface.launch(share=True, debug=False)

Caching examples at: '/content/gradio_cached_examples/17'
Caching example 1/3




Caching example 2/3




Caching example 3/3




Caching complete

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://21302de2e1d4174f57.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


