In [2]:
### OBEN GROUP - PROYECTO DE INTELIGENCIA ARTIFICIAL ###
#
# Archivo base para webapp prototipo
# Chatbot con documentos
#
# Desarrollado por:
# Carlos Gorricho
# cel: +57 314 771 0660
# email: carlosgorricho@hobengroup.co

In [12]:
### IMPORTAR DEPENDENCIAS ###
import streamlit as st
from PyPDF2 import PdfReader
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain import hub
from langchain.schema.runnable import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.prompts import PromptTemplate
from typing import List
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
# from langchain_pinecone import PineconeVectorStore
from pydantic import BaseModel, Field
import dotenv
import os
import warnings
warnings.filterwarnings('ignore') 

In [4]:
# Load environment variables from the local .env file
# (presumably including the OpenAI API key — confirm).
dotenv.load_dotenv()

True

In [39]:
# PDF manuals to index; paths are relative to the notebook's working directory.
pdf_docs = ['./manuales/LineOperationManual_P_1932SHB_en-GB_v1_2022-01-24.pdf']

In [43]:
# Concatenate the extracted text of every page of every PDF, in order,
# with no separator inserted between pages or documents.
text = "".join(
    page.extract_text()
    for pdf in pdf_docs
    for page in PdfReader(pdf).pages
)

In [100]:
# Total number of characters extracted from the PDFs.
len(text)

374997

In [101]:
# Split the full text into overlapping chunks (chunk_size=2048, chunk_overlap=256)
# and wrap each chunk in a Document for the vector store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048, chunk_overlap=256)
chunks = text_splitter.create_documents([text])

In [104]:
# Number of chunks produced by the splitter.
len(chunks)

210

In [105]:
# Character count of the first chunk.
len(chunks[0].page_content)

2032

In [106]:
# Print the character count of every chunk, one value per line.
for chunk in chunks:
    content_length = len(chunk.page_content)
    print(content_length)

2032
1987
1967
1923
1895
2015
1988
1920
1973
2017
1971
1942
2044
1937
1966
1890
2022
1980
1981
2028
2023
2001
2003
1971
2034
2008
1991
2004
2030
2013
2030
2026
2034
2046
2032
2037
2026
1981
2019
2018
2041
2041
2025
2026
1977
2008
1983
2009
1995
1987
1990
2014
2009
2046
2041
2003
1994
2029
2010
2012
2036
1999
2027
2042
1979
1988
2004
2012
1982
1987
2044
2010
2000
2042
2039
2043
2016
1982
2000
2042
2042
2020
2037
1984
2009
2012
2010
2047
2007
1990
2009
2018
2029
2025
1969
1973
2018
1999
2046
2047
2028
1997
2013
2017
2030
2010
2011
2034
2009
1983
2029
2024
2016
2046
2004
2038
2006
1995
2006
2015
2015
2007
2029
2026
2038
2035
1985
1969
1986
1989
2044
1995
2015
2011
2034
1955
2020
2041
2012
2040
2004
2045
1996
2046
1998
2044
2009
2036
2036
1980
2024
2042
2005
1994
1992
2014
2004
2020
2022
2031
2019
2013
2047
2023
2017
2038
2039
1987
2044
2046
1999
2047
1984
1994
1992
2001
2003
2014
2035
1976
1988
2042
1968
2037
2013
1990
2029
2022
2008
1980
2040
1989
2031
2005
2020
2017
2016
1972
1994
2011


In [60]:
# Embed the chunks and persist them in a local Chroma store.
# Deterministic ids make this cell idempotent: entries are written by id, so
# re-running the cell overwrites the same records instead of appending another
# full copy of every chunk under freshly generated ids (which skews retrieval
# towards duplicates).
# NOTE: if ./chroma_db_debug was already populated by an earlier run without
# ids, delete the directory once before re-running.
Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(model='text-embedding-3-large'),
    ids=[f"chunk-{i}" for i in range(len(chunks))],
    persist_directory="./chroma_db_debug",
)

<langchain_community.vectorstores.chroma.Chroma at 0x180b65a8050>

In [61]:
# Build the RAG chain. No API key is passed explicitly here; environment variables are loaded from .env earlier in the notebook.

def get_conversational_chain(retriever):
    """Build a retrieval-augmented question-answering chain.

    The chain takes the user's question as input, sends it to ``retriever``
    to fetch context documents, fills the prompt template with the context
    and the question, and passes the prompt to a ``gpt-4o`` chat model with
    temperature 0.

    Args:
        retriever: A LangChain retriever (a Runnable) that maps a question
            string to a list of documents exposing ``page_content``.

    Returns:
        A runnable chain; ``chain.invoke(question)`` returns the chat model's
        message, whose text is available as ``.content``.
    """

    def _format_docs(docs):
        # Join the raw text of the retrieved documents. Without this step the
        # list of Document objects is interpolated into the prompt as its
        # Python repr (``[Document(page_content='...\\n...')]``), which adds
        # wrapper noise and escaped newlines to the context the model reads.
        return "\n\n".join(doc.page_content for doc in docs)

    template = """Use the following pieces of context to answer the question at the end. 
        Always answer the question in the language in which it is asked.
        If you don't know the answer, just say that you don't know, don't try to make up an answer. 
        Give your responses primarily in numbered lists. 
    
    {context}

    Question: {question}

    Helpful Answer:
    """
    rag_prompt_custom = PromptTemplate.from_template(template)

    llm = ChatOpenAI(model="gpt-4o", temperature=0)

    # The dict is coerced into a parallel step: the incoming question goes to
    # the retriever (then the formatter) and is also passed through unchanged.
    chain = (
        {"context": retriever | _format_docs, "question": RunnablePassthrough()}
        | rag_prompt_custom
        | llm
    )

    return chain

In [82]:
# Re-open the persisted debug store from disk, using the same embedding model
# that was used when the chunks were indexed, and expose it as a retriever.
embeddings = OpenAIEmbeddings(model='text-embedding-3-large')
new_db_debug = Chroma(persist_directory="./chroma_db_debug",
                embedding_function=embeddings,
                )
retriever_debug = new_db_debug.as_retriever()

In [85]:
# Sample query (Spanish for "safety measures for pneumatic systems") used to
# exercise both retrieval and the full chain.
user_question = 'medidas de seguridad de los sistemas neumáticos'

In [86]:
# Fetch the documents the debug store returns for the sample question.
context = retriever_debug.invoke(user_question)

In [87]:
# Display the retrieved documents.
# NOTE(review): this prints the full text of every retrieved chunk; consider
# showing a short slice instead to keep the saved output small.
context

 Document(page_content='[\xa0safety\xa0gate\xa0]  to [\xa0lock\xa0] .\nðThe function locks of the safety gate are reset.\n18.11 Winding modes\nWinding modes The two-station turret winder has the following winding modes:\n•Contact winding\n•Gap Winding Mode\nWinCC OA The operation of the winding modes is carried out in WinCC\xa0OA.\n•The winding mode can be selected.\n•Settings for winding mode can be made.\n•The winding density is calculated and displayed.\nContact winding "Contact Winding Mode" is the standard winding mode.Operating the winder unit\nEliminating a failure of the pneumatic system\n218/225 Line operation manual P.1932 SHB v1 en 1/24/2022\n1Contact roll 2Gap ±\xa00\n3Mill roll\nDuring contact winding, the contact roll presses the film onto the mill roll. Thus,\nwhen winding up the film, the air inclusion between the film layers of the mill roll is\nreduced. The building up mill roll pushes the contact roll backward in its linear guid-\nance. The contact pressure is contin

In [88]:
# Number of documents returned by the retriever for this question.
len(context)

4

In [89]:
# Build the QA chain on the retriever backed by ./chroma_db_debug.
# This previously referenced `retriever`, which is only assigned in a later
# cell, so the cell raised NameError on a fresh Restart & Run All.
chain = get_conversational_chain(retriever_debug)

In [90]:
# Run the chain end to end on the sample question.
response = chain.invoke(user_question)

In [91]:
# Print the answer text carried in the response's `content` attribute.
print(response.content)

1. **Actuación de válvulas neumáticas:**
   - Solo accionar las válvulas neumáticas cuando no haya nadie en la zona de peligro.

2. **Movimientos inesperados:**
   - No permanecer en el rango de giro de los componentes neumáticos en movimiento.
   - Esperar hasta que se alcance la posición de parada.

3. **Partes móviles de la máquina:**
   - Asegurarse de que nadie esté en la zona de peligro antes de accionar las partes móviles de la máquina.
   - Usar equipo de protección personal.
   - Mantenerse alejado del área de movimiento.
   - No alcanzar las proximidades de las partes móviles de la máquina.
   - Esperar a que la máquina se detenga.

4. **Despresurización del sistema:**
   - Despresurizar el sistema antes de realizar trabajos de mantenimiento.
   - Solo liberar las conexiones cuando estén despresurizadas.

5. **Fallas en el sistema neumático:**
   - Asegurarse de que nadie esté en la zona de peligro al restablecer fallas en el suministro neumático.
   - Esperar hasta que se al

In [92]:
# Open the other persisted store (./chroma_db) with the same embedding
# function and expose it as a retriever, for comparison with the debug store.
# NOTE(review): the saved outputs of the following cells report a single
# element in this index, 374997 characters long, i.e. the same length as the
# full extracted text. This store appears to hold the unsplit document.
new_db = Chroma(persist_directory="./chroma_db",
                embedding_function=embeddings,
                )
retriever = new_db.as_retriever()

In [97]:
# Query the ./chroma_db retriever with the same question.
# NOTE(review): naming is inverted relative to the earlier cell. `context`
# holds the results of `retriever_debug`, while `context_debug` holds the
# results of the non-debug `retriever`.
context_debug = retriever.invoke(user_question)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


In [99]:
# Character count of the first document returned from ./chroma_db.
len(context_debug[0].page_content)

374997