# Hola, mundo en LangChain

## Importar librerías principales y configurar la API Key de OpenAI

In [None]:
from openai import OpenAI
from custom_utils.helper import get_openai_api_key, print_response

In [None]:
# Obtain the OpenAI API key through the project helper and keep it in a
# module-level constant so later cells can reuse it.
OPENAI_API_KEY = get_openai_api_key()
# Build the OpenAI client with that key.
# NOTE(review): the instance is bound to the name `openai`, which would shadow
# the `openai` package if it were ever imported by module name — confirm intent.
openai = OpenAI(api_key=OPENAI_API_KEY)

## Carga de documentos

In [None]:
import requests
from langchain.document_loaders import PyPDFLoader

# arXiv PDFs to download and ingest.
urls = [
    'https://arxiv.org/pdf/2306.06031v1.pdf',
    'https://arxiv.org/pdf/2306.12156v1.pdf',
    'https://arxiv.org/pdf/2306.14289v1.pdf',
    'https://arxiv.org/pdf/2305.10973v1.pdf',
    'https://arxiv.org/pdf/2306.13643v1.pdf'
]

# Accumulates everything `loader.load()` returns for each downloaded paper.
ml_papers = []

for i, url in enumerate(urls):
    # Bound the wait so a stalled connection cannot hang the notebook, and
    # fail fast on HTTP errors so an error page is never saved as a PDF.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    filename = f'paper{i+1}.pdf'
    with open(filename, 'wb') as f:
        f.write(response.content)
    print(f'Descargado {filename}')

    # Parse the PDF only after the `with` block has closed (and therefore
    # flushed) the file; reading it while the write handle is still open can
    # see a truncated or empty file.
    loader = PyPDFLoader(filename)
    data = loader.load()
    ml_papers.extend(data)

# Use the ml_papers list to access the elements of all downloaded documents.
# NOTE(review): only a header and a blank line are printed here; the list
# contents themselves are not printed by this cell.
print('Contenido de ml_papers:')
print()

In [None]:
# Sanity check of the load step: container type, number of loaded entries,
# and one sample entry (index 3).
type(ml_papers), len(ml_papers), ml_papers[3]

## División de documentos

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Configure the splitter that cuts the loaded papers into overlapping chunks.
# Lengths are measured with the built-in `len`, i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,    # target upper bound for a chunk's length
    chunk_overlap=200,  # length shared between neighbouring chunks
    length_function=len,
)

# Apply the splitter to every loaded document; `documents` feeds the
# embedding / vector-store step.
documents = text_splitter.split_documents(ml_papers)

In [None]:
# Sanity check of the split step: number of chunks and one sample chunk
# (index 10).
len(documents), documents[10]

## Embeddings e ingesta a base de datos vectorial

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma


# Embedding model used to vectorise every chunk. The API key is passed
# explicitly, the same way this file configures ChatOpenAI, so both OpenAI
# clients use the key returned by get_openai_api_key() rather than one of
# them depending on an implicitly discovered credential.
embeddings = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=OPENAI_API_KEY,
)

# Embed the chunks and index them in a Chroma vector store. No persist
# directory is given here.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)

# Retriever over the store that asks for 3 results per query ("k": 3).
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3},
)

## Modelos de chat y cadenas para consulta de información

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model that will write the answers. It is given the API key obtained
# earlier in the file and a temperature of 0.0.
chat = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0
)

# Question-answering chain that wires the chat model to the retriever.
# NOTE(review): chain_type="stuff" presumably places all retrieved chunks into
# a single prompt — confirm against the installed LangChain version.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever
)

In [None]:
# Ask the chain (in Spanish): "what is fingpt?". As the cell's last
# expression, the value returned by `run` is what the notebook displays.
query = "qué es fingpt?"
qa_chain.run(query)

In [None]:
# Ask the chain (in Spanish): "what makes it complicated to train a model
# like fingpt?".
query = "qué hace complicado entrenar un modelo como el fingpt?"
qa_chain.run(query)

In [None]:
# Ask the chain (in Spanish): "what is fast segment?".
query = "qué es fast segment?"
qa_chain.run(query)

In [None]:
# Ask the chain (in Spanish): "what is the difference between fast sam and
# mobile sam?".
query = "cuál es la diferencia entre fast sam y mobile sam?"
qa_chain.run(query)