# Hola, mundo en LangChain

## Instalar librerías principales y configuración de API Key de OpenAI

In [6]:
from openai import OpenAI
client = OpenAI()

response = client.responses.create(
    model="gpt-5",
    input="Write a short bedtime story about a unicorn."
)

print(response.output_text)


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

In [None]:
#%%capture
#!pip install langchain pypdf openai chromadb tiktoken

# nejepi4276@fanwn.com

In [None]:
from openai import OpenAI

client = OpenAI(
  api_key="'OPENAI_API_KEY'"
)

response = client.responses.create(
  model="gpt-5-nano",
  input="write a haiku about ai",
  store=True,
)

print(response.output_text)

AuthenticationError: Error code: 401 - {'error': {'message': "Incorrect API key provided: 'OPENAI_****KEY'. You can find your API key at https://platform.openai.com/account/api-keys.", 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
from getpass import getpass
import os

OPENAI_API_KEY = getpass('Enter the secret value: ')
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

## Carga de documents

In [None]:
import requests
from langchain.document_loaders import PyPDFLoader

urls = [
    'https://arxiv.org/pdf/2306.06031v1.pdf',
    'https://arxiv.org/pdf/2306.12156v1.pdf',
    'https://arxiv.org/pdf/2306.14289v1.pdf',
    'https://arxiv.org/pdf/2305.10973v1.pdf',
    'https://arxiv.org/pdf/2306.13643v1.pdf'
]

ml_papers = []

for i, url in enumerate(urls):
    response = requests.get(url)
    filename = f'paper{i+1}.pdf'
    with open(filename, 'wb') as f:
        f.write(response.content)
        print(f'Descargado {filename}')




# Utiliza la lista ml_papers para acceder a los elementos de todos los documentos descargados
print('Contenido de ml_papers:')
print()

In [None]:
type(ml_papers), len(ml_papers), ml_papers[3]

## Split de documents

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
    length_function= len
    )

documents = text_splitter.split_documents(ml_papers)

In [None]:
len(documents), documents[10]

## Embeddings e ingesta a base de datos vectorial

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma


embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

vectorstore = Chroma.from_documents(
    documents=documents,
    embeddings=embeddings
)

retriever = vectorstore.as_retriever(   
    search_kwargs={"k":3}
    )

## Modelos de chat y cadenas para consulta de información

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

chat = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0
)

qa_chain = RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever
)

In [None]:
query = "qué es fingpt?"
qa_chain.run(query)

In [None]:
query = "qué hace complicado entrenar un modelo como el fingpt?"
qa_chain.run(query)

In [None]:
query = "qué es fast segment?"
qa_chain.run(query)

In [None]:
query = "cuál es la diferencia entre fast sam y mobile sam?"
qa_chain.run(query)