In [1]:
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_ollama.llms import OllamaLLM


# Name of the Ollama model; the same name is passed to both the LLM and the
# embeddings wrapper below.
MODEL = "llama2"

# Embedding client (used later by the vector store) and text-generation client
# (used later as the LLM step of the chain).
embeddings = OllamaEmbeddings(model=MODEL)
model = OllamaLLM(model=MODEL)

In [2]:
import os

from dotenv import load_dotenv


# Populate the process environment from the local ".env" file.
load_dotenv(".env")

# Both values are None when the corresponding variable is not set.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_API_REGION = os.getenv("PINECONE_API_REGION")

In [3]:
import time

from pinecone import Pinecone, ServerlessSpec


# Pinecone client plus the serverless deployment target (AWS, region taken
# from the environment).
pc = Pinecone(api_key=PINECONE_API_KEY)
serverless_spec = ServerlessSpec(cloud="aws", region=PINECONE_API_REGION)
index_name = "git-book"

# Names of the indexes that exist before any creation is attempted.
existing_indexes = [info["name"] for info in pc.list_indexes()]

if index_name not in existing_indexes:
    # NOTE(review): 4096 is presumably the vector size produced by the
    # `MODEL` embeddings -- confirm if the embedding model is changed.
    pc.create_index(
        name=index_name,
        dimension=4096,
        metric="cosine",
        spec=serverless_spec,
    )
    # Poll once per second until the new index reports itself as ready.
    while True:
        if pc.describe_index(index_name).status["ready"]:
            break
        time.sleep(1)

# Handle used by later cells for stats and vector operations.
index = pc.Index(index_name)

In [4]:
# Display the index names that were listed from Pinecone before the
# create-if-missing step ran (the list is not refreshed after creation).
existing_indexes

['langchainpinecone', 'test-numerico', 'git-book']

In [17]:
import pathlib

import joblib

from langchain_community.document_loaders import PyPDFLoader

# Load the parsed PDF pages, preferring a local joblib cache over re-parsing.
#
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load cache files that this notebook produced itself.
cache_file = './cache/GitNotesForProfessionals-11-65.jpkl'

try:
    documents = joblib.load(cache_file)
except FileNotFoundError:
    # Cache miss: parse the PDF into one Document per page.
    loader = PyPDFLoader('./pdfs/GitNotesForProfessionals-11-65.pdf')
    documents = loader.load()
    # Create the cache directory first; without this, joblib.dump raises
    # FileNotFoundError on a fresh checkout where ./cache/ does not exist yet.
    pathlib.Path(cache_file).parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(documents, cache_file, compress=9)

documents

[Document(metadata={'source': './pdfs/GitNotesForProfessionals-11-65.pdf', 'page': 0}, page_content='GoalKicker.com – Git ® Notes for Professionals 2Chapter 1: Getting started with Git\nVersion Release Date\n2.13 2017-05-10\n2.12 2017-02-24\n2.11.1 2017-02-02\n2.11 2016-11-29\n2.10.2 2016-10-28\n2.10 2016-09-02\n2.9 2016-06-13\n2.8 2016-03-28\n2.7 2015-10-04\n2.6 2015-09-28\n2.5 2015-07-27\n2.4 2015-04-30\n2.3 2015-02-05\n2.2 2014-11-26\n2.1 2014-08-16\n2.0 2014-05-28\n1.9 2014-02-14\n1.8.3 2013-05-24\n1.8 2012-10-21\n1.7.10 2012-04-06\n1.7 2010-02-13\n1.6.5 2009-10-10\n1.6.3 2009-05-07\n1.6 2008-08-17\n1.5.3 2007-09-02\n1.5 2007-02-14\n1.4 2006-06-10\n1.3 2006-04-18\n1.2 2006-02-12\n1.1 2006-01-08\n1.0 2005-12-21\n0.99 2005-07-11\nSection 1.1: Create your ﬁrst repository, then add and commit\nﬁles\nAt the command line, ﬁrst verify that you have Git installed:\nOn all operating systems:\ngit --version\nOn UNIX-like operating systems:'),
 Document(metadata={'source': './pdfs/GitNotesFor

In [6]:
from langchain_pinecone.vectorstores import PineconeVectorStore


# Wrap the Pinecone index so that documents are embedded with `embeddings`.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Upload the documents only when the index reports zero stored vectors, so
# re-running this cell does not add them a second time.
index_stats = index.describe_index_stats()
if index_stats["total_vector_count"] == 0:
    vector_store.add_documents(documents)

# Retriever with default settings; used as the "context" source of the chain.
retriever = vector_store.as_retriever()

ConnectError: [Errno 111] Connection refused

In [51]:
from operator import itemgetter

from langchain.prompts import PromptTemplate

import langchain_core.output_parsers


# Prompt text (in Spanish): answer the question from the given context, or
# reply with a fixed "no answer" sentence. Placeholders: {context}, {question}.
template = """
Respondé a la siguiente pregunta basada en el contexto dado a continuación.
Si no podés responder a la pregunta, respondé con "No tengo una respuesta."

Contexto: {context}

Pregunta: {question}
"""

parser = langchain_core.output_parsers.StrOutputParser()
prompt = PromptTemplate.from_template(template)

# Pulls the "question" entry out of the input mapping.
get_question = itemgetter("question")

# Inputs for the prompt: the question is sent through the retriever to build
# the context, and is also passed along unchanged.
chain_inputs = {
    "context": get_question | retriever,
    "question": get_question,
}

# Full pipeline: gather inputs -> fill prompt -> call the LLM -> plain string.
chain = chain_inputs | prompt | model | parser

In [54]:
# Questions to ask; the list is currently empty, so the batch call below is
# given no inputs.
questions = []

# The chain expects one {"question": ...} mapping per input.
batch_inputs = [{"question": text} for text in questions]
chain.batch(batch_inputs)