In [1]:
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv before reading the keys below.
load_dotenv()

# Index os.environ directly (rather than .get) so that a missing key fails
# right here with a KeyError instead of surfacing later inside a client call.
openai_api_key, pinecone_api_key = (
    os.environ['OPENAI_API_KEY'],
    os.environ['PINECONE_API_KEY'],
)

Read a PDF file and answer questions about its contents

In [2]:
import tiktoken  # !pip install tiktoken

# Shared encoder used for every token count in this notebook.
tokenizer = tiktoken.get_encoding('cl100k_base')


def tiktoken_len(text):
    """Return how many tokens `tokenizer` produces for `text`.

    `disallowed_special=()` turns off tiktoken's special-token check, so
    special-token strings that happen to appear in the text are encoded as
    ordinary text instead of raising an error.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunk size and overlap are measured with tiktoken_len, i.e. in tokens
# rather than characters.
text_splitter = RecursiveCharacterTextSplitter(
    length_function=tiktoken_len,
    chunk_size=800,
    chunk_overlap=20,
)

In [4]:
from langchain.document_loaders import PyPDFLoader

# Location of the PDF to index. Set the PDF_PATH environment variable (for
# example in the .env file) to point at a different document; when it is not
# set, the original hard-coded location is used so existing runs behave the same.
pdf_path = os.environ.get(
    "PDF_PATH",
    "/home/berk/Downloads/pkpadmin,+529-2711-1-CE.pdf",
)

loader = PyPDFLoader(pdf_path)

# Read the PDF into langchain documents; these are split into chunks later.
data = loader.load()

In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model used both for indexing the chunks and for embedding queries.
model_name = 'text-embedding-ada-002'
embeddings = OpenAIEmbeddings(model=model_name)

# Split the loaded documents into chunks with the token-based splitter.
# The bare name on the last line makes the notebook display the result.
texts = text_splitter.split_documents(data)
texts

[Document(page_content='\x18 cartographic perspectives    Number 43,  Fall 2002\ncartographic    perspectives\nNumber 43, Fall 2002journal of the North American Cartographic Information Society\nNACIS WEB SITE \nwww.nacis.orgLetter from the Editor in this issue\nFEATurEd Ar TIClES\nEarle Birney’s “Mappemounde”: Visualizing Poetry With Maps 4\nAdele J. Haft\nHillshading With Oriented Halftones 25\nPatrick J. KennellyGetting Real: Reflecting on the New Look of 43\nNational Park Service Maps\nTom PattersonBOOK rEVIEWS\n \nAtlas of Oregon, Second Edition 57\nReviewed by Joseph StollAtlas of Oregon CD-ROM 59\nReviewed by Joseph StollThe Map that Changed the World: William Smith and the 61\nBirth of Modern Geology\nReviewed by Brenden E. McNeilFEATurEd Ar TIClE FIGurES \nEarle Birney’s “Mappemounde”: Visualizing Poetry With Maps 65\nGetting Real: Reflecting on the New Look of 74\nNational Park Service Maps\n(continued on page 3 )Dear Members of NACIS,\nAnother issue of Cartographic \nPerspec

In [6]:
import pinecone
from langchain.vectorstores import Pinecone

index_name = 'langchain-test'

# Initialize pinecone client
pinecone.init(
    api_key=pinecone_api_key,
    environment="gcp-starter"
)

# Create the index on first use only.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name,
        metric='dotproduct',
        dimension=1536  # 1536 dim of text-embedding-ada-002
    )

index = pinecone.Index(index_name)

# Decide whether to upload based on the index being empty, not on whether it
# was just created. Otherwise an index left empty by an interrupted earlier
# run would never be populated when the notebook is re-run.
if index.describe_index_stats().total_vector_count == 0:
    # from_documents keeps each chunk's metadata alongside its text, so
    # retrieved documents can still be traced back to where they came from.
    vectorstore = Pinecone.from_documents(texts, embeddings, index_name=index_name)
else:
    # Index already holds vectors: reuse it without re-embedding anything.
    vectorstore = Pinecone.from_existing_index(index_name, embeddings)

# Last expression of the cell: display the current index statistics.
index.describe_index_stats()


  from tqdm.autonotebook import tqdm


{'dimension': 1536,
 'index_fullness': 0.00106,
 'namespaces': {'': {'vector_count': 106}},
 'total_vector_count': 106}

In [7]:
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.chains.question_answering import load_qa_chain

# temperature=0 keeps the completion as deterministic as the API allows.
llm = OpenAI(temperature=0)

query = "Who is “Mappemounde”?"

# Method 1
# Retrieve the chunks most similar to the question, then hand them all to a
# "stuff" question-answering chain together with the question.
docs = vectorstore.similarity_search(query)
chain = load_qa_chain(llm, chain_type="stuff")

chain.run(question=query, input_documents=docs)

# Method 2
# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     chain_type="stuff",
#     retriever=vectorstore.as_retriever()
# )

' “Mappemounde” is a poem composed in 1945 by the esteemed Canadian poet Earle Birney. It imagines the sea voyage—with its isolation and terror, its loneliness and awe—as a poignant metaphor for life’s journey.'