In [17]:
from apikey import OPENAI_KEY, PINECONE_KEY, PINECONE_ENV

In [2]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load Data

## Epictetus

In [3]:
# Path is relative to the notebook's working directory.
discourses_pdf = "./data/epictetus_discourses.pdf"

# load() returns a list of documents (the checks below index into it).
loader = UnstructuredPDFLoader(discourses_pdf)
discourses = loader.load()

In [5]:
# Sanity check: number of loaded documents, then the size of the first one.
discourses_count = len(discourses)
print(f"{discourses_count} documents in the data")
discourses_chars = len(discourses[0].page_content)
print(f"{discourses_chars} characters in the document")

1 documents in the data
612025 characters in the document


In [6]:
# Path is relative to the notebook's working directory.
encheiridion_pdf = "./data/epictetus_encheiridion.pdf"

# Rebinds `loader` to a loader for the second PDF and reads it.
loader = UnstructuredPDFLoader(encheiridion_pdf)
encheiridion = loader.load()

In [7]:
# Sanity check: number of loaded documents, then the size of the first one.
encheiridion_count = len(encheiridion)
print(f"{encheiridion_count} documents in the data")
encheiridion_chars = len(encheiridion[0].page_content)
print(f"{encheiridion_chars} characters in the document")

1 documents in the data
44270 characters in the document


# Break up data into smaller documents

In [8]:
# One splitter shared by both texts: chunk_size of 1000 and no overlap
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)

In [9]:
# Chunk each work separately so the per-work counts can be reported.
discourses_texts = text_splitter.split_documents(discourses)
encheiridion_texts = text_splitter.split_documents(encheiridion)

# Combined corpus: all Discourses chunks followed by all Encheiridion chunks.
epictetus_texts = [*discourses_texts, *encheiridion_texts]

In [10]:
# Chunk counts in order: Discourses, Encheiridion, combined corpus.
for chunks in (discourses_texts, encheiridion_texts, epictetus_texts):
    print(f"{len(chunks)} chunked-up documents")

889 chunked-up documents
61 chunked-up documents
950 chunked-up documents


# Create document embeddings

In [11]:
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
import openai
# Authenticate the openai client library with the key imported from apikey.
openai.api_key = OPENAI_KEY

# Optional sanity check that authentication worked (left disabled):
# openai.Engine.list()

  from tqdm.autonotebook import tqdm


In [12]:
# Embedding client handed to the vector store when the chunks are indexed.
embeddings = OpenAIEmbeddings(
    openai_api_key=OPENAI_KEY,
)

In [15]:
# Configure the Pinecone client with the credentials imported from apikey.
pinecone.init(api_key=PINECONE_KEY, environment=PINECONE_ENV)

# Name of the Pinecone index that the chunks are written to.
# NOTE(review): the index is called 'marcus' although this notebook loads
# Epictetus — confirm this is the intended index.
index_name = "marcus"

In [16]:
# Embed every chunk and upload it to the Pinecone index named by `index_name`.
#
# from_documents stores each chunk's metadata alongside its text. The previous
# call passed only page_content, which discarded the metadata, so a search hit
# could not be traced back to the work it came from (e.g. via a source path,
# assuming the loader records one — TODO confirm).
#
# NOTE(review): presumably every run of this cell adds vectors to the index
# rather than replacing them — confirm before re-running to avoid duplicates.
docsearch = Pinecone.from_documents(
    epictetus_texts,
    embeddings,
    index_name=index_name,
)

# Query documents

In [18]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [19]:
# Completion model that writes the final answer (temperature 0.3).
llm = OpenAI(
    openai_api_key=OPENAI_KEY,
    temperature=0.3,
)

# Question-answering chain using the 'stuff' chain type.
# NOTE(review): presumably this puts all supplied documents into a single
# prompt — confirm against the LangChain docs.
chain = load_qa_chain(llm, chain_type="stuff")

In [21]:
# Question to put to the indexed Epictetus texts.
query = "What does it mean to live a good life?"

# Retrieve the chunks the vector store considers most similar to the question.
docs = docsearch.similarity_search(query)

In [22]:
# Answer the question from the retrieved chunks; the returned string is the
# cell's displayed output.
chain.run(
    input_documents=docs,
    question=query,
)

' Living a good life means aiming for high goals, such as freedom and happiness, and making the effort to achieve them. It also means making wise decisions and understanding the difference between good and bad, and between things that are neither good nor bad. Finally, it means being conscious of obeying God and performing the acts of a wise and good man.'