In [31]:
from langchain.document_loaders import PyPDFLoader
# pip install pypdf

In [32]:
# Read the book PDF and split it into Document chunks for embedding.
pdf_path = "./The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf"
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()

In [33]:
# Sanity check: number of Document chunks returned by loader.load_and_split().
len(pages)

72

In [1]:
# Load the OpenAI API key from the local `keys` module (keep that file out of version control).

from keys import OPENAI_API_KEY


In [35]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

In [36]:
# Embed the page chunks and keep the vectors in a Chroma index on disk.
# Embedding calls the OpenAI API (slow and billed), so the index is built only
# when `persist_directory` is missing or empty; later runs reuse what is there.
# Without this guard a fresh checkout would reopen an empty store further down.
import os

# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

# pip install openai
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# An existing but empty directory does not count as a usable index.
index_exists = os.path.isdir(persist_directory) and bool(os.listdir(persist_directory))
if not index_exists:
    # Build from the loaded pages and flush to disk immediately. The handle is
    # not kept: the index is reopened from `persist_directory` in a later cell.
    Chroma.from_documents(
        documents=pages,
        embedding=embeddings,
        persist_directory=persist_directory,
    ).persist()

Running Chroma using direct local API.
No existing DB found in db, skipping load
No existing DB found in db, skipping load


In [37]:
# Flush the index to disk and release the in-memory handle.
# Both statements are currently disabled; the log text recorded under this
# cell appears to come from an earlier run in which they were active.
# vectordb.persist()
# vectordb = None

Persisting DB to disk, putting it in the save folder db
PersistentDuckDB del, about to run persist
Persisting DB to disk, putting it in the save folder db


In [38]:
# Reopen the index stored under `persist_directory`; the same embedding
# function is supplied so queries are embedded consistently with the stored texts.
vectordb = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

Running Chroma using direct local API.
loaded in 72 embeddings
loaded in 1 collections
collection with name langchain already exists, returning existing collection


In [39]:
from langchain import VectorDBQA, OpenAI

In [44]:
# Question-answering chain over the Chroma index.
# chain_type="stuff" puts all retrieved chunks into a single prompt;
# return_source_documents=True returns the retrieved documents with the answer.
model = VectorDBQA.from_chain_type(
    llm=OpenAI(openai_api_key=OPENAI_API_KEY),
    chain_type="stuff",
    vectorstore=vectordb,
    return_source_documents=True,
)



In [41]:
# `model` is created with return_source_documents=True, so the chain has two
# output keys and `model.run(...)` raises ValueError (run needs exactly one).
# Call the chain directly and display only the answer text.
question = "when should on quit any task?"
model({"query": question})["result"]

' One should decide when to quit before the discomfort sets in. Outlining a quitting strategy ahead of time can help make sure that it is a strategic decision and enables you to make the right choices.'

In [42]:
# Answer text only. `model.run(...)` cannot be used here: the chain returns
# both 'result' and 'source_documents', and run() requires a single output key.
question = "who is the author of the book?"
model({"query": question})["result"]

' The author of the book is Seth Godin.'

In [43]:
# Answer text only. The chain is invoked directly instead of via `model.run`,
# which fails when return_source_documents=True adds a second output key.
question = "why vanilla is best selling ice cream"
model({"query": question})["result"]

' Vanilla is the most popular flavor of ice cream according to the International Ice Cream Association. It is the first flavor listed in the top ten flavors of ice cream.'

In [45]:
# Same author question, this time keeping the whole response dict so the
# retrieved source documents can be inspected alongside the answer.
question = "who is the author of the book?"
response = model(dict(query=question))
response

In [55]:
# Checklist-style question; the full response (answer + sources) is displayed.
question = "things to consider before quitting"
response = model(dict(query=question))
response

{'query': 'things to consider before quitting',
 'result': ' Three questions to ask before quitting are: 1) Am I panicking? 2) Who am I trying to influence? 3) Is the pain of the dip worth the benefit of the light at the end of the tunnel? Deciding in advance when to quit is also important, and one should consider if pride is the only thing keeping them from quitting, if there is no dip to get through, and if there are sunk costs that should be ignored.',
 'source_documents': [Document(page_content='Quitting\tBefore\tYou\tStart\nHere’s\tan\tassignment\tfor\tyou:\tWrite\tit\tdown.\tWrite\tdown\tunder\twhat\ncircumstances\tyou’re\twilling\tto\tquit.\tAnd\twhen.\tAnd\tthen\tstick\twith\tit.\nDeciding\tin\tAdvance\tWhen\tto\tQuit\nHere’s\ta\tquote\tfrom\tultramarathoner\tDick\tCollins:\nDecide\tbefore\tthe\trace\tthe\tconditions\tthat\twill\tcause\tyou\tto\tstop\tand\ndrop\tout.\tYou\tdon’t\twant\tto\tbe\tout\tthere\tsaying,\t“Well\tgee,\tmy\tleg\thurts,\nI’m\ta\tlittle\tdehydrated,\tI’m\t

In [58]:
# Detail question about a person mentioned in the book.
question = "What kind of jobs Doug did in his career?"
response = model(dict(query=question))
response

{'query': 'What kind of jobs Doug did in his career?',
 'result': ' Doug had seven jobs in the fourteen years he has been in the same company. It is not specified what kind of jobs he did.',
 'source_documents': [Document(page_content='long\ttime,\tmy\tfriend.”\nDoug\twon’t\tbuy\tit:\t“Yes,\tI’ve\tbeen\there\tfourteen\tyears,\tbut\tI’ve\thad\tseven\njobs.\tWhen\tI\tgot\there,\twe\twere\ta\tstartup,\tbut\tnow\twe’re\ta\tdivision\tof\tCisco.\tI’ve\ngot\tnew\tchallenges,\tand\tthe\tcommute\tis\tgreat…”\nGo\ton,\tinterrupt\thim.\nDoug\tneeds\tto\tleave\tfor\ta\tvery\tsimple\treason.\tHe’s\tbeen\tbranded.\tEveryone\nat\tthe\tcompany\thas\tan\texpectation\tof\twho\tDoug\tis\tand\twhat\the\tcan\tdo.\tWorking\nyour\tway\tup\tfrom\tthe\tmail\troom\tsounds\tsexy;\tbut,\tin\tfact,\tit’s\tentirely\tunlikely.\nDoug\thas\thit\ta\tplateau.\tHe’s\tnot\tgoing\tto\tbe\tchallenged,\tpushed,\tor\tpromoted\tto\npresident.\tDoug,\tregardless\tof\twhat\the\tcould\tactually\taccomplish,\thas\tstopped\nevolvin

In [60]:
# Question about a company example used in the book.
question = "what does Netflix want?"
response = model(dict(query=question))
response

{'query': 'what does Netflix want?',
 'result': ' Netflix wants you to see a lot of movies, so that you become a loyal customer.',
 'source_documents': [Document(page_content='empty\tbike\tor\tto\tafford\ta\tmembership.\nNetflix\tgives\tyou\tan\tunlimited\tnumber\tof\tDVD\trentals\ta\tmonth,\tpostage\npaid,\tfor\t$10.\tHow\tcan\tthis\tbe?\tIf\tyou\twatched\ta\tmovie\tthe\tday\tit\tcame\tin\tand\nsent\tit\tright\tback,\tyou’d\tget\tto\tsee\tat\tleast\tsix\tmovies\tfor\t$10.\tOf\tcourse,\tthe\tkey\nis\tthat\tfor\tevery\tperson\twho\tsees\tsix,\tthere\tare\tplenty\tof\tpeople\twho\tlose\tinterest\nand\tsee\tone\tmovie,\tor\teven\tno\tmovies,\ta\tmonth.\tThese\tpeople\tsubsidize\tthe\ncommitted\tmembers.\tSure,\tNetflix\twants\tyou\tto\tsee\ta\tlot\tof\tmovies—that\tmakes\nyou\ta\tloyal\tcustomer.\tBut\tthe\teconomics\tof\tthe\tentire\tbusiness\twould\tfall\tapart\tif\nit\tweren’t\tfor\tthe\tuncommitted\tusers\twho\tjust\tdabble.\nFor\ta\tlong\ttime,\tairlines\toversold\ttheir\tflights\tbe

In [61]:
# Two-part question: a count plus a summary of each curve.
question = "how many dip curves are there? Summarize them?"
response = model(dict(query=question))
response

{'query': 'how many dip curves are there? Summarize them?',
 'result': ' There are three dip curves: The Dip, The Cul-de-Sac, and The Cliff. The Dip is a period of hard work and dedication that needs to be endured in order to achieve success. The Cul-de-Sac is a situation in which nothing much changes no matter how much effort is put in. The Cliff is a situation in which it is best to quit now if you want to be successful.',
 'source_documents': [Document(page_content='THE\tDIP', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 1}, lookup_index=0),
  Document(page_content='THE\tDIP', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 7}, lookup_index=0),
  Document(page_content='chemistry,\twell,\tthen,\tyou\tcan’t\tgo\tto\tmed\tschool.\nAt\tthe\tbeginning,\twhen\tyou\tannounce\tthat\tyou’re\tpremed,\tyou\tget\tall\tsorts\nof\tpositive\tfee

In [62]:
# Definition lookup for a single term.
question = "what is ego dip?"
response = model(dict(query=question))
response

{'query': 'what is ego dip?',
 'result': " Ego dip is the dip that people get stuck in when they can't give up control or the spotlight.",
 'source_documents': [Document(page_content='THE\tDIP', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 1}, lookup_index=0),
  Document(page_content='THE\tDIP', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 7}, lookup_index=0),
  Document(page_content='THE\tDIP\nA\tLITTLE\tBOOK\tTHAT\tTEACHES\tYOU\nWHEN\tTO\tQUIT\t(AND\tWHEN\tTO\tSTICK)\nSeth\tGodin', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 2}, lookup_index=0),
  Document(page_content='bigger\tspace\tor\tinvest\tin\tnew\ttechniques.\tSuccessful\tentrepreneurs\tunderstand\nthe\tdifference\tbetween\tinvesting\tto\tget\tthrough\tthe\tDip\t(a\tsmart\tmove)\tor\ninvestin

In [63]:
# List-style question; the displayed sources show which chunks supplied each item.
question = "what flavors of ice creams mentioned?"
response = model(dict(query=question))
response

{'query': 'what flavors of ice creams mentioned?',
 'result': " Strawberry, Neapolitan, Vanilla, French Vanilla, Cookies 'n' Cream, and Chocolate Chip.",
 'source_documents': [Document(page_content='Strawberry\nNeapolitan', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 16}, lookup_index=0),
  Document(page_content='Vanilla', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 13}, lookup_index=0),
  Document(page_content='French\tVanilla\nCookies\t’n’\tCream', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 18}, lookup_index=0),
  Document(page_content='Chocolate\tChip', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 17}, lookup_index=0)]}

In [64]:
# Open-ended "how to" question answered from the retrieved chunks.
question = "how to be best in the world?"
response = model(dict(query=question))
response

{'query': 'how to be best in the world?',
 'result': " To be the best in the world, you need to find a field with a steep Dip - a barrier between those who try and those who succeed. You must also quit all the projects and investments that don't offer you the same opportunity. Finally, you need to have the guts to get through the hard stuff and come out on the other side.",
 'source_documents': [Document(page_content='Being\tthe\tBest\tin\tthe\tWorld\nIs\tSeriously\tUnderrated', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 9}, lookup_index=0),
  Document(page_content='Contents\nBeing\tthe\tBest\tin\tthe\tWorld\tIs\tSeriously\tUnderrated\nIf\tYou’re\tNot\tGoing\tto\tGet\tto\t#1,\tYou\tMight\tas\tWell\tQuit\tNow.\nThe\tBest\tin\tthe\tWorld?\nAcknowledgments', lookup_str='', metadata={'source': './The Dip A Little Book That Teaches You When to Quit (and When to Stick).pdf', 'page': 8}, lookup_index=0),
  Docume