# Vector database creation

In [1]:
!pip install -qU pinecone-client langchain_community cohere PyMuPDF

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.8/244.8 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.5/178.5 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.6/117.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.0/990.0 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.5/373.5 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [2]:
import cohere
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from pinecone import Pinecone, ServerlessSpec
import os
import getpass
from langchain.schema import Document
from langchain.embeddings import CohereEmbeddings
from langchain_community.vectorstores import Pinecone as Pinecone_Langchain

In [3]:
# Name of the Pinecone index used by every cell below.
index = 'haven-app'

# Prompt for the API keys interactively so they never appear in the notebook source.
pinecone_secret_key = getpass.getpass('Enter Pinecone secret key:')
# Also exported as an environment variable — presumably read by the LangChain
# Pinecone wrapper when it connects; confirm against the library.
os.environ['PINECONE_API_KEY'] = pinecone_secret_key
cohere_secret_key = getpass.getpass('Enter Cohere secret key:')

Enter Pinecone secret key:··········
Enter Cohere secret key:··········


In [25]:
# Connect to Pinecone and make sure the target index exists.
pc = Pinecone(api_key=pinecone_secret_key)

# Create the index only when it is missing, so the cell can be re-run safely.
if index not in pc.list_indexes().names():
    pc.create_index(
        name=index,
        # Must equal the length of the embedding vectors — presumably 4096 for
        # the default Cohere embedding model; confirm before changing models.
        dimension=4096,
        metric='cosine',
        # Region aligned with the index that actually exists (the recorded
        # describe_index output reports us-east-1, not us-west-2).
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Last expression: display the index description as the cell output.
pc.describe_index(index)

{'deletion_protection': 'disabled',
 'dimension': 4096,
 'host': 'haven-app-rc5yupj.svc.aped-4627-b74a.pinecone.io',
 'metric': 'cosine',
 'name': 'haven-app',
 'spec': {'serverless': {'cloud': 'aws', 'region': 'us-east-1'}},
 'status': {'ready': True, 'state': 'Ready'}}

In [26]:
# Both splitters share the same settings: a target chunk size of 700
# characters (measured with len) and a chunk_overlap of 200 characters.
splitter_options = dict(
    chunk_size=700,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
character_text_splitter = CharacterTextSplitter(**splitter_options)
recursive_text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Books

In [6]:
from google.colab import drive

# Mount Google Drive so the PDFs under "My Drive/Books" can be read below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [12]:
import fitz

## Why Has Nobody Told Me This Before

A book about Dark Places, Motivation, Emotional Pain, Grief, Self-doubt, Fear, Stress, and having a Meaningful Life.

In [57]:
# Extract the text of the book with PyMuPDF.
book_path = '/content/drive/My Drive/Books/Why Has Nobody Told Me This Before.pdf'

# The first 15 and last 20 pages are skipped (presumably front/back matter —
# confirm against the PDF). `with` closes the document even if extraction
# fails part-way, and str.join avoids re-copying the growing string per page.
# NOTE(review): the recorded preview shows mangled ligatures ("dier", "a,er",
# "ge-ing"); consider repairing them before chunking.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(15, pdf_document.page_count - 20)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

CHAPTER 1
Understanding low mood
Everyone has low days.
Everyone.
But we all dier in how frequent the low days are and how severe
the low mood.
Something that I have come to realize over the years of working as a
psychologist is how much people struggle with low mood and never tell
a soul. Their friends and family would never know. They mask it, push it
away and focus on meeting expectations. Sometimes people arrive at
therapy a,er years of doing that.
They feel like they’re ge-ing something wr


In [58]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 733


In [59]:
# Wrap every chunk in a Document tagged with the book it came from.
book_metadata = {"Book": "Why Has Nobody Told Me This Before", "Author": "Dr. Julie Smith"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(book_metadata))
    for chunk in chunks
]

In [60]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"why-has-nobody-told-me-this-before-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

In [66]:
# Sanity-check retrieval with a sample question. The retriever searches the
# whole index, so hits can come from any source uploaded so far.
retriever = vector_store.as_retriever()
matched_docs = retriever.get_relevant_documents('How can I be happy?')
for rank, hit in enumerate(matched_docs):
    print(f"\n## Document {rank}\n", hit.page_content, hit.metadata, sep="\n")


## Document 0

Work on your relationship to failure and building shame
resilience to help you deal with stress in high-pressure
situations.
CHAPTER 32
The problem with ‘I just want
to be happy’
In therapy when we start to shine a light on the way forward and think
about what we want, it’s not uncommon to hear ‘I just want to be
happy.’
But the idea of happiness has been hijacked over the years by an
elusive fairytale of constant pleasure and satisfaction with life. You
don’t have to look far on social media to come across a wave of posts
telling you to ‘be positive, stay happy, eliminate negativity from your
life’.
We are given the impression that happiness is the norm and anything
{'Author': 'Dr. Julie Smith', 'Book': 'Why Has Nobody Told Me This Before'}

## Document 1

KNOW: You are not as happy as you think you are. The hap-
pier you are with something, the less you need other peo-
ple to be. Instead of wondering whether or not someone 
else will think you are enough, stop and ask

## The Mountain Is You

This book is about self-doubt and self-sabotage.

In [62]:
# Extract the text of the book with PyMuPDF.
book_path = '/content/drive/My Drive/Books/The Mountain Is You.pdf'

# The first and last 5 pages are skipped (presumably front/back matter —
# confirm against the PDF). `with` closes the document even if extraction
# fails part-way, and str.join avoids re-copying the growing string per page.
# NOTE(review): the recorded preview shows a page number and running header
# ("5", "THE MOUNTAIN IS YOU") inside the text; consider stripping them.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(5, pdf_document.page_count - 5)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

5
THE MOUNTAIN IS YOU
I N T R O D U C T I O N 
MUCH LIKE NATURE, life is very often working in our favor, 
even when it seems like we are only being faced with ad-
versity, discomfort, and change.
As forest fires are essential to the ecology of the environ-
ment—opening new seeds that require heat to sprout and 
rebuild a population of trees—our minds also go through 
periodic episodes of positive disintegration, or a cleansing 
through which we release and renew our self-concept. We 
know that 


In [63]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 581


In [64]:
# Wrap every chunk in a Document tagged with the book it came from.
book_metadata = {"Book": "The Mountain Is You", "Author": "Brianna Wiest"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(book_metadata))
    for chunk in chunks
]

In [65]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"the-mountain-is-you-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

In [67]:
# Sanity-check retrieval with a question about this book's topic.
retriever = vector_store.as_retriever()
matched_docs = retriever.get_relevant_documents('How to get rid of self-sabotage?')
for rank, hit in enumerate(matched_docs):
    print(f"\n## Document {rank}\n", hit.page_content, hit.metadata, sep="\n")


## Document 0

gence, willpower, or capability. That is usually not the case. 
Self-sabotage is not a way we hurt ourselves; it’s a way we 
try to protect ourselves. 
W H AT I S S E L F-S A B OTA G E?
Self-sabotage is when you have two conflicting desires. 
One is conscious, one is unconscious. You know how you 
want to move your life forward, and yet you are still, for 
some reason, stuck. 
29
THE MOUNTAIN IS YOU
When you have big, ongoing, insurmountable issues in 
your life—especially when the solutions seem so simple, 
so easy, and yet so impossible to stick with—what you 
have are not big problems but big attachments. 
People are pretty incredible in the fact that they basically
{'Author': 'Brianna Wiest', 'Book': 'The Mountain Is You'}

## Document 1

them out. Some of the most prominent symptoms of 
self-sabotage are as follows: 
YOU ARE MORE AWARE OF WHAT YOU DON’T WANT THAN 
WHAT YOU DO.
You spend more of your time worrying, ruminating, and 
focusing on what you hope doesn’t 

## Emotional First Aid

This book talks about failure, rejection, guilt, and other everyday psychological injuries.

In [68]:
# Extract the text of the book with PyMuPDF.
book_path = '/content/drive/My Drive/Books/Emotional First Aid.pdf'

# The first 5 pages are skipped (presumably front matter — confirm against the
# PDF). `with` closes the document even if extraction fails part-way, and
# str.join avoids re-copying the growing string per page.
# NOTE(review): the recorded preview contains stray line breaks and what look
# like invisible hyphenation characters ("ten-", "year-"); consider cleaning
# the text before chunking.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(5, pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

Introduction
A
sk a ten-​
­
year-​
­
old what you should do if you catch a cold and 
the child would immediately recommend getting into bed 
and drinking chicken soup. Ask what you should do if you get a 
cut on your knee and the child would advocate cleaning it (or us-
ing antibacterial ointment) and bandaging it. Children also know 
that if you break a bone in your leg you need to get a cast on it so 
it mends correctly. If you then asked why these steps were neces-
sary they would tell you th


In [69]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 1017


In [70]:
# Wrap every chunk in a Document tagged with the book it came from.
# The author's surname is spelled "Winch"; the original metadata said "Winsh",
# which would be stored with every vector and break author-based filtering.
book_metadata = {"Book": "Emotional First Aid", "Author": "Guy Winch"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(book_metadata))
    for chunk in chunks
]

In [71]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"emotional-first-aid-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

In [72]:
# Sanity-check retrieval. The search spans the whole index, which is why the
# recorded hits include passages from previously uploaded books.
retriever = vector_store.as_retriever()
matched_docs = retriever.get_relevant_documents('I feel like a failure')
for rank, hit in enumerate(matched_docs):
    print(f"\n## Document {rank}\n", hit.page_content, hit.metadata, sep="\n")


## Document 0

CHAPTER 20
You are not your mistakes
Most self-doubt is linked to the relationship we have with failure. I am
not about to sit here and tell you to just be OK with failure, then
everything will be easy. That is not true. Failure is never easy. It stings
every time. We all want to be enough. We all want to be acceptable
and failure is a sign that maybe we weren’t enough this time.
It is not only our relationship with our own failure that needs to
change, but also how we respond to the failure of others. You don’t
have to spend too long on Twi-er to develop an overwhelming fear of
failure. Say the wrong thing in a tweet and a collective army of
{'Author': 'Dr. Julie Smith', 'Book': 'Why Has Nobody Told Me This Before'}

## Document 1

is what keeps you completely stuck. If you feel as though 
you truly failed yourself in some profound way, it becomes 
even more crucial that you move on and create the expe-
rience you desire now.
Your life is not over. You did not fail ind

## 101 Essays That Will Change The Way You Think

In this book, Brianna Wiest explores pursuing purpose over passion, embracing negative thinking, seeing the wisdom in daily routine, and becoming aware of the cognitive biases that are creating the way we see our lives.

In [73]:
# Extract the text of the book with PyMuPDF.
book_path = '/content/drive/My Drive/Books/101 Essays That Will Change The Way You Think.pdf'

# The first 8 pages are skipped. `with` closes the document even if extraction
# fails part-way, and str.join avoids re-copying the growing string per page.
# NOTE(review): the recorded preview starts mid-sentence ("recreating a
# solution ..."), so the offset of 8 may cut into the first essay — confirm
# the intended start page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(8, pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

recreating a solution or an ideal of the past. When things
don’t work out the way you want them to, you think you’ve
failed only because you didn’t re-create something you
perceived as desirable. In reality, you likely created
something better, but foreign, and your brain misinterpreted it
as “bad” because of that. (Moral of the story: Living in the
moment isn’t a lofty ideal reserved for the Zen and
enlightened; it’s the only way to live a life that isn’t infiltrated
with illusions. It’s the on


In [74]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 1020


In [75]:
# Wrap every chunk in a Document tagged with the book it came from.
book_metadata = {"Book": "101 Essays That Will Change The Way You Think", "Author": "Brianna Wiest"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(book_metadata))
    for chunk in chunks
]

In [76]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [
    f"101-essays-that-will-change-the-way-you-think-{n}" for n in range(len(documents))
]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

# Articles

## Building Better Mental Health

In [79]:
# Extract the full text of the article with PyMuPDF (every page is kept).
book_path = '/content/drive/My Drive/Books/Building better mental health.pdf'

# `with` closes the document even if extraction fails part-way, and str.join
# avoids re-copying the growing string on every page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

Understanding good mental health
Your mental health influences how you think, feel, and behave in daily life. It also
affects your ability to cope with stress, overcome challenges, build relationships, and
recover from life’s setbacks and hardships.
Strong mental health isn’t just the absence of mental health problems. Being
mentally or emotionally healthy is much more than being free of depression, anxiety,
or other psychological issues. Rather than the absence of mental illness, mental
health 


In [80]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 35


In [81]:
# Wrap every chunk in a Document tagged with the article it came from.
article_metadata = {"Article": "Building better mental health", "Source": "HelpGuide.org"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(article_metadata))
    for chunk in chunks
]

In [82]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"building-better-mental-health-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

## The Mental Health Benefits of Exercise

In [83]:
# Extract the full text of the article with PyMuPDF (every page is kept).
book_path = '/content/drive/My Drive/Books/The Mental Health Benefits of Exercise.pdf'

# `with` closes the document even if extraction fails part-way, and str.join
# avoids re-copying the growing string on every page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

What are the mental health benefits of exercise?
Exercise is not just about aerobic capacity and muscle size. Sure, exercise can
improve your physical health and your physique, trim your waistline, improve your
sex life, and even add years to your life. But that’s not what motivates most people to
stay active.
People who exercise regularly tend to do so because it gives them an enormous
sense of well-being. They feel more energetic throughout the day, sleep better at
night, have sharper memories


In [84]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 31


In [85]:
# Wrap every chunk in a Document tagged with the article it came from.
article_metadata = {"Article": "The Mental Health Benefits of Exercise", "Source": "HelpGuide.org"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(article_metadata))
    for chunk in chunks
]

In [86]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"the-mental-health-benefits-of-exercise-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

## Tips for Managing Adult ADHD


In [87]:
# Extract the full text of the article with PyMuPDF (every page is kept).
book_path = '/content/drive/My Drive/Books/Tips for Managing Adult ADHD.pdf'

# `with` closes the document even if extraction fails part-way, and str.join
# avoids re-copying the growing string on every page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

How to deal with Adult ADHD (or ADD)
If you have attention deficit hyperactivity disorder (ADHD), previously known as
ADD, everything from paying the bills on time to keeping up with work, family, and
social demands can seem overwhelming. ADHD can present challenges for adults
across all areas of life and can be tough on your health and both your personal and
on-the-job relationships. Your symptoms may lead to extreme procrastination, trouble
meeting deadlines, and impulsive behavior. In additio


In [88]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 38


In [89]:
# Wrap every chunk in a Document tagged with the article it came from.
article_metadata = {"Article": "Tips for Managing Adult ADHD", "Source": "HelpGuide.org"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(article_metadata))
    for chunk in chunks
]

In [90]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"tips-for-managing-adult-adhd-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

## Coping with Grief and Loss

In [91]:
# Extract the full text of the article with PyMuPDF (every page is kept).
book_path = '/content/drive/My Drive/Books/Coping with Grief and Loss.pdf'

# `with` closes the document even if extraction fails part-way, and str.join
# avoids re-copying the growing string on every page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

What is grief?
Grief is a natural response to loss. It’s the emotional suffering you feel when
something or someone you love is taken away. Often, the pain of loss can feel
overwhelming. You may experience all kinds of difficult and unexpected emotions,
from shock or anger to disbelief, guilt, and profound sadness.
The pain of grief can also disrupt your physical health, making it difficult to sleep, eat,
or even think straight. These are normal reactions to loss—and the more significant
the los


In [92]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 34


In [93]:
# Wrap every chunk in a Document tagged with the article it came from.
article_metadata = {"Article": "Coping with Grief and Loss", "Source": "HelpGuide.org"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(article_metadata))
    for chunk in chunks
]

In [94]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"coping-with-grief-and-loss-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)

## Dealing with Uncertainty

In [95]:
# Extract the full text of the article with PyMuPDF (every page is kept).
book_path = '/content/drive/My Drive/Books/Dealing with Uncertainty.pdf'

# `with` closes the document even if extraction fails part-way, and str.join
# avoids re-copying the growing string on every page.
with fitz.open(book_path) as pdf_document:
    text = ''.join(
        pdf_document.load_page(page_num).get_text()
        for page_num in range(pdf_document.page_count)
    )

# Preview the first 500 characters as a sanity check.
print(text[:500])

The role of uncertainty in life
Uncertainty is all around us, never more so than today. Whether it concerns a global
pandemic, the economy, or your finances, health, and relationships, much of what
lies ahead in life remains uncertain. Yet as human beings, we crave security. We
want to feel safe and have a sense of control over our lives and well-being. Fear and
uncertainty can leave you feeling stressed, anxious, and powerless over the direction
of your life. It can drain you emotionally and tr


In [96]:
# Break the extracted text into overlapping chunks ready for embedding.
chunks = recursive_text_splitter.split_text(text)
chunk_total = len(chunks)
print(f"Number of chunks is {chunk_total}")

Number of chunks is 29


In [97]:
# Wrap every chunk in a Document tagged with the article it came from.
article_metadata = {"Article": "Dealing with Uncertainty", "Source": "HelpGuide.org"}
documents = [
    # Each Document receives its own copy of the metadata dict.
    Document(page_content=chunk, metadata=dict(article_metadata))
    for chunk in chunks
]

In [98]:
# Embed the chunks with Cohere and upsert them into the Pinecone index.
embeddings = CohereEmbeddings(cohere_api_key=cohere_secret_key, user_agent='haven-app')

# Deterministic ids make this cell idempotent: re-running it overwrites the
# same records instead of inserting a second copy of every chunk under new ids.
vector_ids = [f"dealing-with-uncertainty-{n}" for n in range(len(documents))]
vector_store = Pinecone_Langchain.from_documents(
    documents, embeddings, index_name=index, ids=vector_ids
)