In [1]:
import time
import os

import tiktoken
from queue import Queue

from langchain.document_loaders import pdf
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms.openai import OpenAI
from langchain.callbacks.base import BaseCallbackHandler

In [2]:
pdf_path = "./docs/2022 State of Mental Health in America.pdf"

# The OpenAI key is read from a local text file so it never appears in the
# notebook itself. Surrounding whitespace is stripped because text files
# commonly end with a newline, which would otherwise become part of the key.
with open('openai_api_key.txt', 'r') as f:
    api_key = f.read().strip()

# Fail here with a clear message rather than later with an opaque auth error.
if not api_key:
    raise ValueError('openai_api_key.txt is empty; expected an OpenAI API key')

os.environ['OPENAI_API_KEY'] = api_key

In [3]:
# Load the report through PDFMinerLoader and print what came back.
# In the recorded run below this yields a single document for the whole PDF.
loader = pdf.PDFMinerLoader(pdf_path)
documents = loader.load()
print('Documents count:', len(documents))
for doc in documents:
    # One print call per document: a 42-star rule, then the extracted text.
    print('*' * 42, doc.page_content, sep='\n')

Documents count: 1
******************************************
Acknowledgments 

Mental Health America (MHA) was founded in 1909 and is the nation’s leading community-based nonprofit 

dedicated to addressing the needs of those living with mental illness and promoting the overall mental health 

of all. Our work is driven by our commitment to promote mental health as a critical part of overall wellness, 

including prevention services for all, early identification and intervention for those at risk, integrated services 

and supports for those who need them, with recovery as the goal.  

MHA dedicates this report to mental health advocates who fight tirelessly to help create parity and reduce 

disparities and inequities for people with mental health concerns. To our affiliates, thank you for your incredible 

state-level advocacy and dedication to promoting recovery and protecting consumer rights!  

This publication was made possible by the generous support of Alkermes, Neurocrine Bi

In [4]:
# Estimate what embedding the whole report will cost before building the index.
embedding_model = 'text-embedding-ada-002'
# USD per 1,000 tokens as hard-coded by this notebook -- TODO confirm against
# the current OpenAI price list before relying on the estimate.
price_per_1k_tokens = 0.0004

text = '\n'.join(doc.page_content for doc in documents)
encoding = tiktoken.encoding_for_model(embedding_model)
tokens_count = len(encoding.encode(text))
embeddings_cost = tokens_count / 1000 * price_per_1k_tokens

print('Tokens count:', tokens_count)
print(f'Embeddings cost: ${embeddings_cost:.6f}')

Tokens count: 30605
Embeddings cost: $0.012242


In [5]:
# Build the vector index from the PDF loader using the creator's defaults.
# The recorded output below reports an embedded DuckDB store without
# persistence, so the index only lives as long as this kernel.
index_creator = VectorstoreIndexCreator()
index = index_creator.from_loaders([loader])

Using embedded DuckDB without persistence: data will be transient


In [6]:
# Ask the collection for its own size through its public count() method
# instead of reaching into the client's private database object and
# hard-coding the collection name.
# NOTE(review): `_collection` is still a private attribute of the vector store
# wrapper; no public accessor is visible from this notebook -- confirm.
print('Embeddings count:', index.vectorstore._collection.count())

Embeddings count: 123


In [14]:
class CallbackHandler(BaseCallbackHandler):
    """Buffer streamed LLM tokens in a queue and expose them as a generator.

    Tokens arrive through ``on_llm_new_token`` and are handed out by
    ``stream_aiter``. ``stream_end`` is set once the generation has finished
    (or failed), which tells the generator that no further tokens will come.
    """

    def __init__(self):
        super().__init__()
        # Per-instance state. As class attributes the Queue object would be
        # shared by every handler instance, mixing tokens of unrelated runs.
        self.queue = Queue()
        self.stream_end = False

    def on_llm_start(self, *args, **kwargs):
        """Re-arm the handler so it can be reused for another generation."""
        self.stream_end = False

    def on_llm_new_token(self, token: str, **kwargs):
        """Store a newly generated token for later consumption."""
        self.queue.put(token)

    def on_llm_end(self, _, **kwargs):
        """Mark the stream as finished after a successful generation."""
        self.stream_end = True

    def on_llm_error(self, error, **kwargs):
        """Mark the stream as finished on failure so consumers do not wait forever."""
        self.stream_end = True

    def stream_aiter(self):
        """Yield buffered tokens until the stream has ended and the queue is empty.

        Despite the name this is a regular (synchronous) generator. While the
        generation is still running and no token is available it polls the
        queue every 0.1 seconds.
        """
        while not self.stream_end or not self.queue.empty():
            while not self.queue.empty():
                yield self.queue.get()
            # Only wait when more tokens may still arrive; once the stream has
            # ended the outer condition drains what is left and exits.
            if not self.stream_end:
                time.sleep(0.1)


# Attach the token-collecting handler to an LLM configured for streaming.
callback_handler = CallbackHandler()
llm = OpenAI(temperature=0, streaming=True, callbacks=[callback_handler])

question = "How many americans are experiencing mental illness by age?"
# The query is a plain blocking call here, so the handler's queue is drained
# only after it has returned.
response = index.query_with_sources(question, llm, return_source_documents=True)

# Reassemble the answer from the tokens the handler buffered.
answer = ''.join(callback_handler.stream_aiter())
print('Question', question)
print('Answer:', answer)

# Show the retrieved chunks returned alongside the answer.
for s in response['source_documents']:
    print("*" * 40, s.page_content, sep='\n')

Question How many americans are experiencing mental illness by age?
Answer:  19.86% of adults and 28.2% of youth are experiencing a mental illness.
SOURCES: ./docs/2022 State of Mental Health in America.pdf
****************************************
*2021 Overall Ranking is taken from The State of Mental Health in America 2021 Report, based on data from 2017-2018. 2022 Overall 
Ranking is taken from this report, based on data from 2018-2019.  

19 

 
 
  
 
 
 
 
 
 
 
Adult Prevalence of Mental Illness 
Adults With Any Mental Illness (AMI) 

19.86% of adults are experiencing a 

mental illness. 

Equivalent to nearly 50 million 

Americans. 

4.91% are experiencing a severe mental 

illness. 

The states with the largest increases in 

Adults With Any Mental Illness (AMI) 

were Ohio (2.24%), Nebraska (2.22%), 

Wyoming (2.22%), and Oklahoma 

(2.11%). 

The state prevalence of adult 

mental illness ranges from: 

 16.37% (NJ) 
Ranked 1-13 

26.86 % (UT)  
Ranked 39-51 

Rank 
1 
2 
