In [11]:
import os
from pathlib import Path

from llama_index import GPTVectorStoreIndex, download_loader, MockLLMPredictor, ServiceContext, MockEmbedding, LLMPredictor
from llama_index.node_parser.simple import SimpleNodeParser
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter

from langchain.llms.openai import OpenAI

import llama_index
print('LlamaIndex version:', llama_index.__version__)

LlamaIndex version: 0.6.2


In [4]:
pdf_path = "./docs/2022 State of Mental Health in America.pdf"
api_key = "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"

os.environ['OPENAI_API_KEY'] = api_key

In [5]:
pdf_reader = download_loader("PDFReader")
loader = pdf_reader()
documents = loader.load_data(file=Path(pdf_path))

print('Documents count:', len(documents))
for doc in documents:
    print('*' * 42)
    print(doc.text)

Documents count: 1
******************************************

Acknowledgments  
Mental Health America (MHA) was founded in 1909 and is the nation’s leading community -based nonprofit 
dedicated to addressing the needs of those living with mental illness and promoting the overall mental health 
of all.  Our work is driven by our commitment  to promote mental health as a critical part of overall wellness, 
including prevention services for all, early identification and intervention for those at risk, integrated services  
and supports  for those who need them , with recovery as the goal.  
MHA dedi cates this report to mental health advocates who fight tirelessly to help create parity and reduce 
disparit ies and inequities  for people with mental health concerns. To our affiliates, thank you for your incredible 
state -level advocacy and dedication to pr omoting recovery and protecting consumer rights!   
This publication was made possible by the generous support of Alkermes, Neurocrin

In [6]:
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=0)
node_parser = SimpleNodeParser(text_splitter=text_splitter)

llm_predictor = MockLLMPredictor(max_tokens=2049)
embed_model = MockEmbedding(embed_dim=1536)

service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor, embed_model=embed_model, node_parser=node_parser)

_index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
index_tokens = embed_model.last_token_usage
index_tokens

33068

In [25]:
llm = OpenAI(model_kwargs={"best_of": 1})
llm_predictor = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(node_parser=node_parser, llm_predictor=llm_predictor)
index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context)
query_engine = index.as_query_engine(streaming=True)

In [26]:
question = "How many americans are experiencing mental illness by age?"
response = query_engine.query(question)
sources = [n.node.get_text() for n in response.source_nodes]
answer = ' '.join([w for w in response.response_gen])

print('Question', question)
print('Answer:', answer)

for n in response.source_nodes:
    print("*" * 40)
    print(n.node.get_text())

Question How many americans are experiencing mental illness by age?
Answer: 
 It  is  not  possible  to  answer  this  question  based  on  the  provided  context  information . 
****************************************
22.56  98,000 
44 Missouri  22.71  1,056 ,000 
45 District of Columbia  22.83  129,000 
46 Colorado  23.20  1,014 ,000 
47 Washington  23.43  1,360 ,000 
48 Ohio  23.64  2,112 ,000 
49 Oregon  23.75  783,000 
50 West Virginia  24.62  347,000 
51 Utah  26.86  599,000 
 National  19.86 49,564 ,000 19.86% of adults are experiencing a 
mental illness.  
Equivalent to nearly  50 million 
Americans.  
4.91% are experiencing a severe mental 
illness.  
 
 
 
 
 
    
 
According to SAMHSA, “Any Mental Illness (AMI) is defined as having a diagnosable mental, behavioral, or emotional disorder, other 
than a developmental or substance use disorder, assessed by the Mental Health Surveillance Study (MHSS) Structured Clinical I nterview 
for the Diagnostic and Statistical Manual of 