In [1]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv(), override=True)

True

In [2]:
!pip3 install pypdf -q

In [3]:
def load_document(file):
    import os
    name, extension = os.path.splitext(file)

    if extension == '.pdf':
        from langchain.document_loaders import PyPDFLoader  # We are importing here to prevent circular dependency.
        print(f"Loading {file}")
        loader = PyPDFLoader(file)  # This also works with online PDFs!
    elif extension == '.docx':
        from langchain.document_loaders import Docx2txtLoader
        print(f"Loading {file}")
        loader = Docx2txtLoader(file)  # This also works with online PDFs!
    # Add as many extension types as we want.
    else:
        print('Document format is not supported!')
        return None
    
    data = loader.load()
    return data

In [4]:
def chunk_data(data, chunk_size=256, chunk_overlap=0):
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = text_splitter.split_documents(data)
    return chunks

In [5]:
def print_embedding_cost(texts):
    import tiktoken
    enc = tiktoken.encoding_for_model('text-embedding-ada-002')
    total_tokens = sum([len(enc.encode(page.page_content)) for page in texts])
    print(f'Total Tokens: {total_tokens}')
    print(f'Embedding cost in USD: ${total_tokens / 1000 * 0.0004:.6f}')

In [6]:
def insert_or_fetch_embeddings(index_name):
    import pinecone
    from langchain.vectorstores import Pinecone
    from langchain.embeddings.openai import OpenAIEmbeddings

    embeddings = OpenAIEmbeddings()

    pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'),
                  environment=os.environ.get('PINECONE_ENV'))
    
    if index_name in pinecone.list_indexes():
        print(f"Index {index_name} already exists: Loading embeddings ...")
        vector_store = Pinecone.from_existing_index(index_name, embeddings)
        print("Done")
    else:
        print(f"Creating index {index_name} and embeddings ...")
        pinecone.create_index(index_name, dimension=1536, metric='cosine')
        vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
        print("Done")
        
    return vector_store

In [7]:
def delete_pinecone_index(index_name='all'):
    import pinecone
    pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'),
                  environment=os.environ.get('PINECONE_ENV'))
    
    if index_name == 'all':
        indexes = pinecone.list_indexes()
        print(f"Deleting all indexes {indexes}...")
        for index in indexes:
            pinecone.delete_index(index)
        print("Done")
    else:
        print(f"Deleting index {index_name}...", end="")
        pinecone.delete_index(index_name)
        print("Done")


In [8]:
data = load_document('./Venu_2_manual.pdf')

Loading ./Venu_2_manual.pdf


In [9]:
print(data[0].page_content)

VENU® 2 SERIES 
Owner’s Manual


In [10]:
print(data[0].metadata)
print(f"{len(data)} pages in this data.")

{'source': './Venu_2_manual.pdf', 'page': 0}
72 pages in this data.


In [11]:
chunks = chunk_data(data=data, chunk_overlap=20)
print(len(chunks))

710


In [12]:
print(chunks[10].page_content)

Wi-Fi Alliance Corporation. Windows® is a registered trademark of Microsoft Corporation in the United States and other countries. Other trademarks and trade names are those of their 
respective owners.


In [13]:
print_embedding_cost(chunks)

Total Tokens: 32079
Embedding cost in USD: $0.012832


In [14]:
delete_pinecone_index()

  from tqdm.autonotebook import tqdm


Deleting all indexes []...
Done


In [18]:
index_name = 'garminvenumanual'
vector_store = insert_or_fetch_embeddings(index_name)

Creating index garminvenumanual and embeddings ...
Done


In [20]:
def ask_and_get_answer(vector_store, q):
    from langchain.chains import RetrievalQA
    from langchain.chat_models import ChatOpenAI

    llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)

    retriever = vector_store.as_retriever(search_type='similarity',search_kwargs={'k': 5})  # the 5 most similar chunks

    chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)  #stuff is the default and uses all of the text
    answer = chain.run(q)
    return answer

def ask_with_memory(vector_store, q, chat_history=[]):
    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import ChatOpenAI

    llm = ChatOpenAI(temperature=1)
    retriever = vector_store.as_retriever(search_type='similarity',search_kwargs={'k': 5},)

    crc = ConversationalRetrievalChain.from_llm(llm, retriever)
    result = crc({'question': q, 'chat_history': chat_history})
    chat_history.append((q, result['answer']))

    return result, chat_history

In [21]:
q = "What is this manual for?"
answer = ask_and_get_answer(vector_store, q)
answer

'This manual is for the VENU® 2 SERIES device.'

In [22]:
q = "My step count doesn't seem accurate. How do I fix it?"
answer = ask_and_get_answer(vector_store, q)
answer

"If your step count doesn't seem accurate, there are a few tips you can try to improve accuracy. \n\n1. Remain stationary for a few minutes: Sometimes, if you've been moving constantly, the watch may not accurately track your steps. To improve accuracy, try staying still for a few minutes and then check if the step count updates correctly.\n\n2. Check for repetitive motions: The watch may interpret some repetitive motions, such as washing dishes or folding laundry, as steps. Be aware of these activities that may falsely increase your step count.\n\n3. Calibrate the accelerometer: The accuracy of the step count can improve over time, especially after a few outdoor runs or walks using GPS. The watch's accelerometer is self-calibrating, so the more outdoor activities you track, the better the accuracy will become.\n\nIf these tips don't improve the accuracy of your step count, you can visit garmin.com/ataccuracy for more information about activity tracking and fitness metric accuracy."

# Garmin Venu 2 Test

Q: "What is this manual for?"

A: "This manual is for the VENU® 2 SERIES device."

Q: "My step count doesn't seem accurate. How do I fix it?"

A: "If your step count doesn't seem accurate, there are a few tips you can try to improve accuracy.

Remain stationary for a few minutes: Sometimes, if you've been moving constantly, the watch may not accurately track your steps. To improve accuracy, try staying still for a few minutes and then check if the step count updates correctly.

Check for repetitive motions: The watch may interpret some repetitive motions, such as washing dishes or folding laundry, as steps. Be aware of these activities that may falsely increase your step count.

Calibrate the accelerometer: The accuracy of the step count can improve over time, especially after a few outdoor runs or walks using GPS. The watch's accelerometer is self-calibrating, so the more outdoor activities you track, the better the accuracy will become.

If these tips don't improve the accuracy of your step count, you can visit garmin.com/ataccuracy for more information about activity tracking and fitness metric accuracy."


In [23]:
for line in answer:
    print(line)

I
f
 
y
o
u
r
 
s
t
e
p
 
c
o
u
n
t
 
d
o
e
s
n
'
t
 
s
e
e
m
 
a
c
c
u
r
a
t
e
,
 
t
h
e
r
e
 
a
r
e
 
a
 
f
e
w
 
t
i
p
s
 
y
o
u
 
c
a
n
 
t
r
y
 
t
o
 
i
m
p
r
o
v
e
 
a
c
c
u
r
a
c
y
.
 




1
.
 
R
e
m
a
i
n
 
s
t
a
t
i
o
n
a
r
y
 
f
o
r
 
a
 
f
e
w
 
m
i
n
u
t
e
s
:
 
S
o
m
e
t
i
m
e
s
,
 
i
f
 
y
o
u
'
v
e
 
b
e
e
n
 
m
o
v
i
n
g
 
c
o
n
s
t
a
n
t
l
y
,
 
t
h
e
 
w
a
t
c
h
 
m
a
y
 
n
o
t
 
a
c
c
u
r
a
t
e
l
y
 
t
r
a
c
k
 
y
o
u
r
 
s
t
e
p
s
.
 
T
o
 
i
m
p
r
o
v
e
 
a
c
c
u
r
a
c
y
,
 
t
r
y
 
s
t
a
y
i
n
g
 
s
t
i
l
l
 
f
o
r
 
a
 
f
e
w
 
m
i
n
u
t
e
s
 
a
n
d
 
t
h
e
n
 
c
h
e
c
k
 
i
f
 
t
h
e
 
s
t
e
p
 
c
o
u
n
t
 
u
p
d
a
t
e
s
 
c
o
r
r
e
c
t
l
y
.




2
.
 
C
h
e
c
k
 
f
o
r
 
r
e
p
e
t
i
t
i
v
e
 
m
o
t
i
o
n
s
:
 
T
h
e
 
w
a
t
c
h
 
m
a
y
 
i
n
t
e
r
p
r
e
t
 
s
o
m
e
 
r
e
p
e
t
i
t
i
v
e
 
m
o
t
i
o
n
s
,
 
s
u
c
h
 
a
s
 
w
a
s
h
i
n
g
 
d
i
s
h
e
s
 
o
r
 
f
o
l
d
i
n
g
 
l
a
u
n
d
r
y
,
 
a
s
 
s
t
e
p
s
.
 
B
e
 
a
w
a
r
e
 
o
f
 
t
h
e
s
e
 
a
c
t
i
v
i
t
