<a href="https://colab.research.google.com/github/evanhkim/Doccou/blob/master/conv_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# A GPU runtime is enabled in the notebook settings; the command below shows
# which GPU has been assigned to this session.
! nvidia-smi

Fri Apr 21 01:05:42 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8     9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
!pip install streamlit langchain faiss-cpu sentence_transformers pypdf huggingface_hub > /dev/null

In [None]:
import re
import time
from typing import Any, Dict, List
from getpass import getpass
import os
from google.colab import drive
from sentence_transformers import SentenceTransformer

from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import VectorStore
from langchain.vectorstores import FAISS
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.chains import RetrievalQAWithSourcesChain

In [None]:
# Embedding demo: encode two sample sentences with
# sentence-transformers/nli-mpnet-base-v2 (regarded by the author as one of the
# best publicly available embedding models) and print the resulting vectors.
model_embedding = SentenceTransformer('sentence-transformers/nli-mpnet-base-v2')

sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]
embeddings = model_embedding.encode(sentences)
print(embeddings)

Downloading (…)bacfb/.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)272bbbacfb/README.md:   0%|          | 0.00/3.66k [00:00<?, ?B/s]

Downloading (…)2bbbacfb/config.json:   0%|          | 0.00/587 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)bacfb/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

Downloading (…)272bbbacfb/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bbbacfb/modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

[[ 0.1170756  -0.407476    0.07239146 ... -0.11628885  0.07247799
  -0.04741186]
 [ 0.12877965 -0.21258678  0.08189406 ... -0.18847789 -0.15635683
  -0.06030356]]


In [None]:
# Mount Google Drive to reach the Firstech master guide PDF stored there.
drive.mount('/drive')
file_path = '/drive/My Drive/Colab Notebooks/masterguide.pdf'

############################
# NOTE: pdfminer.six is benchmarked to perform better than this loader.
# Trying it as an alternative may improve the retrieval quality.
# https://github.com/pdfminer/pdfminer.six
############################

# Load the PDF and split it into Document chunks.
loader = PyPDFLoader(file_path)
pages = loader.load_and_split()

# Display the sixth chunk of the split result (index 5). Because the loader
# splits the text, this is not guaranteed to be the guide's sixth page.
pages[5]

Mounted at /drive


Document(page_content='CMX is t he Solution                                     \nhttps://install.myfirstech.com  \nCMX Master Guide \nCopyright 2020 Firstech, LLC.                                                                   “Hello, my name is Inigo Montoya....”  Page 6  \n  \nControl Module stats \n \n- 5mA idle current draw (CM only, no accessories) \n- 7-10mA additional current draw when adding a Firstech antenna \n- 5-7mA additional current draw when adding a Firstech DAS/DASII \n- 5-7mA additional current draw when adding an Idatalink interface module (Blade, ALCA) \n- POC 2, POC 3 , provide 500mA output \n- All other (-) negative outputs provide 250mA, and each one is diode isolated from the other. They can be \ndoubled up to increase output. \n- LC outputs are 3A max current handling. \n- Siren outputs provide 2A MAX (+) positive outputs (2 pin brown CN, 20 pin I/O CN pin 2) \n- CMX offers 3 High Current programmable outputs (HPC) \n- CMX offers 11 programmable output chan

In [None]:
# Build the FAISS index over the PDF chunks. The chunks are embedded with
# HuggingFaceEmbeddings, configured explicitly to use
# sentence-transformers/all-mpnet-base-v2.
model_name = "sentence-transformers/all-mpnet-base-v2"
index_embedder = HuggingFaceEmbeddings(model_name=model_name)
index = FAISS.from_documents(pages, index_embedder)

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
# Retrieval sanity check: fetch the two chunks closest to a sample phrase and
# print each one prefixed with the page number stored in its metadata.
docs = index.similarity_search("turn ignition to the on", k=2)
for doc in docs:
    print(f'{doc.metadata["page"]}:', doc.page_content)

21: CMX is t he Solution                                     
https://install.myfirstech.com  
CMX Master Guide 
Copyright 2020 Firstech, LLC.                                                                   “Hello, my name is Inigo Montoya....”  Page 22  
  
Wiring Descriptions  
 
Connector 1 (CN1): 8-Pin High current power harness included (NOTE: Please see FT-LC1 for a low 
current version of CN1)  
Pin 1 Red - Constant 12V positive (+) power input (25A MAX):  (This input provides power to the CM 
processor, Ignition 1, and accessory ports)  This wire MUST be connected. The proper vehicle wire will 
test (+) 12V at all times, even when the key is in the off position, on position, and during crank. 
 
 
Pin 2 Green/White – High Current (HC) Programmable Output (10A MAX): This positive (+) parking light 
(default setting)  wire activates with lock, unlock, remote start, or during troubleshooting diagnostics. 
Note: This output is programmable and can provide several (+) outputs base

In [None]:
# Prompt for a Hugging Face access token (create one at
# https://huggingface.co/settings/tokens) and export it through the
# HUGGINGFACEHUB_API_TOKEN environment variable (presumably what the
# HuggingFaceHub wrapper reads — confirm against the library docs).
# An explicit prompt replaces getpass's generic "Password: " default, and
# whitespace picked up while copy/pasting the token is stripped.
HUGGINGFACEHUB_API_TOKEN = getpass("Hugging Face API token: ").strip()
if not HUGGINGFACEHUB_API_TOKEN:
    # Stop early so an empty token does not surface as a harder-to-trace
    # failure in a later cell.
    raise ValueError("No Hugging Face API token was entered.")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

··········


In [None]:
# Model served through the Hugging Face Hub. For other options see
# https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads
repo_id = "google/flan-t5-base"
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs={"temperature": 0, "max_length": 64},
)

In [None]:
# Build a "stuff"-type question-answering-with-sources chain around the LLM.
# NOTE(review): `chain` is reassigned in the following cell before it is ever
# called, so this cell currently has no effect on the results below.
chain = load_qa_with_sources_chain(llm, chain_type="stuff")

In [None]:
# Retrieval-backed QA chain: the FAISS index, exposed as a retriever, supplies
# the documents and the LLM answers using the "stuff" chain type.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm,
    chain_type="stuff",
    retriever=index.as_retriever(),
)

In [None]:
# Question used for both the raw index lookup and the QA chain below.
query = "how to start the engine"

In [None]:
# Look the query up directly in the index and print the k closest chunks, each
# prefixed with the page number stored in its metadata.
docs = index.similarity_search(query, k=2)
for doc in docs:
    print(f'{doc.metadata["page"]}:', doc.page_content)

18: CMX is t he Solution                                     
https://install.myfirstech.com  
CMX Master Guide 
Copyright 2020 Firstech, LLC.                                                                   “Hello, my name is Inigo Montoya....”  Page 19  
  
Alternator Sensing  
Alternator sensing is another method the remote start can utilize to determine if the engine is running. This is 
option requires a wire (yellow/black) connection to the alternator stator wire. NOTE: there is no 
programming procedure needed for this option  
 
STEP 1: Change Option 2-10 to setting 2 - Alternator sensing. 
 
 
STEP 2: Test wire and make connection. The stator wire is found at the vehicle’s alternator. Change 
your multimeter to DC voltage before testing for this wire. 
A. At rest, with the ignition off, the stator wire should test 0V DC. 
B. Turn the ignition to the run position. The stator wire should now test between 4 – 6V DC. 
C. Start the vehicle with the key. The stator wire should now 

In [None]:
# Ask the LLM for a summary answer based on what the retriever finds in the
# index. The retriever was created without an explicit k, so the library
# default applies (believed to be 4 for FAISS — TODO confirm).
chain(
    {"question": query},
    return_only_outputs=True,
)

{'answer': 'PIN # 4 at the standard OBD II connection.', 'sources': ''}