In [1]:
import os
from pprint import pprint

import tiktoken
import yaml
from langchain import OpenAI, VectorDBQA, LLMChain
from langchain.prompts import PromptTemplate

from dataset_vectorizers import DatasetVectorizer
from pdf_loaders import PdfToTextLoader

# Load the notebook settings from config.yml.
# Decode explicitly as UTF-8 instead of relying on the platform's default
# text encoding, which differs between operating systems.
with open("config.yml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Fail here with a clear message rather than later inside an OpenAI call
# (an empty file makes safe_load return None; a missing key would otherwise
# surface as a bare KeyError).
if not isinstance(config, dict) or not config.get('OPENAI_KEY'):
    raise KeyError("config.yml must define a non-empty 'OPENAI_KEY' entry")

OPENAI_API_KEY = config['OPENAI_KEY']

## Loading data from PDF

In [2]:
# Source PDFs to index, a short display name for each one, and the list that
# will receive the path of the text file produced for each PDF.
# NOTE(review): the previous comment credited
# https://www.freelancersunion.org/insurance/health/bronze-plans-nyc/ as the
# data source, which does not match a repair manual — TODO record where
# ./data/hyosung.pdf actually comes from.
PDFS = ['./data/hyosung.pdf']
NAMES = ['HyosungManual']
TXTS = []

# PDFS and NAMES are parallel lists; catch a mismatch before any work is done.
assert len(PDFS) == len(NAMES), "PDFS and NAMES must have the same length"


In [3]:
# Run the PDF-to-text loader on every PDF and record the target text paths in
# TXTS, in the same order as PDFS.
# Empty the list first so that re-running this cell does not append duplicates.
TXTS.clear()
for pdf_path in PDFS:
    # Swap only the final extension. str.replace would rewrite every ".pdf"
    # occurrence in the path and would leave a path without ".pdf" unchanged,
    # making the text target equal to the source file.
    txt_path = os.path.splitext(pdf_path)[0] + ".txt"
    pdf_loader = PdfToTextLoader(pdf_path, txt_path)
    # presumably writes the extracted text to txt_path — TODO confirm in pdf_loaders
    text = pdf_loader.load_pdf()
    TXTS.append(txt_path)

## Vectorizing dataset

In [4]:
# Splitter settings handed to the vectorizer: the size of each chunk and the
# overlap between chunks. Units are defined by DatasetVectorizer — presumably
# characters; TODO confirm.
CHUNK_SIZE = 1_000
CHUNK_OVERLAP = CHUNK_SIZE // 2  # 500

In [7]:
# Vectorize the first extracted text file. The call returns a triple; its last
# element (docsearch_1) is the store that the later cells query.
dataset_vectorizer = DatasetVectorizer()
documents_1, texts_1, docsearch_1 = dataset_vectorizer.vectorize(
    [TXTS[0]],
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    openai_key=OPENAI_API_KEY,
)


## Asking questions

In [144]:
# Free-text questions to put to the manual; the cells below iterate over them.
QUESTIONS = [
    (
        "When i click on the started, i hear a sound of a spark "
        "but the moto doesn't start, what is the problem ?"
    ),
]

In [123]:
# Query the underlying collection directly with the questions and dump the raw
# response (it carries, among other keys, 'documents' and 'distances').
# NOTE(review): `_collection` is a private attribute of the vector store
# wrapper, so this is an inspection aid rather than a stable API.
result = docsearch_1._collection.query(
    query_texts=QUESTIONS,
)
pprint(result)

{'distances': [[0.3228591978549957,
                0.3299810290336609,
                0.33232802152633667,
                0.3430679440498352,
                0.34917712211608887,
                0.36154690384864807,
                0.362285852432251,
                0.36243781447410583,
                0.36305689811706543,
                0.36877235770225525],
               [0.32797783613204956,
                0.33691608905792236,
                0.3397848904132843,
                0.3491109013557434,
                0.3665180206298828,
                0.36716538667678833,
                0.38054680824279785,
                0.38070181012153625,
                0.3847043514251709,
                0.38479456305503845],
               [0.2907882332801819,
                0.3186514675617218,
                0.32647505402565,
                0.32844775915145874,
                0.3318900167942047,
                0.3394894599914551,
                0.3545929789543152,
                

In [65]:
from pprint import pprint

In [118]:
# Concatenate, with no separator, the documents returned for the first question.
# NOTE(review): a later cell rebuilds summary_of_answers starting from an empty
# string, so this value only serves ad-hoc inspection.
first_question_docs = result["documents"][0]
summary_of_answers = "".join(first_question_docs)

In [145]:
# LLM used to answer questions from the indexed manual.
# request_timeout mirrors the value used for the LLM built for the final
# answer, so both API calls are bounded the same way.
llm = OpenAI(
    model_name='text-davinci-003',
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
    request_timeout=60,
)
# Question-answering chain backed by the vector store built above.
qa_chain_1 = VectorDBQA.from_chain_type(llm=llm, chain_type='stuff', vectorstore=docsearch_1)



In [146]:
# Ask every question against the manual and collect one
# "Question: ... / <name> answer: ..." entry per question.
qa_entries = []
for q in QUESTIONS:
    print(q)
    answer_1 = qa_chain_1.run(q)
    qa_entries.append(f"Question: {q}\n{NAMES[0]} answer: {answer_1}")
    print(NAMES[0], answer_1)

# Join the entries with newlines so that, with several questions, one answer no
# longer runs straight into the next "Question:" line. With a single question
# the resulting text is identical to the previous plain concatenation.
summary_of_answers = "\n".join(qa_entries)

When i click on the started, i hear a sound of a spark but the moto doesn't start, what is the problem ?
HyosungManual  It is likely that the ignition switch is not set to the "ON" position, or the ECU is not receiving a signal from the devices. It is recommended to check the ECU for any memorized malfunction codes.


In [147]:
# Count the tokens of the collected question/answer text.
# Use the tokenizer of the model this notebook actually calls
# (text-davinci-003); gpt-3.5-turbo maps to a different encoding, so its count
# would not reflect what that model sees. Re-run the cell to refresh the
# displayed number.
encoder = tiktoken.encoding_for_model("text-davinci-003")
len(encoder.encode(summary_of_answers))

80

In [148]:
# Show the collected question/answer text that is later passed to the final prompt.
print(summary_of_answers)

Question: When i click on the started, i hear a sound of a spark but the moto doesn't start, what is the problem ?
HyosungManual answer:  It is likely that the ignition switch is not set to the "ON" position, or the ECU is not receiving a signal from the devices. It is recommended to check the ECU for any memorized malfunction codes.


## Asking the LLM to formulate a final answer

In [149]:
# Prompt for the final step: it shows the model the original questions and the
# answers obtained from the manual, then asks for one formulated reply.
# {QUESTIONS} and {summary_of_answers} are the two placeholders filled at run time.
template = """
I want you to act motor repair expert. 
I have searched the repair manual trying to answer the following questions :

{QUESTIONS}
Here is a list of answers i found in the repair manual
{summary_of_answers}
Please take into account that the answers could go out of scope, so try to stick as much as possible to the elements in the questions
Formulate for me an answer given the previous details
Your answer:
"""

prompt = PromptTemplate(
    template=template,
    input_variables=[
        "QUESTIONS",
        "summary_of_answers",
    ],
)

In [150]:
# Build the LLM for the final answer (this rebinds `llm`) with
# request_timeout=60, and wrap it together with the prompt in an LLMChain.
llm = OpenAI(
    model_name='text-davinci-003',
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
    request_timeout=60,
)
chain = LLMChain(prompt=prompt, llm=llm)

In [151]:
# Render the questions as plain text, one per line: passing the list itself
# would place its Python repr (brackets and quotes) inside the prompt.
answer = chain.run({
    'summary_of_answers': summary_of_answers,
    'QUESTIONS': "\n".join(QUESTIONS),
})

In [152]:
# Display the text returned by the final chain.
print(answer)

It sounds like the ignition switch may not be set to the "ON" position, or the ECU is not receiving a signal from the devices. To diagnose the issue, it is recommended to check the ECU for any memorized malfunction codes.


In [54]:
# Ask the vector store attached to the QA chain to persist itself.
# NOTE(review): where it is written depends on how DatasetVectorizer configured
# the store, which is not visible in this notebook.
qa_chain_1.vectorstore.persist()