In [178]:
import os
import openai

# Get the API key from the top-right dropdown on the OpenAI website.
# The key is read from the OPENAI_API_KEY environment variable; when that is
# unset or empty, the literal string "OPENAI_API_KEY" is used instead, which is
# only a placeholder to be replaced by hand, not a real key.
openai.api_key = os.getenv("OPENAI_API_KEY") or "OPENAI_API_KEY"

# api_key = os.getenv("OPENAI_API_KEY")

# Listing the engines is used here as a quick authentication check.
openai.Engine.list()  # check we have authenticated

<OpenAIObject list at 0x1cc8d2f4e50> JSON: {
  "data": [
    {
      "created": null,
      "id": "whisper-1",
      "object": "engine",
      "owner": "openai-internal",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "babbage",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "davinci",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "text-davinci-edit-001",
      "object": "engine",
      "owner": "openai",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "babbage-code-search-code",
      "object": "engine",
      "owner": "openai-dev",
      "permissions": null,
      "ready": true
    },
    {
      "created": null,
      "id": "text-similarity-babbage-001",
      "object": "engine",
      "owner":

In [179]:
def complete(prompt):
    """Send ``prompt`` to the text-davinci-003 completion engine.

    The request uses temperature 0, top_p 1, no frequency/presence penalty,
    no stop sequence and a 400-token limit.

    Args:
        prompt: text passed as the completion prompt.

    Returns:
        The ``text`` of the first returned choice with surrounding
        whitespace stripped.
    """
    request_args = dict(
        engine='text-davinci-003',
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    response = openai.Completion.create(**request_args)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()

In [181]:
# Name of the OpenAI embedding model; upsert_pdf_data and retrieve_from_query
# read this module-level variable and pass it as `engine`.
embed_model = "text-embedding-ada-002"

# Embed two sample strings in a single request to exercise the embedding call.
res = openai.Embedding.create(
    input=[
        "Sample document text goes here",
        "there will be several phrases in each batch"
    ], engine=embed_model
)

# Initializing Pinecone

In [182]:
import pinecone

# Initialize the connection to Pinecone (get an API key at app.pinecone.io).
# Falls back to the literal placeholder "PINECONE_API_KEY" when the
# environment variable is unset or empty.
api_key = os.getenv("PINECONE_API_KEY") or "PINECONE_API_KEY"
# Find your environment next to the API key in the Pinecone console.
# Same placeholder fallback as above.
env = os.getenv("PINECONE_ENVIRONMENT") or "PINECONE_ENVIRONMENT"

pinecone.init(api_key = api_key, environment = env)
# pinecone.whoami()

In [183]:
# Show the index names returned for the initialised Pinecone connection.
pinecone.list_indexes()

['car-repair-docs']

In [184]:
# Module-level handle to the "car-repair-docs" index; upsert_pdf_data and
# retrieve_from_query use this `index` variable directly.
index = pinecone.Index("car-repair-docs")


In [185]:
# Display index statistics (dimension, fullness, per-namespace vector counts).
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 83}},
 'total_vector_count': 83}

# PDF reader

In [186]:
import PyPDF2

In [187]:
# Open the PDF file in binary mode; the handle is closed when the block exits.
with open('Crawfords_Auto_Repair_Guide.pdf', 'rb') as pdf_file:
    # Create a PDF file reader object over the open handle
    pdf_reader = PyPDF2.PdfReader(pdf_file)

    # Number of pages in the PDF file
    num_pages = len(pdf_reader.pages)
    print("No. of Pages:", num_pages)


No. of Pages: 84


In [188]:
# Rebind `pdf_file` to the path string (the previous cell bound it to a file
# handle) and build a fresh reader from the path. The page-extraction loop
# stores this string as each record's 'document' value.
pdf_file = 'Crawfords_Auto_Repair_Guide.pdf'
pdf_reader = PyPDF2.PdfReader(pdf_file)

In [189]:
# Page count of the reader created from the path; bounds the extraction loop.
num_pages = len(pdf_reader.pages)
num_pages

84

In [190]:
# Build one record per PDF page: source document, zero-based page index,
# length of the extracted text in characters, and the text itself.
pdf_data = []
for page_num in range(num_pages):
    # Run text extraction once per page and reuse the result for both the
    # 'text' and 'text_length' fields.
    page_text = pdf_reader.pages[page_num].extract_text()
    pdf_data.append({
        'document': pdf_file,
        'page_num': page_num,  # zero-based index into pdf_reader.pages
        'text_length': len(page_text),
        'text': page_text,
    })

In [191]:
import pandas as pd
# One row per page record; columns come from the keys of each dict in pdf_data.
df = pd.DataFrame(pdf_data)


In [192]:
# Remove pages whose extracted text is empty (text_length == 0).
# This mutates `df` in place.
df.drop(df[df['text_length'] == 0].index, inplace=True)

In [193]:
# Summary statistics of the per-page text length (in characters).
df['text_length'].describe()

count      83.000000
mean     1470.530120
std       864.833994
min        71.000000
25%       885.000000
50%      1365.000000
75%      1946.500000
max      3502.000000
Name: text_length, dtype: float64

In [194]:
# Move the current row index into a regular column named 'idx'; upsert_pdf_data
# uses str(idx) as the vector id.
# NOTE(review): this cell rebinds `df` from itself, so it is not idempotent —
# re-running it without re-running the cells above would presumably add a
# second index-derived column. Confirm before re-executing out of order.
df = df.reset_index().rename(columns={'index': 'idx'})


In [195]:
# Save the cleaned page table to CSV, omitting the DataFrame index.
df.to_csv('Crawfords_Auto_Repair_Guide.csv', index=False)

In [196]:
# Convert the cleaned table to a list of per-page dicts (one dict per row).
# The `index` keyword is deliberately not passed: True is its default, and the
# keyword is only accepted by pandas >= 2.0, so omitting it keeps the same
# result while remaining compatible with older pandas versions.
pdf_data_clean = df.to_dict(orient='records')

In [197]:
# Inspect one record to check its keys and the quality of the extracted text.
pdf_data_clean[2]

{'idx': 2,
 'document': 'Crawfords_Auto_Repair_Guide.pdf',
 'page_num': 2,
 'text_length': 1539,
 'text': 'The Crawford\x01s Auto Repair Guide to Beginner\x01s Auto Maintenance & Repair \nBy Jeff Crawford, Crawford\x01s Auto Repair \nContributing Author: Rex Kimball, Mirex Marketing \nText copyright © Jeff Crawford, Rex Kimball \nAll rights reserved for printed and all digital formats. The following license is granted in \nPDF: \nThis book is available in PDF for free, and it can be reproduced, shared and sold for \nprofit royalty-free in PDF, but it a cannot be changed or adapted in content or format without the authors\x01 consent. Go go http://www.crawfordsautoservice.com/crawfords- \nauto-repair-guide-free-ebook/  to download for free. \nMany of the images in this book are protected by copyright as cited. Some images are \nin the creative commons with \x02free use\x03 licenses. Some are in the public domain or \npresumed to be in the public domain. Claims of any inaccuracies should

In [198]:
def upsert_pdf_data(pdf_data_clean, batch_size=5, max_retries=5, retry_delay=5):
    """Embed page records in batches and upsert them into the Pinecone index.

    Uses the module-level ``openai`` client, the ``embed_model`` name and the
    Pinecone ``index`` handle defined in earlier cells.

    Args:
        pdf_data_clean: list of dicts, each providing the keys 'idx',
            'document', 'page_num', 'text_length' and 'text'.
        batch_size: number of records embedded and upserted per request.
        max_retries: how many times the embedding request for a batch is
            retried after a transient OpenAI error before the error is
            re-raised.
        retry_delay: seconds to wait between retries.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
        openai.error.OpenAIError: a transient error that persisted after
            ``max_retries`` retries, or any non-transient error (for example
            an authentication or invalid-request error), which is raised
            immediately instead of being retried.
    """
    from time import sleep
    from tqdm.auto import tqdm

    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Only these error types are worth retrying. Anything else would fail the
    # same way on every attempt, so it is allowed to propagate.
    retryable_errors = (
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
        openai.error.ServiceUnavailableError,
        openai.error.Timeout,
        openai.error.APIError,
    )

    for start in tqdm(range(0, len(pdf_data_clean), batch_size)):
        # Slicing clamps at the end of the list, so the last batch may be short.
        meta_batch = pdf_data_clean[start:start + batch_size]
        ids_batch = [str(x['idx']) for x in meta_batch]  # ids must be strings
        texts = [x['text'] for x in meta_batch]

        # Create embeddings, retrying a bounded number of times on transient
        # failures such as RateLimitError.
        for attempt in range(max_retries + 1):
            try:
                res = openai.Embedding.create(input=texts, engine=embed_model)
                break
            except retryable_errors as err:
                if attempt == max_retries:
                    raise
                print('Embedding request failed ({}); retry {} of {} in {} seconds'.format(
                    type(err).__name__, attempt + 1, max_retries, retry_delay))
                sleep(retry_delay)

        embeds = [record['embedding'] for record in res['data']]

        # Metadata stored with each vector; 'idx' is left out because it is
        # already used as the vector id.
        new_meta_batch = [{'document': x['document'],
                           'page_num': x['page_num'],
                           'text_length': x['text_length'],
                           'text': x['text']}
                          for x in meta_batch]

        to_upsert = list(zip(ids_batch, embeds, new_meta_batch))

        # upsert is a combination of insert and update
        index.upsert(vectors=to_upsert)

In [199]:
# Sample question; later cells assign the same string to `query` again.
query = "How do I change my oil?"

def retrieve_from_query(query, top_k=2):
    """Embed ``query`` and look up its nearest matches in the Pinecone index.

    Uses the module-level ``openai`` client, ``embed_model`` name and
    ``index`` handle.

    Args:
        query: question text to embed.
        top_k: number of matches requested from the index.

    Returns:
        The raw result of ``index.query`` with metadata included.
    """
    embedding_response = openai.Embedding.create(input=[query], engine=embed_model)
    query_vector = embedding_response['data'][0]['embedding']

    # Metadata is requested so callers can read each match's stored fields.
    return index.query(query_vector, top_k=top_k, include_metadata=True)

In [200]:
def retrieve_relevant_text(query_result):
    """Concatenate the stored text of every match into one context string.

    Args:
        query_result: mapping with a 'matches' sequence; each match provides
            ``match['metadata']['text']``.

    Returns:
        The match texts in their original order, separated by a blank line.
    """
    return '\n\n'.join(
        match['metadata']['text'] for match in query_result['matches']
    )

In [177]:
def get_source_info(query_result):
    """Describe where each match came from, one line per match.

    Args:
        query_result: mapping with a 'matches' sequence; each match provides
            'document' and 'page_num' in its 'metadata'.

    Returns:
        A string with one newline-terminated
        "Document: <document>,  Page #: <page_num>" line per match, in match
        order; empty when there are no matches.
    """
    source_lines = [
        "Document: " + match['metadata']['document']
        + ",  Page #: " + str(match['metadata']['page_num']) + "\n"
        for match in query_result['matches']
    ]
    return "".join(source_lines)

# print(get_source_info(query_result))

In [203]:
# End-to-end retrieval check: embed the question, fetch the two closest
# matches, then derive the combined context text and the source listing.
query = "How do I change my oil?"

query_result = retrieve_from_query(query, top_k=2)
text = retrieve_relevant_text(query_result)
source_info = get_source_info(query_result)
# print(query_result)

# Show the retrieved context followed by its document/page provenance.
print(text)
print(source_info)

Crawfords Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com 
17 Engine Oil . Engine oil should be checked each time the vehicle is refueled (i.e. each 
time you go to the gas station). Most engines, but not all, have a dipstick to indicate the 
oil level. Typically the handle is yellow. Follow these steps to check the oil: 
Turn the engine off. 
Remove the dipstick. 
Wipe off the end of the dipstick with a rag or paper towel. 
Put the dipstick back in. 
Take it out to look at the level at the tip 
The stick will have marks on it. The add mark typically indicates one quart low. If an 
engine is leaking oil then the price of repairs can vary depending on which repair is needed and the make and model of the vehicle. 
Oil dipstick (above) and oil level on a dipstick (below) 
Image credits: both CC-BY-SA-Dvortygirl on Wikipedia 


Crawfords Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com 
45 Engine Oil 
Sample of motor oil 
The pu

In [204]:
# Display the index statistics again at this point in the notebook.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 83}},
 'total_vector_count': 83}

In [247]:
# Minimal chat request: a system message plus a single user message,
# sent to gpt-3.5-turbo.
chat_response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo", 
    messages=[
        {"role": "system", "content": "You are a chatbot."},
        {"role": "user", "content":"hello" }])

In [248]:
# The reply text is the message content of the first choice.
chat_response.choices[0].message.content

'Hello! How can I assist you today?'

In [236]:
# System prompt for the repair assistant. `{context}` is filled in with the
# retrieved manual text via str.format.
# Built from adjacent string literals so that every sentence keeps an explicit
# separating space; joining lines with backslash continuations would fuse the
# sentences together.
system_prompt_template = (
    "You are an expert mechanic. You are helping a customer with a car repair problem. "
    "You answer the customer's question and provide a snippet to a relevant page in the repair manual. "
    "The relevant page of the specific repair manual is provided below.\n"
    "Repair manual:\n{context}\n\n"
    "Proceed to answer the customer's question.\n\n"
)

In [220]:
# Render the template with a throwaway placeholder string to eyeball the
# resulting prompt layout; `context` here is not real manual text.
context = 'sdkfhassadkfjba;sgb'
prompt = system_prompt_template.format(context=context)
print(prompt)

 You are a expert mechanic. You are helping a customer with a car repair problem.You answer the customer's question and provide a snippet to a relevant page in the repair manual.The relevant page of the specific repair manual is provided below delimited in triple backticks.
 Repair manual ```sdkfhassadkfjba;sgb``` 


proceed to answer the customer's question below.


In [255]:
def answer_question(question, system_prompt_template, retrieved_text):
    """Ask gpt-3.5-turbo a question with retrieved text in the system prompt.

    Args:
        question: the user's question, sent as the user message.
        system_prompt_template: format string containing a ``{context}``
            placeholder.
        retrieved_text: text substituted for ``{context}``.

    Returns:
        The message content of the first choice in the chat response.
    """
    conversation = [
        {"role": "system",
         "content": system_prompt_template.format(context=retrieved_text)},
        {"role": "user", "content": question},
    ]
    reply = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=conversation)
    return reply.choices[0].message.content

In [256]:
# NOTE(review): this call passes `context`, which the template smoke-test cell
# set to a placeholder string, rather than text retrieved from the index — so
# the answer is presumably not grounded in the manual. Confirm whether
# `retrieved_text` was intended here.
answer_question("How do I change my oil?", system_prompt_template, context)

"To change your oil, you will need to locate the oil drain plug and the oil filter on your vehicle. Here are the general steps to follow:\n\n1. Gather the necessary tools and supplies: a wrench or socket set, an oil filter wrench, a drain pan, a new oil filter, and the correct type and amount of engine oil as specified in your owner's manual.\n\n2. Park your vehicle on a flat surface and turn off the engine. Make sure it has cooled down to avoid any potential burns.\n\n3. Locate the oil drain plug underneath the engine. Place the drain pan directly beneath it to catch the old oil.\n\n4. Use a wrench or socket set to carefully loosen and remove the drain plug. Allow the old oil to completely drain into the pan.\n\n5. While the oil is draining, locate the oil filter on the engine. Use an oil filter wrench to remove it, turning it counterclockwise.\n\n6. Once the old oil has finished draining, replace the drain plug and tighten it securely with your wrench or socket set.\n\n7. Before inst

In [229]:
# Retrieve the two closest manual pages for the question and join their text
# into the context string handed to answer_question.
query = "How do I change my oil?"
query_result = retrieve_from_query(query, top_k=2)
retrieved_text = retrieve_relevant_text(query_result)

In [257]:
# Display the raw retrieved context (repr form, so escape sequences are visible).
retrieved_text

"Crawford\x01s Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com \n17 Engine Oil . Engine oil should be checked each time the vehicle is refueled (i.e. each \ntime you go to the gas station). Most engines, but not all, have a dipstick to indicate the \noil level. Typically the handle is yellow. Follow these steps to check the oil: \n\uf0b7Turn the engine off. \n\uf0b7Remove the dipstick. \n\uf0b7Wipe off the end of the dipstick with a rag or paper towel. \n\uf0b7Put the dipstick back in. \n\uf0b7Take it out to look at the level at the tip \nThe stick will have marks on it. The \x02add\x03 mark typically indicates one quart low. If an \nengine is leaking oil then the price of repairs can vary depending on which repair is needed and the make and model of the vehicle. \nOil dipstick (above) and oil level on a dipstick (below) \nImage credits: both CC-BY-SA-Dvortygirl on Wikipedia \n\n\nCrawford\x01s Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoSe

In [245]:
# Answer the question using the retrieved manual text as context.
# NOTE(review): this cell's execution count (245) is lower than that of the
# cell defining answer_question (255), so the recorded output below appears to
# come from an earlier run — re-run to refresh it.
answer_question(query, system_prompt_template, retrieved_text)

You are a expert mechanic. You are helping a customer with a car repair problem.You answer the customer's question and provide a snippet to a relevant page in the repair manual.The relevant page of the specific repair manual is provided below.
 Repair manual:
 Crawfords Guide to Beginners Auto Maintenance & Repair  www.CrawfordsAutoService.com 
17 Engine Oil . Engine oil should be checked each time the vehicle is refueled (i.e. each 
time you go to the gas station). Most engines, but not all, have a dipstick to indicate the 
oil level. Typically the handle is yellow. Follow these steps to check the oil: 
Turn the engine off. 
Remove the dipstick. 
Wipe off the end of the dipstick with a rag or paper towel. 
Put the dipstick back in. 
Take it out to look at the level at the tip 
The stick will have marks on it. The add mark typically indicates one quart low. If an 
engine is leaking oil then the price of repairs can vary depending on which repair is needed and the make and model

InvalidRequestError: Unrecognized request argument supplied: messages