# OpenAI + Pinecone

## Searching with populated context 

Uses the embeddings indexed in Pinecone to fetch the closest-matching paragraphs, which are then supplied to OpenAI as additional context during question answering (Q&A).

### Used libraries
- OpenAI: For creating embeddings from paragraphs within chapters
- Pinecone: Used to store and search by cosine-similarity across embeddings
- Retry: Used for retrying failures accessing APIs

In [1]:
import openai
import os
import pinecone
# Name of the Pinecone index that is opened further down with pinecone.Index(...).
index_name = 'sheshadri-swamigalv1'
# Embedding engine passed to openai.Embedding.create when a query is embedded.
embed_model = "text-embedding-ada-002"
# The OpenAI key is read from the "API" environment variable; a missing
# variable raises KeyError here.
openai.api_key = os.environ["API"]
from retry import retry

# Upper bound, in characters, on the joined context text that retrieve()
# places into a prompt.
limit = 3750
@retry(tries=10)
def retrieve(query):
    """Build a context-augmented prompt for ``query``.

    The query is embedded with ``embed_model``, the three nearest matches are
    fetched from the module-level Pinecone ``index``, and their
    ``metadata['text']`` values are packed into a prompt of the form::

        Answer the question based on the context below.

        Context:
        <context 1>

        ---

        <context 2>

        Question: <query>
        Answer:

    Contexts are added in ranking order for as long as the joined context
    text stays strictly below ``limit`` characters. When no context fits (or
    there are no matches) the prompt is still returned, with an empty context
    section, rather than an empty string.

    Args:
        query: The user's question as a string.

    Returns:
        The full prompt string to send to the completion model.

    Any exception raised inside the function causes the ``retry`` decorator
    to call it again, for at most 10 attempts in total.
    """
    res = openai.Embedding.create(
        input=[query],
        engine=embed_model
    )
    # retrieve from Pinecone
    xq = res['data'][0]['embedding']
    # get relevant contexts
    res = index.query(xq, top_k=3, include_metadata=True)
    contexts = [
        x['metadata']['text'] for x in res['matches']
    ]
    # build our prompt with the retrieved contexts included
    prompt_start = (
        "Answer the question based on the context below.\n\n"
        "Context:\n"
    )
    prompt_end = (
        f"\n\nQuestion: {query}\nAnswer:"
    )
    separator = "\n\n---\n\n"
    # Greedily keep contexts while the joined text stays under the limit.
    # A running length avoids re-joining the prefix on every step, and the
    # check now covers the last context too (the earlier loop appended it
    # unchecked and returned "" for fewer than two matches).
    selected = []
    used = 0
    for text in contexts:
        added = len(text) + (len(separator) if selected else 0)
        if used + added >= limit:
            break
        selected.append(text)
        used += added
    return prompt_start + separator.join(selected) + prompt_end
def complete(prompt):
    """Return the completion text that text-davinci-003 produces for ``prompt``.

    The request uses temperature 0 and allows at most 400 generated tokens.
    The text of the first returned choice is stripped of surrounding
    whitespace before being returned.
    """
    request_args = dict(
        engine='text-davinci-003',
        prompt=prompt,
        temperature=0,
        max_tokens=400,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
    )
    response = openai.Completion.create(**request_args)
    first_choice = response['choices'][0]
    return first_choice['text'].strip()
# Configure the Pinecone client; the key is read from the "PC_API"
# environment variable (KeyError if it is not set).
pinecone.init(
        api_key=os.environ["PC_API"],
        environment="us-east1-gcp"
    )
# Module-level handle that retrieve() reads as the global `index`, so it must
# be created before retrieve() is first called.
index = pinecone.Index(index_name)
# Example run: build a context-augmented prompt for the question and print
# the model's answer.
print(complete(retrieve("What is the area of kanchi district?")))

400 Sq. Miles


  from tqdm.autonotebook import tqdm
