In [None]:
%pip install -qU \
    openai==1.6.1 \
    pinecone-client==3.1.0 \
    langchain==0.1.1 \
    langchain-community==0.0.13 \
    tiktoken==0.5.2 \

In [1]:
import os
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from getpass import getpass


In [2]:
# Credentials: prefer environment variables, and fall back to a hidden prompt
# only when a variable is unset or empty.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    OPENAI_API_KEY = getpass("OpenAI API Key: ")

PINECONE_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_KEY:
    PINECONE_KEY = getpass("Enter your Pinecone API key: ")

# No prompt fallback here: INDEX_NAME is None when PINECONE_INDEX_NAME is unset.
INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME")


In [None]:
from pinecone import Pinecone

# Reuse the key collected in the credentials cell so the user is not prompted
# a second time when PINECONE_API_KEY is unset (get an API key at app.pinecone.io).
api_key = PINECONE_KEY
# configure client
pc = Pinecone(api_key=api_key)

In [None]:
import time

# Reuse the index name read in the credentials cell rather than re-reading the env var.
index_name = INDEX_NAME
existing_indexes = [
    index_info["name"] for index_info in pc.list_indexes()
]

# Fail early with an actionable message instead of an opaque client error later.
if not index_name:
    raise ValueError(
        "PINECONE_INDEX_NAME is not set; set it to the name of an existing index."
    )
if index_name not in existing_indexes:
    raise ValueError(
        f"Pinecone index {index_name!r} not found; available indexes: {existing_indexes}"
    )

# connect to index
index = pc.Index(index_name)
# short pause before requesting stats (kept from the original cell)
time.sleep(1)
# view index stats
index.describe_index_stats()

In [None]:
# Use the `langchain_openai` package, matching the ChatOpenAI import in the
# imports cell, instead of the legacy `langchain.embeddings.openai` path.
from langchain_openai import OpenAIEmbeddings

model_name = "text-embedding-ada-002"

# Embedding client used to embed queries for the vector store.
# The key comes from the credentials cell (keys are issued at platform.openai.com).
# NOTE(review): disallowed_special=() is presumably meant to stop tokenization
# from rejecting special-token text — confirm against the tiktoken docs.
embed = OpenAIEmbeddings(
    model=model_name, openai_api_key=OPENAI_API_KEY, disallowed_special=()
)

In [None]:
# Import under an alias so this does not rebind `Pinecone`, which an earlier
# cell imported from the `pinecone` client package to build `pc`.
from langchain.vectorstores import Pinecone as PineconeVectorStore

text_field = "text"  # the metadata field that contains our text

# initialize the vector store object; pass the embeddings object itself
# rather than its bare `embed_query` method
vectorstore = PineconeVectorStore(
    index, embed, text_field
)

### Similarity search

Here we can query the vector database for similar documents (No LLM Generation)

In [None]:
query = "TYPE A QUERY HERE"

# Fetch the 3 most relevant documents for the query; the bare name on the
# last line displays them as the cell output.
similar_docs = vectorstore.similarity_search(query, k=3)
similar_docs

## Retrieval Augmented Generation

#### RAG Parameters

In [None]:
# Sampling temperature passed to the chat model.
TEMPERATURE = 0.5
# Chat model identifier: "gpt-4-turbo" or "gpt-4".
MODEL = "gpt-4-turbo"

#### Initialize LLM Model

In [None]:
# `ChatOpenAI` is already imported from `langchain_openai` in the imports cell;
# re-importing it from `langchain.chat_models` here would shadow that class.
# Chat model used for answer generation, configured from the RAG parameters above.
llm = ChatOpenAI(
    temperature=TEMPERATURE,
    model_name=MODEL,
    openai_api_key=OPENAI_API_KEY
)

#### Test Basic RAG

In [None]:
from langchain.chains import RetrievalQA

# Build the retriever from the vector store first, then wire it and the LLM
# into a RetrievalQA chain using the "stuff" chain type.
doc_retriever = vectorstore.as_retriever()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=doc_retriever)

In [None]:
# `Chain.run` is deprecated in LangChain 0.1.x in favour of `invoke`.
# `invoke` returns a dict, so pull out "result" to keep displaying the answer string.
qa.invoke({"query": query})["result"]