In [1]:
import logging

logging.basicConfig(format="%(levelname)s - %(name)s -  %(message)s", level=logging.WARNING)
logging.getLogger("haystack").setLevel(logging.INFO)

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

OPEN_SEA_API_KEY=os.getenv('OPEN_SEA_API_KEY')
PINECONE_API_KEY=os.getenv('PINECONE_API_KEY')

ModuleNotFoundError: No module named 'dotenv'

In [None]:
from datasets import load_dataset

dataset = load_dataset("code_search_net", "python")

In [None]:
from haystack import Document

titles = dataset["func_name"]
texts = [" ".join(text) for text in dataset["func_code_tokens"]]

documents = []
for title, text in zip(titles, texts):
    documents.append(Document(content=text, meta={"name": title or ""}))

In [None]:
from haystack.document_stores import PineconeDocumentStore

document_store = PineconeDocumentStore(
    api_key=PINECONE_API_KEY,
    index='NLP-Open-Source',
    similarity="cosine",
    embedding_dim=768,
    progress_bar=False,
)

In [None]:
from haystack.nodes import EmbeddingRetriever

retriever = EmbeddingRetriever(
    document_store=document_store,
    query_embedding_model="CarperAI/carptriever-1",
)

In [None]:
document_store.write_documents(documents)

document_store.update_embeddings(
    retriever,
    batch_size=16
)

In [None]:
from haystack.nodes import OpenAIAnswerGenerator

generator = OpenAIAnswerGenerator(api_key=OPEN_SEA_API_KEY)

In [None]:
from haystack.pipelines import GenerativeQAPipeline

pipeline = GenerativeQAPipeline(generator=generator, retriever=retriever)