In [None]:
import os
from tqdm.autonotebook import tqdm
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WikipediaLoader
from langchain_openai import OpenAIEmbeddings

In [None]:
from pinecone_connector import PineconeConnector

# Load .env *before* building any client: the embeddings client looks up
# OPENAI_API_KEY in the environment when it is constructed, so constructing it
# first fails on a fresh kernel when the key is only present in .env.
load_dotenv()

# Credentials / settings read from the environment (None when unset).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")

# Embedding model shared by the connector for indexing and querying.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", show_progress_bar=True)
pinecone = PineconeConnector(embeddings)

# Show the connector's attributes as the cell output.
# NOTE(review): vars() dumps every instance attribute; if the connector keeps
# API keys on the instance they will end up in the saved notebook output —
# confirm, and clear this output before sharing or committing.
vars(pinecone)

In [None]:
# Provision the demo index. The index name must not contain capital letters,
# and server_type picks between a Serverless and a Pod deployment.
pinecone.create_index(
    "testindex",
    server_type="serverless",
)

In [None]:
# List all indexes in the project; the call's return value is shown as the cell output.
pinecone.list_index()

In [None]:
# Describe an index by name — here the "testindex" created earlier in this notebook.
pinecone.describe_index("testindex")

In [None]:
# Pull Wikipedia pages for the query (load_max_docs caps the fetch at two).
wiki_loader = WikipediaLoader(query="HUNTER X HUNTER", load_max_docs=2)
docs = wiki_loader.load()

# Deliberately tiny chunks (100 characters, 20 overlapping) so the effect of
# splitting is easy to see; len measures chunk size in characters.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=20,
    chunk_size=100,
)

# Only the first loaded page is chunked here; its metadata is attached to the
# chunks via the metadatas argument.
first_page = docs[0]
texts = text_splitter.create_documents(
    texts=[first_page.page_content],
    metadatas=[first_page.metadata],
)

In [None]:
# Run a similarity search and display the hits as the cell output.
# NOTE(review): `docsearch` is not assigned in any cell visible in this notebook,
# so this cell raises NameError after Restart & Run All unless it is defined
# elsewhere — TODO confirm and add the cell that builds the vector store
# (presumably from `texts` via the connector).
#
# Alternative queries kept for reference:
# context = docsearch.similarity_search("Anime")
# context = docsearch.similarity_search_with_relevance_scores("Who was the voice actress?")
context = docsearch.similarity_search("What is hunter x hunter?")

# TODO: add another document to the index (no code for this step exists yet).

context