# Environment Variable

In [None]:
import getpass
import os
import dotenv

In [None]:
# Load settings from the .env file located by find_dotenv() so the
# os.getenv lookups further down can see them.
dotenv.load_dotenv(dotenv_path=dotenv.find_dotenv())

# Load and Split

In [None]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.pinecone import Pinecone

In [None]:
from langchain.document_loaders import TextLoader

# Source text that will be chunked and indexed.
loader = TextLoader("./files/sample.txt")

# Splitter built via the tiktoken-based constructor: breaks on newlines,
# with a chunk size of 600 and an overlap of 180 between neighbouring chunks.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=600,
    chunk_overlap=180,
    separator="\n",
)

# Read the file and cut it into chunks in one step.
docs = loader.load_and_split(splitter)

# Embedding client used later when building / querying the vector store.
embeddings = OpenAIEmbeddings()

In [None]:
# Show the text of the first chunk returned by load_and_split as a quick
# sanity check of the splitting step.
docs[0].page_content

# Pinecone

In [None]:
import pinecone

# Configure the pinecone client from environment variables.
#   PINECONE_API_KEY - find at app.pinecone.io
#   PINECONE_ENV     - shown next to the api key in the console
# A missing variable is passed through as None, exactly as os.getenv would.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENV"),
)

In [None]:
# Name of the Pinecone index used throughout this notebook.
index_name = "open-ai"

# Only create the index when it is not already listed, so re-running this
# cell does not attempt to create it a second time.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine",
    )

In [None]:
# Build the vector store from the split documents using the embeddings client.
# The OpenAI embedding model `text-embedding-ada-002` uses 1536 dimensions,
# which is the dimension the index is created with.
docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)

In [None]:
# Alternative to building from documents: when the index already exists,
# attach to it directly by name.
docsearch = Pinecone.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [None]:
# Run a similarity search against the vector store for a sample question.
# NOTE: this rebinds `docs`, so the chunk list produced by the loader is no
# longer available under that name once this cell has run; re-run the
# load/split cell before calling Pinecone.from_documents(docs, ...) again.
query = "What did the president say about Ketanji Brown Jackson"
docs = docsearch.similarity_search(query)