In [None]:
# Locate a .env file with find_dotenv() and load its variables into the environment
import os
from dotenv import find_dotenv, load_dotenv

dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

In [None]:
# Check that the API keys are set WITHOUT echoing them into the notebook output.
# Cell outputs are stored inside the .ipynb file, so printing a raw key would
# leak it as soon as the notebook is committed or shared.
def mask_secret(value, visible=4):
    """Return a redacted, printable stand-in for a secret.

    Args:
        value: The secret string, or None/empty when the variable is unset.
        visible: Number of trailing characters to reveal for long secrets.

    Returns:
        "<not set>" when value is None or empty, "***" for short values
        (revealing a suffix would expose too much of them), otherwise
        "***" followed by the last `visible` characters.
    """
    if not value:
        return "<not set>"
    # Only reveal a suffix when it is a small fraction of the whole secret.
    if visible <= 0 or len(value) < 4 * visible:
        return "***"
    return "***" + value[-visible:]


print('OPENAI_API_KEY:', mask_secret(os.getenv('OPENAI_API_KEY')))
# The environment name is plain configuration, not a credential, so show it in full.
print('PINECONE_ENVIRONMENT:', os.getenv('PINECONE_ENVIRONMENT'))
print('PINECONE_API_KEY:', mask_secret(os.getenv('PINECONE_API_KEY')))

In [None]:
# PDFs can be imported with langchain!
# https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf
# Doc to read: https://ntrs.nasa.gov/citations/19660027401
from langchain.document_loaders import PyPDFLoader

data_folder = '../data/'
doc = "AMS_2020.pdf"

# Read the PDF and split it into documents
loader = PyPDFLoader(f"{data_folder}{doc}")
pages = loader.load_and_split()

# Tag every document with the bare file name and shift its page number by one
# (presumably converting the loader's 0-based index to 1-based -- confirm)
for page in pages:
    page.metadata.update(source=doc, page=page.metadata['page'] + 1)

In [None]:
# Set up the OpenAI embeddings client
from langchain.embeddings import OpenAIEmbeddings

# Name of the embedding model to request
embedding_model_name = "text-embedding-ada-002"
embeddings_model = OpenAIEmbeddings(model=embedding_model_name)

In [None]:
# Connect the Pinecone client using the credentials found in the environment
import os
import pinecone
from langchain.vectorstores import Pinecone

pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY"),
    environment=os.environ.get("PINECONE_ENVIRONMENT"),
)
# Left as the cell's last expression so the notebook displays its result
pinecone.whoami()

In [None]:
# Push the document vectors to Pinecone
index_name = "langchain-quickstart"
index = pinecone.Index(index_name)

# Start from a clean slate: delete what is already stored, then embed and upload
index.delete(delete_all=True)
vectorstore = Pinecone.from_documents(
    documents=pages,
    embedding=embeddings_model,
    index_name=index_name,
)