In [1]:
import os
import openai
import sys

In [2]:
from dotenv import load_dotenv, find_dotenv
# Read the local .env file before looking up any credentials.
load_dotenv()

# Required setting: indexing os.environ raises KeyError if the key is absent.
openai.api_key = os.environ["OPENAI_API_KEY"]
# Optional setting: a missing variable simply yields None.
openai.organization = os.environ.get("OPENAI_ORGANIZATION")

In [3]:
from langchain.vectorstores import SKLearnVectorStore
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
import tempfile

In [4]:
# Build one PDF loader per lecture file.
# Lecture01 is listed twice on purpose to simulate messy, duplicated data.
loaders = [
    PyPDFLoader(pdf_path)
    for pdf_path in (
        "docs/cs229_lectures/MachineLearning-Lecture01.pdf",
        "docs/cs229_lectures/MachineLearning-Lecture01.pdf",
        "docs/cs229_lectures/MachineLearning-Lecture02.pdf",
        "docs/cs229_lectures/MachineLearning-Lecture03.pdf",
    )
]

# Concatenate whatever each loader returns into one flat list, in loader order.
docs = []
for loader in loaders:
    docs += loader.load()

In [5]:
# Embedding model that is later passed to the vector store as `embedding`.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Location of the parquet file inside the system temp directory.
temp_dir = tempfile.gettempdir()
persist_path = os.path.join(temp_dir, "union.parquet")

# Build the store from the loaded documents and the embedding model.
# persist_path and serializer are optional.
vector_store = SKLearnVectorStore.from_documents(
    docs,
    embeddings,
    serializer="parquet",
    persist_path=persist_path,
)

In [10]:
# Run a similarity search and show the text of the first hit.
query = "slope down"
results = vector_store.similarity_search(query)

top_hit = results[0]
print(top_hit.page_content)

So here's the gradient descent algorithm. I'm going to choose some initial point. It could 
be vector of all zeros or some randomly chosen  point. Let's say we start from that point 
denoted by the star, by the cross, and now I wa nt you to imagine that this display actually 
shows a 3D landscape. Imagine you're all in a hi lly park or something, and this is the 3D 
shape of, like, a hill in some park.  
So imagine you're actually standing physically at the position of that star, of that cross, 
and imagine you can stand on that hill, ri ght, and look all 360 degrees around you and 
ask, if I were to take a small step, what would allow me to go downhill the most? Okay, just imagine that this is physically a hill and you're standing there, and would look around 
ask, "If I take a small step, what is the direc tion of steepest descent, that would take me 
downhill as quickly as possible?"  
So the gradient descent algorith m does exactly that. I'm going to take a small step in this 
dire

In [14]:
# Show the full list returned by the similarity search (rendered as the cell's output).
results

[Document(page_content='So here\'s the gradient descent algorithm. I\'m going to choose some initial point. It could \nbe vector of all zeros or some randomly chosen  point. Let\'s say we start from that point \ndenoted by the star, by the cross, and now I wa nt you to imagine that this display actually \nshows a 3D landscape. Imagine you\'re all in a hi lly park or something, and this is the 3D \nshape of, like, a hill in some park.  \nSo imagine you\'re actually standing physically at the position of that star, of that cross, \nand imagine you can stand on that hill, ri ght, and look all 360 degrees around you and \nask, if I were to take a small step, what would allow me to go downhill the most? Okay, just imagine that this is physically a hill and you\'re standing there, and would look around \nask, "If I take a small step, what is the direc tion of steepest descent, that would take me \ndownhill as quickly as possible?"  \nSo the gradient descent algorith m does exactly that. I\'m