# Lesson 3: Vectorstores and embeddings

# Vectorstore ingestion

In [None]:
import "dotenv/config";

In [1]:
import { OllamaEmbeddings } from "@langchain/ollama";

// Embeddings client that talks to an Ollama server at the given base URL
// (here a local instance on port 11434) using the "llama3.1:latest" model.
// NOTE(review): llama3.1 looks like a general-purpose chat model rather than a
// dedicated embedding model — confirm this is the intended choice for retrieval.
const embeddings = new OllamaEmbeddings({
  model: "llama3.1:latest",
  baseUrl: "http://localhost:11434",
});

In [2]:
import { similarity } from "ml-distance";

// Embed two sentences on unrelated topics so their vectors can be compared.
// The two requests do not depend on each other, so they are issued
// concurrently instead of awaiting the first before starting the second.
const [vector1, unrelatedVector] = await Promise.all([
    embeddings.embedQuery(
        "What are vectors useful for in machine learning?"
    ),
    embeddings.embedQuery(
        "A group of parrots is called a pandemonium."
    ),
]);

In [3]:
// Cosine similarity between the two embedding vectors computed in the previous
// cell; as the last expression in the cell, its value is shown as the output.
similarity.cosine(vector1, unrelatedVector);

0.43314176961059175

In [4]:
// Peer dependency
import * as parse from "pdf-parse";
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { 
    RecursiveCharacterTextSplitter
} from "langchain/text_splitter";

// Configure the text splitter up front: a chunk size of 128 with no overlap
// between neighbouring chunks.
const splitter = new RecursiveCharacterTextSplitter({
  chunkOverlap: 0,
  chunkSize: 128,
});

// Load the CS229 lecture PDF from the local data directory.
const loader = new PDFLoader("./data/MachineLearning-Lecture01.pdf");
const rawCS229Docs = await loader.load();

// Break the loaded documents into small chunks for later ingestion.
const splitDocs = await splitter.splitDocuments(rawCS229Docs);

In [5]:
import { MemoryVectorStore } from "langchain/vectorstores/memory";

// In-memory vector store constructed with the `embeddings` client defined earlier.
const vectorstore = new MemoryVectorStore(embeddings);

In [6]:
// Ingest the split PDF chunks into the vector store.
// NOTE(review): presumably each chunk is embedded via the store's embeddings
// client at this point, so this may take a while — confirm.
await vectorstore.addDocuments(splitDocs);

In [7]:
// Ask the vector store for the 4 stored chunks most similar to the question.
const retrievedDocs = await vectorstore.similaritySearch("What is deep learning?", 4);

// Keep only the text of each hit; the bare identifier on the last line makes
// the notebook display the resulting array.
const pageContents = retrievedDocs.map(({ pageContent }) => pageContent);

pageContents

[
  "piece of research in machine learning, okay?",
  "another supervised learning problem and another classification problem.",
  "In contrast, in an unsupervised learning problem, this is the sort of data you get, okay?",
  "algorithm and have a computer learn by itself how to, say, recognize your handwriting."
]

# Retrievers

In [8]:
// Obtain a retriever from the vector store so it can be queried via `invoke`.
const retriever = vectorstore.asRetriever();

In [9]:
// Run the same question through the retriever; the cell output lists the
// returned Document objects with their page content and metadata.
await retriever.invoke("What is deep learning?")

[
  Document {
    pageContent: [32m"piece of research in machine learning, okay?"[39m,
    metadata: {
      source: [32m"./data/MachineLearning-Lecture01.pdf"[39m,
      pdf: {
        version: [32m"1.10.100"[39m,
        info: {
          PDFFormatVersion: [32m"1.4"[39m,
          IsAcroFormPresent: [33mfalse[39m,
          IsXFAPresent: [33mfalse[39m,
          Title: [32m""[39m,
          Author: [32m""[39m,
          Creator: [32m"PScript5.dll Version 5.2.2"[39m,
          Producer: [32m"Acrobat Distiller 8.1.0 (Windows)"[39m,
          CreationDate: [32m"D:20080711112523-07'00'"[39m,
          ModDate: [32m"D:20080711112523-07'00'"[39m
        },
        metadata: Metadata { _metadata: [36m[Object: null prototype][39m },
        totalPages: [33m22[39m
      },
      loc: { pageNumber: [33m8[39m, lines: { from: [33m2[39m, to: [33m2[39m } }
    },
    id: [90mundefined[39m
  },
  Document {
    pageContent: [32m"another supervised learning prob