In [2]:
# Install dependencies
%pip install -q langchain langchain-core google-genai python-dotenv langchain_google_genai pypdf


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Load environment variables

In [3]:
# Load environment variables. The call is the cell's last expression, so its
# return value is what the notebook displays as the cell output.
from dotenv import load_dotenv
load_dotenv()

True

### Load documents

In [4]:
from langchain_community.document_loaders import PyPDFLoader

# PDF to ingest, given relative to the current working directory.
file_path = "./example_data/llmops.pdf"

loader = PyPDFLoader(file_path)
docs = loader.load()

# Report how many documents the loader returned.
num_docs = len(docs)
print(num_docs)

508


### Split into chunks

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    add_start_index=True,  # a 'start_index' entry shows up in chunk metadata in later output
)

# Split every loaded document into chunks.
all_splits = text_splitter.split_documents(docs)

### Embedding

In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Identifier of the embedding model to use.
EMBEDDING_MODEL = "models/gemini-embedding-001"

embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)

### Vector store

In [8]:
from langchain_core.vectorstores import InMemoryVectorStore

# Create the vector store, handing it the embedding model by keyword.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [9]:
# Add every chunk to the vector store and keep the value returned by the call.
ids = vector_store.add_documents(all_splits)

In [10]:
# Run a sample query against the vector store.
sample_query = "What is LLMOps?"
results = vector_store.similarity_search(sample_query)

# Show only the first result.
top_result = results[0]
print(top_result)

page_content='From MLOps to LLMOps: Why Do We Need a
New Framework?
There is some overlap between MLOps and LLMOps; both
deal with the operational lifecycles of ML models, after all.
They also share common principles in terms of managing
ML workflows. However, the two frameworks diverge in
their primary focuses and objectives. While MLOps handles
non-generative models (both language and computer
vision), LLMOps deals with generative language models—
and thus with mammoth levels of complexity. The
complexity of these models owes not only to their scale and
architecture but also to the unique processes involved in
data engineering, domain adaptation, evaluation, and
monitoring for them. The key distinctions are apparent in
LLMs’ prediction transparency, latency, and memory and
computational requirements.
Perhaps the biggest difference is the shift in how end users
consume these models. Non-generative ML models are
predictive tools used for passive consumption, such as in' metadata={'prod

### Retrievers

In [11]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain

In [12]:
@chain
def retriever(query: str) -> List[Document]:
    """Look up ``query`` in ``vector_store``, requesting one result (``k=1``).

    Args:
        query: Text to search for.

    Returns:
        The list produced by ``vector_store.similarity_search`` for ``query``.
    """
    top_matches = vector_store.similarity_search(query, k=1)
    return top_matches

In [13]:
# Run the retriever once per query; the batch result is the cell's output.
sample_queries = [
    "Why do we need LLMOps?",
    "How can we manage versioning of prompts?",
]
retriever.batch(sample_queries)

[[Document(id='8bbe1189-f865-41af-ab8f-5cc2d4e51112', metadata={'producer': 'calibre 7.16.0', 'creator': 'calibre 7.16.0', 'creationdate': '2025-07-12T07:11:24+00:00', 'author': 'Abi Aryan', 'moddate': '2025-07-12T07:11:24+00:00', 'title': 'LLMOps (for Raymond Rhine)', 'source': './example_data/llmops.pdf', 'total_pages': 508, 'page': 47, 'page_label': '48', 'start_index': 0}, page_content='From MLOps to LLMOps: Why Do We Need a\nNew Framework?\nThere is some overlap between MLOps and LLMOps; both\ndeal with the operational lifecycles of ML models, after all.\nThey also share common principles in terms of managing\nML workflows. However, the two frameworks diverge in\ntheir primary focuses and objectives. While MLOps handles\nnon-generative models (both language and computer\nvision), LLMOps deals with generative language models—\nand thus with mammoth levels of complexity. The\ncomplexity of these models owes not only to their scale and\narchitecture but also to the unique processes i