In [9]:
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Source document, resolved relative to the notebook's working directory.
file = "DeepSeek_R1.pdf"

# Splitter settings kept as named constants so they are easy to find and tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0

# Load the PDF into LangChain documents.
loader = PDFPlumberLoader(file)
docs = loader.load()

# Break the loaded documents into smaller pieces for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
all_splits = text_splitter.split_documents(docs)

In [11]:
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Embedding model served by the local Ollama instance.
local_embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Give every chunk a stable id built from its source and its position in
# `all_splits`. Without explicit ids, each run of this cell would insert the
# chunks again under fresh ids, so re-running it in the same kernel would pile
# duplicate chunks into the store and skew similarity search results.
# NOTE(review): this relies on the in-memory Chroma collection being shared
# within the process and on inserts with an existing id overwriting the entry -
# confirm against the installed langchain-chroma / chromadb versions.
# Caveat: if the split settings change so that fewer chunks are produced,
# restart the kernel, otherwise stale higher-numbered chunks may remain.
split_ids = [
    f"{split.metadata.get('source', 'document')}-{position}"
    for position, split in enumerate(all_splits)
]

# Embed all chunks and index them in the vector store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=local_embeddings,
    ids=split_ids,
)

In [12]:
question = "What is the purpose of the DeepSeek project?"

# Store the search hits under their own name: binding them to `docs` would
# overwrite the documents loaded from the PDF, leaving one name with two
# different meanings depending on which cells have been run.
retrieved_docs = vectorstore.similarity_search(question)

# Print the raw text of each hit to eyeball retrieval quality.
for retrieved_doc in retrieved_docs:
    print(retrieved_doc.page_content)

engineeringtasks. Asaresult,DeepSeek-R1hasnotdemonstratedahugeimprovement
over DeepSeek-V3 on software engineering benchmarks. Future versions will address
thisbyimplementingrejectionsamplingonsoftwareengineeringdataorincorporating
asynchronousevaluationsduringtheRLprocesstoimproveefficiency.
16
DeepSeek-R1avoidsintroducinglengthbiasduringGPT-basedevaluations,furthersolidifying
itsrobustnessacrossmultipletasks.
On math tasks, DeepSeek-R1 demonstrates performance on par with OpenAI-o1-1217,
surpassingothermodelsbyalargemargin. Asimilartrendisobservedoncodingalgorithm
tasks,suchasLiveCodeBenchandCodeforces,wherereasoning-focusedmodelsdominatethese
benchmarks. Onengineering-orientedcodingtasks,OpenAI-o1-1217outperformsDeepSeek-R1
first open research to validate that reasoning capabilities of LLMs can be incentivized
purelythroughRL,withouttheneedforSFT.Thisbreakthroughpavesthewayforfuture
advancementsinthisarea.
• We introduce our pipeline to develop DeepSeek-R1. The pipeline incorporates

In [13]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

# Name of the chat model requested from Ollama.
LLM_NAME = "deepseek-r1:1.5b"

# Prompt text; `{docs}` is the single template variable.
SUMMARY_TEMPLATE = "Summarize the main themes in these retrieved docs: {docs}"

model = ChatOllama(model=LLM_NAME)
prompt = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)

def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    contents = [entry.page_content for entry in docs]
    return "\n\n".join(contents)

# Summarisation pipeline: the leading mapping fills the prompt's `{docs}`
# variable with format_docs(<chain input>), then the prompt goes to the model
# and the reply is parsed to a plain string.
chain = {"docs": format_docs} | prompt | model | StrOutputParser()

question = "What is the purpose of the DeepSeek project?"

# Use a dedicated name for the hits so the documents loaded from the PDF,
# bound to `docs` earlier in the notebook, are not overwritten.
retrieved_docs = vectorstore.similarity_search(question)
chain.invoke(retrieved_docs)

"<think>\nOkay, so the user wants me to summarize the main themes from the retrieved documents about DeepSeek-R1. Let me read through what they provided.\n\nFirst, there's a mention that DeepSeek-R1 hasn't improved as much on software engineering benchmarks compared to DeepSeek-V3. This is interesting because it suggests potential areas for improvement in its AI capabilities, especially in technical fields. The result is that future versions will focus on rejectionsampling of software engineering data or incorporating asynchronous evaluations during RL processes. That sounds like a strategic move to make the model more efficient.\n\nAnother point is about avoiding length bias in GPT evaluations and solidifying robustness across multiple tasks. This probably means that DeepSeek-R1 is versatile, handling various types of AI tasks well without favoriting longer responses too much.\n\nThen there's an observation on math and coding algorithm tasks, where models like OpenAI-o1-1217 outperfor

In [14]:
from langchain_core.runnables import RunnablePassthrough

# Prompt for question answering; `{context}` and `{question}` are the two
# template variables.
RAG_TEMPLATE = """
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.

<context>
{context}
</context>

Answer the following question:

{question}"""

rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)
retriever = vectorstore.as_retriever()

# Input mapping for the prompt: "context" comes from the retriever piped
# through format_docs, "question" from a passthrough step.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build inputs -> fill prompt -> call model -> parse to string.
qa_chain = rag_inputs | rag_prompt | model | StrOutputParser()

question = "What is the purpose of the DeepSeek project?"

qa_chain.invoke(question)

"<think>\nOkay, so I need to figure out the purpose of the DeepSeek project based on the provided context. Let me read through it carefully.\n\nThe context mentions that DeepSeek-R1 has demonstrated improvements over DeepSeek-V3 on software engineering benchmarks and future versions will include rejections sampling or asynchronous evaluations. It also talks about a pipeline for developing DeepSeek-R1, which uses two RL stages: one for discovering improved reasoning patterns aligned with human preferences and another as seed data. Then, the project starts by collecting thousands of cold-start data to fine-tune V3-Basemodel, followed by RL-like Zero, and then creates new SFT data using rejection sampling on RL checkpoints combined with supervised data from V3.\n\nHmm, it seems like DeepSeek-R1 is developed through a pipeline that includes both reinforcement learning (RL) stages and some form of data generation. The main focus seems to be on improving performance in software engineering t