In [6]:
# ! ollama run gemma:7b
# ! ollama run llama3.2

In [1]:
# Imports
import datetime
import os
import time

from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_community.document_loaders import UnstructuredMarkdownLoader

from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


In [3]:
# Load LLM

# Ollama model tag shared by the generator and the embedder below.
# Other tags kept for quick switching:
#   'deepseek-r1:14b', 'llama3.2', 'gemma:7b'
MODEL = 'deepseek-r1:8b'

# Both objects are built from the same MODEL tag.
embeddings = OllamaEmbeddings(model=MODEL)
llm = Ollama(model=MODEL)

In [4]:
def summarize_chapter(file_name, file_name_output, question):
  """Answer `question` about one chapter file and save the answer to disk.

  The chapter is loaded from ``./book/<file_name>`` as Markdown, indexed in an
  in-memory vector store, and queried through a retrieval chain. Two files are
  written:

  * ``./output/<file_name_output>``: the answer text only.
  * ``./output/history/<start time>--<file_name_output>``: run metadata
    (model, start time, system prompt, question, elapsed seconds) followed by
    the answer.

  Relies on the notebook-level globals ``MODEL``, ``llm`` and ``embeddings``.

  Args:
    file_name: Name of the chapter file inside ``./book/``.
    file_name_output: Name of the result file inside ``./output/``.
    question: Text sent to the chain as the human message.

  Returns:
    None. The results are only written to disk.
  """

  # Vars
  now = datetime.datetime.now()
  start_time = time.time()
  path_file_load = './book/' + file_name
  path_file_result = './output/' + file_name_output
  path_file_history = './output/history/' + str(now) + '--' + file_name_output

  # Make sure the output folders exist *before* the slow LLM call, so a
  # missing directory cannot throw the answer away at write time.
  os.makedirs(os.path.dirname(path_file_result), exist_ok=True)
  os.makedirs(os.path.dirname(path_file_history), exist_ok=True)

  # Load file
  loader = UnstructuredMarkdownLoader(path_file_load)
  docs = loader.load()

  # Create vector store (documents are indexed as returned by the loader,
  # without any further splitting)
  vectorstore = InMemoryVectorStore.from_documents(
    documents=docs, embedding=embeddings
  )

  retriever = vectorstore.as_retriever()

  # Create prompt
  # Adjacent string literals are concatenated, so each sentence must carry
  # its own trailing space.
  system_prompt = (
    "Use the following chapter to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "\n\n"
    "{context}"
  )

  prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
  ])

  # Create Pipe
  question_answer_chain = create_stuff_documents_chain(llm, prompt)
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)

  # Execute
  results = rag_chain.invoke({"input": question})

  end_time = time.time()
  answer = results['answer']

  # Write
  # Explicit UTF-8 so accented or other non-ASCII output does not depend on
  # the platform's default encoding; `with` closes the file even on error.
  with open(path_file_result, "w", encoding="utf-8") as f:
    f.write(answer)

  with open(path_file_history, "w", encoding="utf-8") as f:
    f.write(f'model: {MODEL}\n')
    f.write(f'start at: {now}\n')
    # Log the prompt itself: str.join() would use it as a *separator*.
    f.write(f'system_prompt: "{system_prompt}"\n')
    f.write(f'question: {question}\n')
    f.write(f'time to process: {end_time - start_time}\n')
    f.write("\n\n")
    f.write(answer)

In [None]:
# Summarize chapters 1-10, one LLM run per chapter, in order.
# Narrow CHAPTERS (e.g. range(3, 4)) to re-run a single chapter.
CHAPTERS = range(1, 11)

for chapter in CHAPTERS:
  # Input and output share the same zero-padded name: 01.md ... 10.md
  chapter_file = f"{chapter:02d}.md"
  summarize_chapter(
    chapter_file,
    chapter_file,
    f"escreva um resumo detalhado do CAPÍTULO {chapter} em portugues",
  )