In [1]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [2]:
# Loads data from all PDF files in the "files" folder within the current directory.
from llama_index.core import SimpleDirectoryReader

# load data
loader = SimpleDirectoryReader(
            input_dir = './files',  # Points to the "files" folder in the current directory
            required_exts=[".md"],
            recursive=True
        )
docs = loader.load_data()

In [4]:
print(docs[:3])

[Document(id_='40fdb7df-ece6-4963-9cd4-b41bb9f43ef0', embedding=None, metadata={'file_path': 'c:\\Users\\Administrator\\Desktop\\deepseek_rag\\files\\Readme.md', 'file_name': 'Readme.md', 'file_size': 9971, 'creation_date': '2025-01-27', 'last_modified_date': '2025-01-27'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='# Project Workflow Documentation\r\n\r\nThis documentation provides a detailed overview of the code and instructions on how to use it. The code is a Flask application that integrates with MongoDB and OpenAI\'s GPT model to manage a project workflow.\r\n\r\n![API calls](image.png)\r\n*Figure 1: API calls*\r\n## Table of Con

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch

# Load model and tokenizer from Hugging Face
model_name = "BAAI/bge-large-en-v1.5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Define a reusable embedding function for documents
class CustomEmbedModel:
    def __init__(self, model_name="BAAI/bge-large-en-v1.5"):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name).to(self.device)

    def embed(self, texts):
        # If a single string is provided, wrap it in a list
        if isinstance(texts, str):
            texts = [texts]
        # Tokenize the input text
        inputs = self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
        inputs = {key: value.to(self.device) for key, value in inputs.items()}
        # Get embeddings
        with torch.no_grad():
            outputs = self.model(**inputs)
            embeddings = outputs.last_hidden_state.mean(dim=1)  # Mean pooling
        return embeddings.cpu().numpy()  # Convert to numpy array for compatibility

# Initialize the embedding model
embed_model = CustomEmbedModel()

# Process each document
doc_embeddings = []

# Loop through each document
for doc in docs:
    # Extract text from the document (this will depend on the format of the doc object from SimpleDirectoryReader)
    doc_text = doc.text  # Assuming doc.text contains the extracted text

    # Generate embeddings for the document text
    embedding = embed_model.embed(doc_text)

    # Store the embedding for this document
    doc_embeddings.append(embedding)

In [6]:
# Now `doc_embeddings` contains the embeddings for each document
print(doc_embeddings)

[array([[ 0.22960638,  0.07712092, -0.55491114, ..., -0.27139044,
         0.14393172,  0.7111468 ]], shape=(1, 1024), dtype=float32)]


In [8]:
# Vector databases
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document, Settings

# Set the HuggingFace embedding model in settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# Set the HuggingFace model (BAAI/bge-small-en-v1.5)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load documents from Markdown files
documents = SimpleDirectoryReader("./files", required_exts=[".md"]).load_data()

index = VectorStoreIndex.from_documents(documents)

# The index is now ready for querying


In [9]:
# Query Engine

from llama_index.llms.ollama import Ollama

from llama_index.core import Settings

# Setting up the LLM (Ollama) with the deepseek-r1:1.5b model

llm = Ollama(model="deepseek-r1:1.5b", request_timeout=300.0)

# Specify the LLM to be used in the settings

Settings.llm = llm

# Setup a query engine on the index previously created (assumes `index` is already defined)

query_engine = index.as_query_engine(streaming=True, similarity_top_k=4)

In [12]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

# Set up DeepSeek as the LLM
llm = Ollama(model="deepseek-r1:1.5b", request_timeout=300.0)

# Set up the local embedding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Load documents
documents = SimpleDirectoryReader("./files").load_data()

# Create the index with the embedding model
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)

# Create the query engine with DeepSeek as the LLM
query_engine = index.as_query_engine(llm=llm)

# Query the engine
response = query_engine.query("Give me the detailed aim of the project has seen from the markdown.")

# Print the response
print(response)


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)


<think>
Okay, so I need to figure out the detailed aims of the project based on the provided context. The user has given a file path called Readme.md, which seems to be a documentation file for their Flask application. Looking at the introduction section, it mentions that this is a project workflow management system using MongoDB and OpenAI's GPT model.

The endpoints in the documentation include several parts like Initialize, Ask Question, Generate User Stories, and Generate Development Plans. The user also provided some example code snippets, but I'm focusing on understanding the aims from the context information, which includes the Readme.md file.

In the Readme.md file, the Introduction section talks about this being a project workflow management system. It mentions interacting with a MongoDB database and using OpenAI's GPT model for various tasks. The example code provided shows functions under different routes like /initialize, /ask_question, etc., but those are probably placehol