In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language # Import necessary classes 

In [2]:
# Sample input for the splitter: a small README-style Markdown document.
# It contains one level-1 heading, three level-2 headings, bullet lists,
# a numbered step, and the start of a fenced ```bash code block.
# NOTE(review): the code fence is opened but never closed, and the clone URL is
# written in Markdown link form ([url](url)) inside the fence. This looks like a
# truncated paste; it is left as-is so the recorded outputs below stay valid.
# The string starts and ends with a newline because of the triple-quote layout.
text = """
# Project Name: Smart Student Tracker

A simple Python-based project to manage and track student data, including their grades, age, and academic status.


## Features

- Add new students with relevant info
- View student details
- Check if a student is passing
- Easily extendable class-based design


## 🛠 Tech Stack

- Python 3.10+
- No external dependencies


## Getting Started

1. Clone the repo
   ```bash
   git clone [https://github.com/ujjwalsolanki/student-tracker.git](https://github.com/ujjwalsolanki/student-tracker.git)

"""

In [3]:
# Build a Markdown-aware splitter via the `from_language` factory.
# `from_language` preconfigures RecursiveCharacterTextSplitter with a separator
# list tailored to the given language, so structural boundaries are tried first
# and generic whitespace/character splits are only a fallback.
#   Language.MARKDOWN -> use the Markdown separator set (e.g. heading markers
#                        and code fences; see the LangChain docs for the full list).
#   chunk_size=200    -> target upper bound for the length of each chunk
#                        (characters, with the default length function).
#   chunk_overlap=0   -> consecutive chunks share no text.
splitter = RecursiveCharacterTextSplitter.from_language(
    Language.MARKDOWN, chunk_size=200, chunk_overlap=0
)

In [4]:
# Run the Markdown-aware splitter over the sample document.
# `split_text` takes the raw string and returns a list of strings, one per chunk.
chunks = splitter.split_text(text=text)

In [5]:
# Report how many chunks the splitter produced.
chunk_count = len(chunks)
print(chunk_count)

4


In [6]:
# Show the first chunk to inspect where the splitter made its first cut.
# The splitter prefers structural Markdown boundaries, so the chunk should end
# at a section boundary rather than in the middle of a heading or sentence.
first_chunk = chunks[0]
print(first_chunk)

# Project Name: Smart Student Tracker

A simple Python-based project to manage and track student data, including their grades, age, and academic status.
