In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language # Import necessary classes

In [2]:
# Sample input for the splitter: a block of Python-formatted source text. 📄
# It contains a small `Student` class (constructor plus two methods) followed by a
# short usage script. In the cells shown here the string is only handed to the
# splitter as text, so it does not have to be valid Python.
# NOTE: the "# Note: ..." remark on the `get_details` line sits INSIDE the string
# literal, so it is part of the text being split and counts toward chunk lengths.
# The literal also starts and ends with a newline.
text = """
class Student:
    def __init__(self, name, age, grade):
        self.name = name
        self.age = age
        self.grade = grade  # Grade is a float (like 8.5 or 9.2)

    def get_details(self):
        return self.name" # Note: There's an extra double quote here, which is a syntax error in actual Python.

    def is_passing(self):
        return self.grade >= 6.0


# Example usage
student1 = Student("Aarav", 20, 8.2)
print(student1.get_details())

if student1.is_passing():
    print("The student is passing.")
else:
    print("The student is not passing.")

"""

In [3]:
# Build a splitter preconfigured for Python source via `from_language`. ✂️
# Passing `language=Language.PYTHON` selects Python-oriented split points. Per the
# LangChain documentation these are class/def boundaries first, then blank lines,
# newlines and spaces (confirm against the installed version). This is a
# separator-based, text-level split; comments are not a split point of their own.
# - CHUNK_SIZE: upper target for the length of each chunk, in characters.
# - CHUNK_OVERLAP: characters shared by neighbouring chunks; 0 means no overlap.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 0

splitter = RecursiveCharacterTextSplitter.from_language(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
    language=Language.PYTHON,  # picks the Python separator set
)

In [4]:
# Run the configured splitter over the sample source. 📝
# `split_text` receives the whole input string and returns the resulting pieces
# as a list; the cells below inspect its length and its second element.
chunks = splitter.split_text(text)

In [5]:
# Report how many chunks the splitter produced for the sample text. 📏
num_chunks = len(chunks)
print(num_chunks)

3


In [6]:
# Show the second chunk (index 1). 📊
# In the recorded output this chunk starts at `get_details` (its leading indentation
# stripped), continues through `is_passing`, and ends after the first two
# example-usage statements. Both edges fall on blank lines of the input, so no
# line or statement is cut in half.
second_chunk = chunks[1]
print(second_chunk)

def get_details(self):
        return self.name" # Note: There's an extra double quote here, which is a syntax error in actual Python.

    def is_passing(self):
        return self.grade >= 6.0


# Example usage
student1 = Student("Aarav", 20, 8.2)
print(student1.get_details())
