In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
from pathlib import Path
from typing import List, Dict, Optional
import logging
import json

from pydantic import BaseModel, Field
from typing import List, Optional

from tnh_scholar.xml_processing import wrap_lines, unwrap_lines, lines_from_wrapped_text
from tnh_scholar.text_processing import get_text_from_file, write_text_to_file, process_text
from tnh_scholar.utils import iterate_subdir, load_json_into_model, save_model_to_json

In [12]:
from tnh_scholar.openai_interface import token_count

In [13]:
# Configure main logger using setup_logger
import tnh_scholar.logging_config as logging_config
from tnh_scholar.logging_config import setup_logging, get_child_logger

In [14]:
setup_logging(log_filepath="postprocessing_english.log")
logger = get_child_logger("postprocessing_english")

In [15]:
from tnh_scholar import PROJECT_ROOT_DIR

In [16]:
video_storage_dir = PROJECT_ROOT_DIR / "sandbox/video_transcriptions"

In [17]:
# One contiguous section of a transcript: bilingual title, English summary and
# the line range it spans. Used as the element type of `DharmaTalkSections.sections`.
# Documented with `#` comments (not a class docstring) so the schema generated
# from this model is left exactly as it was.
class Section(BaseModel):
    # Section title in the source language (Vietnamese).
    title_vi: str = Field(..., description="The title of the section in Vietnamese.")
    # English rendering of the same title.
    title_en: str = Field(..., description="The translation of the title of the section in English.")
    # English summary of what the section covers.
    summary: str = Field(..., description="A summary of the section in English.")
    # Line numbers refer to the numbering of the wrapped transcript.
    start_line: int = Field(..., description="The starting line number of this section.")
    end_line: int = Field(..., description="The ending line number of this section.")

# Structured result of the sectioning pass: an overall summary of the talk plus
# the ordered list of `Section` entries. Passed to `process_text` as
# `response_object` elsewhere in this notebook.
# Documented with `#` comments (not a class docstring) so the schema generated
# from this model is left exactly as it was.
class DharmaTalkSections(BaseModel):
    # English summary of the whole talk.
    talk_summary: str = Field(..., description="A summary of the Dharma talk in English.")
    # Sections in transcript order; the description asks for full, non-overlapping coverage.
    sections: List[Section] = Field(
        ...,
        description=(
            "An ordered list of sections with their titles and included start and end line numbers. The sequence of line ranges for the sections must cover every line from start to finish without any overlaps or gaps."
        ),
    )

In [None]:
# A `Section` extended with the section's full text in both languages.
# Both content fields are optional and default to None.
# Documented with `#` comments (not a class docstring) so the schema generated
# from this model is left exactly as it was.
class TranslatedSection(Section):
    # Original Vietnamese text of the section.
    content_vi: Optional[str] = Field(None, description="The full content of the section in Vietnamese.")
    # English translation of the section text.
    content_en: Optional[str] = Field(None, description="The translation of the full content of the section in English.")

In [18]:
def process_sections(output_file: Path, wrapped_transcript: str, section_object: DharmaTalkSections, instructions: str) -> list:
    """
    Process every section of a transcript with `process_text` and write the results to a file.

    The output file is first overwritten with an opening ``<document>`` tag. Each
    section's processed text is appended as soon as it is produced, and a closing
    ``</document>`` tag is appended after the last section.

    Args:
        output_file (Path): Path to the file where the processed sections will be written.
            Any existing content is overwritten.
        wrapped_transcript (str): The transcript with line-number wrapping; the lines of each
            section are extracted from it with `lines_from_wrapped_text`.
        section_object (DharmaTalkSections): Object containing the sections to process. Each
            section supplies `start_line`, `end_line` and `title_en`.
        instructions (str): Instruction template for processing each section. It is formatted
            once per section with the keyword `section_title` (the section's English title).

    Returns:
        list: The value returned by `process_text` for each section, in section order.

    Example:
        process_sections(
            output_file=Path("output.xml"),
            wrapped_transcript=wrapped_transcript,
            section_object=my_section_object,
            instructions="Process section titled '{section_title}' carefully."
        )
    """
    sections = section_object.sections
    sections_processed = []

    write_text_to_file(output_file, "<document>\n", overwrite=True)
    logger.info(f"Sections to process: {len(sections)}")
    for i, section in enumerate(sections):
        # `Section` defines `title_vi` / `title_en` and has no `title` attribute;
        # the English title is the one used for logging and for the prompt.
        section_title = section.title_en
        logger.info(f"Processing section {i+1}: '{section_title}'...")
        original_lines = lines_from_wrapped_text(
            wrapped_transcript,
            start=section.start_line,
            end=section.end_line,
            keep_brackets=False
        )
        section_instructions = instructions.format(section_title=section_title)

        # Log the fully formatted prompt once so the run is traceable without flooding the log.
        if i == 0:
            logger.info(f"Processing instructions:\n{section_instructions}")

        processed_lines = process_text(original_lines, section_instructions, batch=False)
        sections_processed.append(processed_lines)
        # Append immediately so partial results survive a failure in a later section.
        write_text_to_file(output_file, processed_lines, append=True)
    write_text_to_file(output_file, "</document>", append=True)
    return sections_processed
    

In [57]:
from typing import List

def convert_wrapped_lines_to_xml(wrapped_lines: str) -> str:
    """
    Converts newline-separated wrapped lines into <line number="x"> XML elements.

    Each non-blank line must have the form "<n:content>". The text before the
    first colon becomes the `number` attribute and the remainder (with
    surrounding whitespace stripped) becomes the element text. Both are
    XML-escaped, so characters such as `&`, `<` and `>` in the transcript do
    not break the generated markup. Blank lines (for example one produced by a
    trailing newline) are skipped.

    Args:
        wrapped_lines (str): Wrapped lines joined by "\\n", each in the format "<n:...>".

    Returns:
        str: The generated <line> elements, one per input line, each indented by
            two spaces and joined by newlines. No enclosing root element is added.

    Raises:
        ValueError: If a non-blank line is not enclosed in "<" and ">", or has
            no colon separating the number from the content.

    Example:
        lines = "\\n".join([
            "<1:Today is the 20th of November, 1994.>",
            "<2:The theme of this winter retreat is>",
        ])
        print(convert_wrapped_lines_to_xml(lines))
    """
    # Local import keeps this cell self-contained.
    from xml.sax.saxutils import escape, quoteattr

    xml_lines = []
    for raw_line in wrapped_lines.split("\n"):
        line = raw_line.strip()
        if not line:
            # Tolerate empty input and stray blank lines instead of failing on them.
            continue
        if not (line.startswith('<') and line.endswith('>')):
            raise ValueError(f"Invalid line format: {line}")
        # Split on the FIRST colon only, so colons inside the content (e.g. "10:30") are preserved.
        number, separator, content = line[1:-1].partition(':')
        if not separator:
            raise ValueError(f"Invalid format: {line}")
        # quoteattr() returns the value already quoted and escaped for use as an attribute.
        xml_lines.append(f'  <line number={quoteattr(number)}>{escape(content.strip())}</line>')
    return '\n'.join(xml_lines)



In [33]:
# Prompt template for the sectioning pass over a line-numbered Vietnamese transcript.
# `{section_count}` is the only placeholder and is filled in with str.format();
# formatting it with an empty string leaves a double space in
# "into  logical sections".
# NOTE(review): "You goal" in the text below looks like a typo for "Your goal".
# It is left untouched here because the string is passed on verbatim as
# instructions to `process_text`; change it deliberately if desired.
process_section_instructions_vi = """You are a highly skilled and meticulous assistant processing an audio transcript of a Dharma Talk given by Thich Nhat Hanh in Vietnamese.

Each line of the transcript is numbered in the format: <NUM:LINE> 

You goal is to divide the entire transcript into {section_count} logical sections based on content. 

For each section, give the title in Vietnamese and English, a summary in English, and the starting and ending line numbers of the section.

Also provide a summary of the talk in English.

IMPORTANT: Every line in the transcript must belong to a section. Don't leave out any lines. Don't include lines in more than one section."""


In [47]:
# Prompt template for translating one section of a line-numbered Vietnamese
# transcript into English while keeping the <NUM:LINE> structure.
# `{section_title}` is the only placeholder and is filled in with str.format().
# NOTE(review): the text contains small slips ("the speakers style",
# "infer the Thay's intent"). They are left untouched here because the string
# is passed on verbatim as instructions to `process_text`.
section_instructions_translate_vi = """You are the world's leading expert at translating Dharma talks transcribed from spoken Vietnamese.

You are translating a section titled '{section_title}' from a Dharma talk offered by Thich Nhat Hanh (Thay) in Plum Village, France.

Lines of the transcript are numbered and are given in the format <NUM:LINE>.

Your task is to translate each line into correct, clear and typical English. Add correct punctuation to create meaning that matches the speakers style and intent.

You may have to infer the Thay's intent in order to correct transcription or speaking errors and to generate a text that most closely matches the speaker's meaning,
while still giving clear and eloquent English. Give the best approximation or contextual guess if the transcript is difficult or unclear. Make no comments. 

Use Plum Village typical English style when making translations.

You may consider adjacent lines for corrections and context when generating a line, however each line of translation should be as close as possible a translation of the original line.

Some transcriptions may be from sounds such as a bell. These can be marked as [Bell].

You must faithfully capture Thay's style and presentation while creating a meaningful flow.

Do not leave out any content or summarize. 

The final output should match the same line structure and line numbering using <> as the original.

Your output should be a polished section.

Make no other changes; add no content.

Output the final text only."""

In [21]:
talk_name = "Kinh Tư Lượng [TTSĐCTTĐB 01] ｜ TS Thích Nhất Hạnh (20-11-1994, Xóm Thượng, Làng Mai)"

In [22]:
video_dir = video_storage_dir / talk_name

In [23]:
transcript_path = video_dir / f"{talk_name}.txt"

In [24]:
print(transcript_path)
transcript_path.exists()

/Users/phapman/Desktop/tnh-scholar/sandbox/video_transcriptions/Kinh Tư Lượng [TTSĐCTTĐB 01] ｜ TS Thích Nhất Hạnh (20-11-1994, Xóm Thượng, Làng Mai)/Kinh Tư Lượng [TTSĐCTTĐB 01] ｜ TS Thích Nhất Hạnh (20-11-1994, Xóm Thượng, Làng Mai).txt


True

In [25]:
transcript = get_text_from_file(transcript_path)

In [26]:
print(transcript[:1000])

Đây là ngày 20 tháng 11 năm 1994. Sáng hôm nay chúng ta đã làm lễ khai kinh ở trên Thiền đường Cam Lộ.
Đề tài của khóa tu mùa đông này là
truyền thống sân động của thiền tập.
Trong Đạo Bụt.
Và dạy tiếng Anh là The Living Tradition of Buddhist Meditation.
Chúng ta thấy rằng cái chữ thiền tập ở đây được việc bằng là tiếng Anh là Meditation.
Và truyền thống sân động của thiền tập trong Đạo Bụt có nghĩa là ngoài Đạo Bụt nó cũng có thiền tập.
Thành ra dịch thiền tập trong Đạo Bụt mình dịch là Buddhist Meditation.
Tại vì ngoài Đạo Bụt nó còn có những cái truyền thống thiền tập khác.
Chữ sân động ở đây nó có nghĩa là nó là một thực tài sống.
Chứ không phải là một cái nền triết lý mà mình nghiên cứu.
Một cái nền triết lý nó nằm trong sách vở mà mình nghiên cứu.
Và cái chủ đề này không phải là một chủ đề dành cho người nghiên cứu.
Đúng là phương diện truy thức.
Mà là một chủ đề dành cho người muốn và có cái niềm thao thức thực tập.
Vì vậy cho nên trong suốt khóa tu chúng ta sẽ có cơ hội thiên n

In [27]:
wrapped_transcript = wrap_lines(transcript, number=True)

In [50]:
print(wrapped_transcript[:1000])

<1:Đây là ngày 20 tháng 11 năm 1994. Sáng hôm nay chúng ta đã làm lễ khai kinh ở trên Thiền đường Cam Lộ.>
<2:Đề tài của khóa tu mùa đông này là>
<3:truyền thống sân động của thiền tập.>
<4:Trong Đạo Bụt.>
<5:Và dạy tiếng Anh là The Living Tradition of Buddhist Meditation.>
<6:Chúng ta thấy rằng cái chữ thiền tập ở đây được việc bằng là tiếng Anh là Meditation.>
<7:Và truyền thống sân động của thiền tập trong Đạo Bụt có nghĩa là ngoài Đạo Bụt nó cũng có thiền tập.>
<8:Thành ra dịch thiền tập trong Đạo Bụt mình dịch là Buddhist Meditation.>
<9:Tại vì ngoài Đạo Bụt nó còn có những cái truyền thống thiền tập khác.>
<10:Chữ sân động ở đây nó có nghĩa là nó là một thực tài sống.>
<11:Chứ không phải là một cái nền triết lý mà mình nghiên cứu.>
<12:Một cái nền triết lý nó nằm trong sách vở mà mình nghiên cứu.>
<13:Và cái chủ đề này không phải là một chủ đề dành cho người nghiên cứu.>
<14:Đúng là phương diện truy thức.>
<15:Mà là một chủ đề dành cho người muốn và có cái niềm thao thức thực tập

In [30]:
section_instructions = process_section_instructions_vi.format(section_count="")

In [31]:
print(section_instructions)

You are a highly skilled and meticulous assistant processing an audio transcript of a Dharma Talk given by Thich Nhat Hanh in English.

Each line of the transcript is numbered in the format: <NUM:LINE> 

You goal is to divide the entire transcript into  logical sections based on content. 

For each section, give the title, a brief summary, and starting and ending line numbers.

Also provide a brief summary of the whole text.

IMPORTANT: Every line in the transcript must belong to a section. Don't leave out any lines. Don't include lines in more than one section.


In [35]:
section_object = process_text(wrapped_transcript, section_instructions, response_object=DharmaTalkSections, max_tokens=5000)

2024-12-20 19:25:21,611 - tnh.open_ai_text_processing - [36mINFO[0m - Postprocessing started.[0m


In [36]:
json_section_path = video_dir / f"section_{talk_name}.json"

In [37]:
save_model_to_json(json_section_path, section_object)

In [94]:
# section_object = load_json_into_model(json_section_path, DharmaTalkSections)

In [38]:
print(section_object.talk_summary)

The Dharma talk by Thich Nhat Hanh discusses the theme of the winter retreat, which is the 'Living Tradition of Buddhist Meditation.' It emphasizes the need to maintain mindfulness meditation as a living, adaptive practice that addresses human suffering and needs across different eras. The talk references the use of canonical texts, particularly the Sutta on Measuring (Anumana Sutta), to provide guidelines for practitioners to review personal faults and virtues in the context of a community. It highlights the importance of Sangha building, as without a supportive community, personal transformation and practice are challenging.


In [39]:
section_object.sections

[Section(title_vi='Giới thiệu khóa tu và chủ đề truyền thống Thiền tập', title_en='Introduction to the Retreat and Theme of the Living Tradition of Meditation', summary="Thich Nhat Hanh introduces the winter retreat's focus on the 'Living Tradition of Buddhist Meditation.' This includes understanding the dynamic nature of meditation within Buddhist teachings, acknowledging that meditation exists outside Buddhism as well, and ensuring it's a responsive practice addressing human needs and suffering.", start_line=1, end_line=27),
 Section(title_vi='Lịch sử và triển khai của thiền tập Phật giáo', title_en='The History and Development of Buddhist Meditation', summary='An overview of the historical development of meditation from the time of the Buddha to the emergence of Mahayana Buddhism. The talk covers the spread of meditation practices from India to China and Vietnam, highlighting the challenges of fully understanding meditation traditions across multiple countries within a short retreat

In [42]:
len(section_object.sections)

10

In [40]:
output_xml_path = video_dir / f"formatted_{talk_name}.xml"

In [41]:
print(output_xml_path)

/Users/phapman/Desktop/tnh-scholar/sandbox/video_transcriptions/Kinh Tư Lượng [TTSĐCTTĐB 01] ｜ TS Thích Nhất Hạnh (20-11-1994, Xóm Thượng, Làng Mai)/formatted_Kinh Tư Lượng [TTSĐCTTĐB 01] ｜ TS Thích Nhất Hạnh (20-11-1994, Xóm Thượng, Làng Mai).xml


### Repair run: (re)process only some of the sections, or all of them, as selected by `section_range`

In [60]:
# Translate the selected sections one by one and write them to `output_xml_path`
# as a single <document> made of <section> blocks.
sections = section_object.sections
sections_processed = []

# 0-based indices of the sections to process. Defaults to every section so the
# cell works for any number of sections; narrow it (e.g. range(3, 5)) to
# re-run only part of a talk.
# NOTE: the output file is overwritten with ONLY the sections selected here.
section_range = range(len(sections))

logger.info(f"Sections to process: {list(section_range)}")
for i in section_range:
    section = sections[i]
    logger.info(f"Processing section {i+1}: '{section.title_en}'...")
    # Brackets are kept so the model sees, and is asked to reproduce, the <NUM:LINE> format.
    original_lines = lines_from_wrapped_text(
        wrapped_transcript,
        start=section.start_line,
        end=section.end_line,
        keep_brackets=True
    )
    # NOTE: this rebinds `section_instructions`, which an earlier cell uses for the sectioning prompt.
    section_instructions = section_instructions_translate_vi.format(section_title=section.title_en)

    # The formatted prompt is logged only when the first section (index 0) is part of the run.
    if i == 0:
        logger.info(f"Processing instructions:\n{section_instructions}")

    processed_lines = process_text(original_lines, section_instructions, batch=False)
    sections_processed.append(f"<section>\n<title>{section.title_en}</title>\n{processed_lines}\n</section>")
output_str = "<document>\n" + "\n\n".join(sections_processed) + "\n</document>"
write_text_to_file(output_xml_path, output_str, overwrite=True)


2024-12-20 20:38:43,781 - tnh.postprocessing_english - [36mINFO[0m - Sections to process: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9][0m
2024-12-20 20:38:43,782 - tnh.postprocessing_english - [36mINFO[0m - Processing section 1: 'Introduction to the Retreat and Theme of the Living Tradition of Meditation'...[0m
2024-12-20 20:38:43,783 - tnh.postprocessing_english - [36mINFO[0m - Processing instructions:
You are the world's leading expert at translating Dharma talks transcribed from spoken Vietnamese.

You are translating a section titled 'Introduction to the Retreat and Theme of the Living Tradition of Meditation' from a Dharma talk offered by Thich Nhat Hanh (Thay) in Plum Village, France.

Lines of the transcript are numbered and are given in the format <NUM:LINE>.

Your task is to translate each line into correct, clear and typical English. Add correct punctuation to create meaning that matches the speakers style and intent.

You may have to infer the Thay's intent in order to correct tra

2024-12-20 20:38:48,908 - tnh.open_ai_text_processing - [36mINFO[0m - Processing completed.[0m
2024-12-20 20:38:48,910 - tnh.postprocessing_english - [36mINFO[0m - Processing section 2: 'The History and Development of Buddhist Meditation'...[0m
2024-12-20 20:38:48,912 - tnh.open_ai_text_processing - [36mINFO[0m - Postprocessing started...[0m
2024-12-20 20:38:51,004 - tnh.open_ai_text_processing - [36mINFO[0m - Processing completed.[0m
2024-12-20 20:38:51,006 - tnh.postprocessing_english - [36mINFO[0m - Processing section 3: 'Overview of the Anumana Sutta (Sutta on Measuring)'...[0m
2024-12-20 20:38:51,011 - tnh.open_ai_text_processing - [36mINFO[0m - Postprocessing started...[0m
2024-12-20 20:39:01,981 - tnh.open_ai_text_processing - [36mINFO[0m - Processing completed.[0m
2024-12-20 20:39:01,984 - tnh.postprocessing_english - [36mINFO[0m - Processing section 4: 'Introduction and Analysis of the Anumana Sutta'...[0m
2024-12-20 20:39:01,989 - tnh.open_ai_text_proc

In [53]:
print(output_str)

<document>
<1:Today is the 20th of November, 1994. This morning, we held the opening ceremony for the retreat at the Cam Lo Meditation Hall.>
<2:The theme of this winter retreat is>
<3:The Living Tradition of Meditation.>
<4:In the Buddhist tradition.>
<5:And it is translated into English as The Living Tradition of Buddhist Meditation.>
<6:We see that the term 'thiền tập' is translated into English as Meditation.>
<7:And the living tradition of meditation in Buddhism means that outside of Buddhism, there is also meditation.>
<8:Therefore, we translate 'thiền tập' in Buddhism as Buddhist Meditation.>
<9:Because outside of Buddhism, there are also other meditation traditions.>
<10:The term 'living' here means it is a living reality.>
<11:Not merely a philosophical foundation that we study.>
<12:A philosophical foundation that resides in books, which we study.>
<13:And this theme is not for those interested purely in academic research.>
<14:It pertains to experiential understanding.>
<15:

In [126]:
print(output_xml_path)

processed_videos/video_transcriptions/The Redwood Sutra： a Dharma Talk with Brother Phap Luu ｜ 2023-06-11/formatted_The Redwood Sutra： a Dharma Talk with Brother Phap Luu ｜ 2023-06-11.xml


In [127]:
#process_sections(output_xml_path, transcript, section_object, postprocess_format_instructions_en_2)

2024-12-12 21:19:15,645 - tnh.postprocessing_english - [36mINFO[0m - Sections to process: 12[0m
2024-12-12 21:19:15,647 - tnh.postprocessing_english - [36mINFO[0m - Processing section 1: 'Introduction to Mindfulness Retreat and Personal Reflections on Redwood Trees'...[0m
2024-12-12 21:19:15,647 - tnh.postprocessing_english - [36mINFO[0m - Processing instructions:
You are the world's leading expert at formatting Dharma talk audio transcriptions into written text. The talks are given by native, and mostly-fluent, English speakers. 

The current text is a section entitled 'Introduction to Mindfulness Retreat and Personal Reflections on Redwood Trees' from a Dharma Talk offered by a California-based English-speaking monastic. 

Your goal is to process the section into meaningful paragraphs while correcting errors (logical, transcription, or grammatical). 

Use <p> tags to mark paragraphs. Insert <section> and <title> tags at the beginning of the text and close with a </section> ta

2024-12-12 21:19:21,161 - tnh.postprocessing_english - [36mINFO[0m - Processing section 2: 'The Experience of Mindfulness and Spiritual Insights'...[0m
2024-12-12 21:19:21,164 - tnh.video_processing - [36mINFO[0m - Starting postprocessing...[0m
2024-12-12 21:19:24,951 - tnh.postprocessing_english - [36mINFO[0m - Processing section 3: 'Mindfulness and Technology'...[0m
2024-12-12 21:19:24,952 - tnh.video_processing - [36mINFO[0m - Starting postprocessing...[0m
2024-12-12 21:19:29,305 - tnh.postprocessing_english - [36mINFO[0m - Processing section 4: 'The Redwood Sutra and Its Qualities'...[0m
2024-12-12 21:19:29,307 - tnh.video_processing - [36mINFO[0m - Starting postprocessing...[0m
2024-12-12 21:19:43,445 - tnh.postprocessing_english - [36mINFO[0m - Processing section 5: 'Monastic Qualities and Reflection on Redwood Traits'...[0m
2024-12-12 21:19:43,447 - tnh.video_processing - [36mINFO[0m - Starting postprocessing...[0m
2024-12-12 21:19:49,784 - tnh.postproces

["<section>\n<title>Introduction to Mindfulness Retreat and Personal Reflections on Redwood Trees</title>\n\n<p>Like a redwood tree, upright in a forest of the Sangha, we feel our roots spreading out into the beautiful map on the forest floor, here in the Ocean of Peace Meditation Hall, as we listen to the three sounds of the bell.</p>\n\n<p>Dear respected Thay, dear noble community, welcome to our first Day of Mindfulness at Deer Park after many, many weeks. It's so nice to see the hall full, like a big forest.</p>\n\n<p>Many of us have seen a redwood tree or walked in the redwood forest. Can you raise your hand? Many of us. The monks and nuns just returned from Northern California, having driven two days up to the coastal redwoods near Crescent City. We camped on the banks of the Smith River. For many of our brothers and sisters, it was their first time seeing a redwood tree.</p>\n\n<p>When I was about 13, during my second trip on a plane, I visited that area. My father was on a busi

In [68]:
# sections_formatted = []
# sections_original = []
# sections = section_object.sections
# section_range = range(0, 2)
# output_file = test_dir / f"formatted_{talk_name}.xml"
# for i in section_range:
#     section = sections[i]
#     original_lines = lines_from_wrapped_text(wtest, section.start_line, section.end_line, keep_brackets=False)
#     format_instructions = postprocess_format_instructions_en_2.format(section_title=section.title)
#     logger.info(f"Formatting section '{section.title}'...")

#     if i == 0:
#         logger.info(f"Translation instructions:\n{format_instructions}")
    
#     translated_lines = postprocess_text(original_lines, format_instructions, batch=False)
#     sections_formatted.append(translated_lines)
#     write_text_to_file(output_file, translated_lines, append=True)
    

2024-12-12 20:36:56,716 - tnh.postprocessing_english - [36mINFO[0m - Formatting section 'Introduction to Vesak and Personal Celebrations'...[0m
2024-12-12 20:36:56,717 - tnh.postprocessing_english - [36mINFO[0m - Translation instructions:
You are the world's leading expert at formatting Dharma talk audio transcriptions into written text. The talks are given by native, and mostly-fluent, English speakers. 

The current text is a section entitled 'Introduction to Vesak and Personal Celebrations' from a Dharma Talk offered by a California-based English-speaking monastic. 

Your goal is to process the section into meaningful paragraphs while correcting errors (logical, transcription, or grammatical). 

Use <p> tags to mark paragraphs. Insert <section> and <title> tags at the beginning of the text and close with a </section> tag. 

You may have to infer the speaker's intent, and also use clues from context, in order to correct transcription or speaking errors and to generate a text tha

In [40]:
# Batch run: for every talk directory under `video_storage_dir`, section the
# transcript and then process each section, skipping talks that have no
# transcript or that already have a formatted XML file.
# NOTE: this loop rebinds the notebook-level names `video_dir`, `talk_name`,
# `transcript`, `wrapped_transcript`, `section_object` and `output_xml_path`.
for i, video_dir in enumerate(iterate_subdir(video_storage_dir)):
    try:
        talk_name = video_dir.name

        logger.info(f"Processing talk {i+1}: '{talk_name}'")

        transcript_file = video_dir / f"{talk_name}.txt"

        section_output_path = video_dir / f"section_{talk_name}.json"

        output_xml_path = video_dir / f"formatted_{talk_name}.xml"

    except Exception as e:
        # Report the directory itself: `talk_name` may not have been (re)assigned yet.
        logger.error(f"Error in file setup for {video_dir}: {e}, skipping.")
        # Actually skip; otherwise the code below would run with stale or unbound paths.
        continue

    if not transcript_file.exists():
        logger.info(f"No transcript found in {transcript_file}. Skipping {talk_name}")
        continue

    logger.info(f"Transcript found: {transcript_file}")

    # An existing formatted file marks the talk as done; remove it to force reprocessing.
    if output_xml_path.exists():
        logger.info(f"Formatted xml file found. Skipping {talk_name}.")
        continue

    try:
        transcript = get_text_from_file(transcript_file)

        wrapped_transcript = wrap_lines(transcript, number=True)

        logger.info(f"Starting sectioning postprocess for {talk_name}...")

        # `process_text` is the function this notebook imports and uses for the same
        # call elsewhere; `postprocess_text` is neither imported nor defined in the visible cells.
        # NOTE(review): `postprocess_section_instructions_en` and
        # `postprocess_format_instructions_en_2` are not defined in the visible
        # part of this notebook; confirm they exist before running.
        section_object = process_text(wrapped_transcript, postprocess_section_instructions_en, response_object=DharmaTalkSections, max_tokens=5000)

        # Persist the sectioning result before the (longer) per-section pass.
        write_text_to_file(section_output_path, section_object.model_dump_json())
        logger.info(f"Sectioning for {talk_name} completed. Dumped section data to {section_output_path}.")

        logger.info(f"Starting postprocess for {talk_name}: section formatting sequence.")
        process_sections(output_xml_path, wrapped_transcript, section_object, postprocess_format_instructions_en_2)
        logger.info(f"Postprocessing completed for {talk_name}")

    except Exception as e:
        # Deliberately best-effort: one failing talk must not stop the rest of the batch.
        logger.error(f"Error processing {talk_name}: {e}. Partial processing may have been saved. Skipping to next talk file.")

2024-12-13 07:45:56,323 - tnh.postprocessing_english - [36mINFO[0m - Processing talk 1: 'Vesak - The Baby Buddha Within ｜ Sister Kính Nghiêm ｜ 2024-05-19'[0m
2024-12-13 07:45:56,324 - tnh.postprocessing_english - [36mINFO[0m - Transcript found: processed_videos/video_transcriptions/Vesak - The Baby Buddha Within ｜ Sister Kính Nghiêm ｜ 2024-05-19/Vesak - The Baby Buddha Within ｜ Sister Kính Nghiêm ｜ 2024-05-19.txt[0m
2024-12-13 07:45:56,324 - tnh.postprocessing_english - [36mINFO[0m - Formatted xml file found. Skipping Vesak - The Baby Buddha Within ｜ Sister Kính Nghiêm ｜ 2024-05-19.[0m
2024-12-13 07:45:56,325 - tnh.postprocessing_english - [36mINFO[0m - Processing talk 2: 'The Redwood Sutra： a Dharma Talk with Brother Phap Luu ｜ 2023-06-11'[0m
2024-12-13 07:45:56,325 - tnh.postprocessing_english - [36mINFO[0m - Transcript found: processed_videos/video_transcriptions/The Redwood Sutra： a Dharma Talk with Brother Phap Luu ｜ 2023-06-11/The Redwood Sutra： a Dharma Talk with B

In [23]:
section_object.sections

[Section(title='Invocation of Avalokiteshvara Bodhisattva', summary='The section begins with an explanation of the invocation of Avalokiteshvara Bodhisattva, emphasizing deep listening and looking into suffering within ourselves and others, and the interconnectedness of all beings. It prepares the community to experience compassion through a collective chant.', start_line=1, end_line=24),
 Section(title='Introduction of Sangha Members', summary='This section introduces Sister Peace and Brother Brett Cook, who attended the first People of Color retreat 20 years ago. It highlights the practice within families and the connection to land through spiritual guidance from monastic leaders.', start_line=25, end_line=39),
 Section(title='Teachings of Eye-Hugging Practice', summary="Brother Brett Cook shares a simple practice called 'eye-hugging', emphasizing connection and presence through smiling and making eye contact. He discusses the calming effect of this practice in fostering community co

In [20]:
token_count(str(section_object))

947