In [48]:
from datetime import datetime
from docx import Document
from docx.shared import Pt, RGBColor, Inches
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.section import WD_SECTION
from pathlib import Path

class DocxWriter:
    def __init__(self, 
                 template_path="volume/fs_template.docx",
                 title_font="Manrope",
                 title_size=16,
                 title_color=(0, 0, 0),  # Black
                 content_font="Arial",
                 content_size=10,
                 table_font="Arial",
                 table_size=11,
                 table_color=(0, 0, 0)):
        
        self.template_path = template_path
        
        # Title formatting
        self.title_font = title_font
        self.title_size = title_size  
        self.title_color = title_color

        # Content formatting
        self.content_font = content_font
        self.content_size = content_size
        
        # Table formatting
        self.table_font = table_font
        self.table_size = table_size
        self.table_color = table_color
    
    def fill_template(self, course, output_path=None):
        doc = Document(self.template_path)
        
        # Ensure a consistent gap after the header on every page:
        # gap_after_header = top_margin - header_distance ≈ 36pt (≈ 3 lines)
        for section in doc.sections:
            section.header_distance = Pt(12)   # 12pt from top edge to header
            section.top_margin = Pt(48)        # 48pt top margin → 36pt gap after header
            # section.left_margin = Pt(72)       # 1 inch
            # section.right_margin = Pt(72)      # 1 inch
            # section.bottom_margin = Pt(72)     # 1 inch
        
        # Fill and format title
        self._fill_title(doc, course)
        
        # Fill and format table
        self._fill_table(doc, course)
        
        # Fill header level
        self._fill_header(doc, course)
        
        # Write content if available with proper spacing after header
        if course.content:
            self._write_content(doc, course.content)
        
        # Save
        if not output_path:
            saved_name = course.name.lower().replace(" ", "_")
            output_path = f"volume/artifacts/{saved_name}_filled.docx"
        
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        doc.save(output_path)
        print(f"📄 Document saved to: {output_path}")
        return output_path
    
    def _fill_title(self, doc, course):
        """Fill and format the main title"""
        for paragraph in doc.paragraphs:
            if "TITRE DU COURS" in paragraph.text:
                paragraph.text = course.course_title or course.name
                # Apply Manrope 16 bold black
                for run in paragraph.runs:
                    run.font.name = self.title_font
                    run.font.size = Pt(self.title_size)
                    run.font.bold = True  # Make main title bold
                    run.font.color.rgb = RGBColor(*self.title_color)
                break
    
    def _fill_table(self, doc, course):
        """Fill and format table cells"""
        if len(doc.tables) > 0:
            table = doc.tables[0]
            
            # Define cell data: (row, col, text)
            cells_data = [
                (0, 0, f"BLOC {course.block or 'SANTE'}"),
                (1, 0, course.subject),
                (0, 1, "Auteur : RonéoAI"), 
                (1, 1, f"Date : {datetime.now().strftime('%Y-%m-%d')}")
            ]
            
            # Fill and format each cell
            for row, col, text in cells_data:
                cell = table.rows[row].cells[col]
                cell.text = text
                
                # Apply table formatting
                for paragraph in cell.paragraphs:
                    for run in paragraph.runs:
                        run.font.name = self.table_font
                        run.font.size = Pt(self.table_size)
                        run.font.color.rgb = RGBColor(*self.table_color)
    
    def _fill_header(self, doc, course):
        """Fill header level indicator"""
        for paragraph in doc.paragraphs:
            if "L1.SpS" in paragraph.text:
                new_level = f"{course.level or 'L1'}.{course.semester or 'S1'}"
                paragraph.text = paragraph.text.replace("L1.SpS", new_level)
                break
    
    def _write_content(self, doc, content):
        """Write hierarchical content with proper Google Docs-style headings"""
        
        #Add spacer paragraph after header section to ensure proper gap
        header_spacer = doc.add_paragraph()
        spacer_format = header_spacer.paragraph_format
        spacer_format.space_before = Pt(8)  # Large space before first content
        
        # Write sections with proper Word heading styles
        for i, section in enumerate(content.sections, 1):
            # Level 1 Heading: Roman numerals with Heading 1 style
            roman_title = f"{self._to_roman(i)}. {section.title}"
            heading1 = doc.add_paragraph(roman_title, style='Heading 1')
            
            # Customize Heading 1 style with more spacing for new page feel
            heading1_format = heading1.paragraph_format
            heading1_format.space_before = Pt(36)  # More space before each major section
            heading1_format.space_after = Pt(12)   # More space after title
            
            for run in heading1.runs:
                run.font.name = self.title_font  # Manrope for level 0
                run.font.size = Pt(self.title_size)  # 16pt
                run.font.bold = True
                run.font.color.rgb = RGBColor(34, 139, 34)  # Leaf green color
            
            # Write section content with proper spacing
            if section.content:
                for content_item in section.content:
                    if content_item.strip():
                        content_para = doc.add_paragraph(content_item.strip())
                        content_format = content_para.paragraph_format
                        content_format.space_after = Pt(6)
                        content_format.line_spacing = 1.15
                        
                        for run in content_para.runs:
                            run.font.name = "Calibri"
                            run.font.size = Pt(11)
            
            # Write subsections
            self._write_subsections(doc, section.subsections, i)
    
    def _write_subsections(self, doc, subsections, parent_num, level=1):
        """Write subsections with Google Docs-style heading hierarchy"""
        
        for j, subsection in enumerate(subsections, 1):
            if level == 1:
                # Level 2 Heading: Heading 2 style
                subtitle = f"{j}. {subsection.title}"
                heading = doc.add_paragraph(subtitle, style='Heading 2')
                
                # Customize Heading 2 style
                heading_format = heading.paragraph_format
                heading_format.space_before = Pt(12)
                heading_format.space_after = Pt(6)
                
                for run in heading.runs:
                    run.font.name = self.table_font  # Arial for level 1
                    run.font.size = Pt(self.title_size - 2)  # 14pt (start with -2)
                    run.font.bold = True
                    run.font.color.rgb = RGBColor(0, 0, 0)  # Black
                    
            elif level == 2:
                # Level 3 Heading: Heading 3 style
                subtitle = f"{parent_num}.{j} {subsection.title}"
                heading = doc.add_paragraph(subtitle, style='Heading 3')
                
                # Customize Heading 3 style
                heading_format = heading.paragraph_format
                heading_format.space_before = Pt(10)
                heading_format.space_after = Pt(6)
                
                for run in heading.runs:
                    run.font.name = self.table_font  # Arial for level 2
                    run.font.size = Pt(self.title_size - 3)  # 13pt (then -1)
                    run.font.bold = True
                    run.font.color.rgb = RGBColor(0, 0, 0)  # Black
                    
            else:
                # Level 4+ Heading: Regular bold text with spacing
                subtitle = f"{parent_num}.{j} {subsection.title}"
                heading = doc.add_paragraph(subtitle)
                
                heading_format = heading.paragraph_format
                heading_format.space_before = Pt(8)
                heading_format.space_after = Pt(4)
                
                for run in heading.runs:
                    run.font.name = self.table_font  # Arial for level 3+
                    run.font.size = Pt(self.title_size - 4)  # 12pt (then -1 again)
                    run.font.bold = True
                    run.font.color.rgb = RGBColor(0, 0, 0)  # Black
            
            # Write subsection content with proper spacing
            if subsection.content:
                for content_item in subsection.content:
                    if content_item.strip():
                        content_para = doc.add_paragraph(content_item.strip())
                        content_format = content_para.paragraph_format
                        content_format.space_after = Pt(6)
                        content_format.line_spacing = 1.15
                        
                        for run in content_para.runs:
                            run.font.name = self.content_font
                            run.font.size = Pt(self.content_size)
            
            # Recursively handle deeper levels
            if subsection.subsections:
                self._write_subsections(doc, subsection.subsections, f"{parent_num}.{j}", level + 1)
    
    def _to_roman(self, num):
        """Convert number to Roman numerals"""
        vals = [10, 9, 5, 4, 1]
        syms = ['X', 'IX', 'V', 'IV', 'I'] 
        roman = ''
        for val, sym in zip(vals, syms):
            count = num // val
            roman += sym * count
            num -= val * count
        return roman

# Two-shots

In [1]:
from src.course import Course

# Load: you don't have a course, create one from file
loaded_course = Course.load_from_json("/Users/youssefjanjar/Documents/formascience/class_parser/volume/artifacts/architecture_du_génome_humain_20250817_162748.json")  # ← class method
print(loaded_course.content.print_outline())


Course Outline:
Présentation du module et plan du cours
Introduction au contexte évolutif et organismes modèles
  Arbre de la vie et concept de LUCA
  Arbre des eucaryotes et choix d'organismes modèles
Notions fondamentales et définitions clés
  Homologie: définition, orthologie, paralogie et méthodes de détection
  Biologie du gène, dogme central et omiques
Le génome humain: composition et organisation générale
  Composition générale du génome: haploïde et diploïde
Détermination et séquençage du génome humain
  Historique, objectifs et ciblage de l’assemblage initial
  Cartographie génétique basée sur fréquences de recombinaison
  Cartographie physique: distances en kilobases et mégabases


In [49]:
# Test the new Google Docs-style DocX writer
writer = DocxWriter()

# Generate the document with improved formatting
output = writer.fill_template(loaded_course)
print(f"✨ Document generated with Google Docs-style headings!")
print(f"📍 Features: Professional spacing, Word heading styles, proper line breaks")


📄 Document saved to: volume/artifacts/architecture_du_génome_humain_filled.docx
✨ Document generated with Google Docs-style headings!
📍 Features: Professional spacing, Word heading styles, proper line breaks


In [29]:
# Run this to see what's actually in your template
def debug_template():
    doc = Document("volume/fs_template.docx")
    
    print("📄 Template content:")
    for i, p in enumerate(doc.paragraphs):
        if p.text.strip():
            print(f"Para {i}: '{p.text}'")
    
    print("\n📋 Table content:")
    for table in doc.tables:
        for row_idx, row in enumerate(table.rows):
            for cell_idx, cell in enumerate(row.cells):
                if cell.text.strip():
                    print(f"Cell [{row_idx},{cell_idx}]: '{cell.text}'")

debug_template()

📄 Template content:
Para 2: 'TITRE DU COURS'

📋 Table content:


In [None]:
docx_writer = DocxWriter()

docx_writer.fill_template(pilot)