In [1]:
from docx import Document
from docx.shared import Pt, Cm, RGBColor
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.enum.text import WD_ALIGN_PARAGRAPH

class MultilevelList:
    """
    A class to manage multilevel lists within a Word document.

    Attributes:
        document: A docx.Document object representing the Word document.
        levels: A list of dictionaries containing formatting details for each level of the list.

    Methods:
        __init__(document): Initializes the class with a Word document.
        add_heading(text, level): Adds a heading with specific formatting to the document.
        _set_page_formatting(): Sets the formatting for the document's pages.
        _font_size(font_size_name): Returns the point size corresponding to a named font size.
        _set_footer(): Sets the footer for the document.
        _set_styles(): Sets the paragraph styles for each level of the list.
    """
    def __init__(self, document):
        """
        Initializes the class with a Word document.
        Args:
            document: A docx.Document object representing the Word document.
        """
        self.document = document
        self.levels = [
            {'font': '黑体', 'font_size': '小四', 'bold': False, 'italic': False, 'numbering': '一、'},
            {'font': '楷体', 'font_size': '小四号', 'bold': False, 'italic': False, 'numbering': '一、'},
            {'font': '宋体', 'font_size': '五号', 'bold': False, 'italic': False, 'numbering': '（一）'},
            {'font': '宋体', 'font_size': '五号', 'bold': False, 'italic': False, 'numbering': '1.'}]
        self.counters = [0] * len(self.levels)
        self._set_page_formatting()
        self._set_styles()
        self._set_footer()
    
    def add_heading(self, text, level):
        """
        Adds a heading with specific formatting to the document.

        Args:
            text: The text of the heading.
            level: The level of the heading in the multilevel list.
        """
        # Increment counter for the current level and reset counters for lower levels
        self.counters[level - 1] += 1
        self.counters[level:] = [0] * (len(self.levels) - level)

        level_settings = self.levels[level - 1]
        heading = self.document.add_paragraph(style='Heading{}'.format(level))
        heading.alignment = WD_ALIGN_PARAGRAPH.LEFT

        # Determine the numbering text based on the level and counter
        numbering_format = level_settings['numbering']
        if numbering_format in ["一、", "（一）"]:
            numbering_text = ["一", "二", "三", "四", "五", "六", "七", "八", "九", "十"]
            numbering = numbering_text[self.counters[level - 1] % 10 - 1] + "、"  # Handle more than 10
        else:
            numbering = str(self.counters[level - 1]) + '.'

        # Concatenate the numbering text with the heading text
        run = heading.add_run(numbering + text)

        # Apply the custom formatting
        run.font.name = level_settings['font']
        run.font.size = Pt(self._font_size(level_settings['font_size']))
        run.font.bold = level_settings['bold']
        run.font.italic = level_settings['italic']
        run._element.rPr.rFonts.set(qn('w:eastAsia'), level_settings['font'])

        # Set color to black
        run.font.color.rgb = RGBColor(0, 0, 0)
        
    def _convert_chinese_number(self, number):
        chinese_numbers = ["一", "二", "三", "四", "五", "六", "七", "八", "九"]
        if number <= 10:
            return chinese_numbers[number - 1]
        else:
            tens = number // 10
            units = number % 10
            return "十" + (chinese_numbers[units - 1] if units != 0 else "")
    

    def _set_page_formatting(self):
        """Sets the formatting for the document's pages, including margins."""
        section = self.document.sections[-1]
        section.left_margin = Cm(1.9)
        section.right_margin = Cm(1.9)
        section.top_margin = Cm(2.3)
        section.bottom_margin = Cm(2.3)

    def _font_size(self, font_size_name):
        """
        Returns the point size corresponding to a named font size.

        Args:
            font_size_name: A string representing the named font size.

        Returns:
            A float representing the point size.
        """
        sizes = {
            '小四': 12,
            '小四号': 12,
            '五号': 10.5,
        }
        return sizes.get(font_size_name, 12)
        

    def _set_footer(self):
        """Sets the footer for the document, including the page number."""
        # Get the footer of the first section
        footer = self.document.sections[0].footer

        # Clear any existing footer content
        for paragraph in footer.paragraphs[:]:
            paragraph.clear()

        # Create a new paragraph in the footer
        paragraph = footer.add_paragraph()

        # Set the alignment to center
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # Add the page number field code
        run = paragraph.add_run()
        fldChar = OxmlElement('w:fldChar')  # Creates a field character element
        fldChar.set(qn('w:fldCharType'), 'begin')  # Sets the field character type
        instrText = OxmlElement('w:instrText')
        instrText.set(qn('xml:space'), 'preserve')  # Preserves the space
        instrText.text = 'PAGE'  # Page number field code
        fldChar2 = OxmlElement('w:fldChar')
        fldChar2.set(qn('w:fldCharType'), 'end')

        run._r.append(fldChar)
        run._r.append(instrText)
        run._r.append(fldChar2)
        run.font.name = 'Times New Roman'
        run.font.size = Pt(self._font_size('小五号'))
  
            
    def _set_styles(self):
        """
        Sets the paragraph styles for each level of the list, including font, size, bold, and italic.
        """
        for level, level_settings in enumerate(self.levels):
            # Retrieve existing style or create a new one if not exists
            try:
                heading_style = self.document.styles['Heading {}'.format(level + 1)]
            except KeyError:
                heading_style = self.document.styles.add_style('Heading {}'.format(level + 1), WD_STYLE_TYPE.PARAGRAPH)

            # Apply settings to the style
            heading_style.font.name = level_settings['font']
            heading_style.font.size = Pt(self._font_size(level_settings['font_size']))
            heading_style.font.bold = level_settings['bold']
            heading_style.font.italic = level_settings['italic']
            heading_style.font._element.rPr.rFonts.set(qn('w:eastAsia'), level_settings['font'])


# Default settings
MultilevelList.levels =[
        {'font': '黑体', 'font_size': '小四', 'bold': False, 'italic': False, 'numbering': '一、'},
        {'font': '楷体', 'font_size': '小四号', 'bold': False, 'italic': False, 'numbering': '（一）'},
        {'font': '宋体', 'font_size': '五号', 'bold': False, 'italic': False, 'numbering': '1.'},
        {'font': '宋体', 'font_size': '五号', 'bold': False, 'italic': False, 'numbering': '（1）'},
        {'font': '宋体', 'font_size': '五号', 'bold': False, 'italic': False, 'numbering': '①'}
    ]



In [2]:
def create_document_from_string(input_string, output_file):
    # Create a new Word doc
    doc = Document()
    
    # Initialize the MultilevelList class with the doc
    multilevel_list = MultilevelList(doc)
    multilevel_list._set_footer()
    multilevel_list._set_styles()

    # Split the input string by lines
    lines = input_string.strip().split('\n')

    # Dynamically generate the level_map dictionary using string manipulation
    max_levels = 5
    level_map = {f"HD{i}": i for i in range(1, max_levels + 1)}

    # Iterate through the lines and process them
    for line in lines:
        # Extract the level from the line
        level_tag = line[1:4]

        # Get the level from the level_map, default to 1 if not found
        level = level_map.get(level_tag, 1)

        # Extract the text content from the line
        content = line[5:].strip()

        # Add the heading to the doc using the MultilevelList class
        multilevel_list.add_heading(content, level)

    # Save the doc to the specified output file
    doc.save(output_file)


In [3]:
import glob
import os

def read_response_string(folder_path="C://Users//georg//Desktop//Bulk processing//Response String", filename=None):
    """
    Read a string from a text file in a specific folder.

    Parameters:
    - folder_path (str): The path to the folder where the text files are stored. Must be provided if filename is not.
    - filename (str, optional): The specific file to read from. If None, the function reads from the latest file in the folder.

    Returns:
    - str: The content of the file as a string, or None if no matching files are found or folder path is not provided.

    Example:
    - Read from the latest file in a specific folder: read_string_from_file(folder_path='/path/to/folder')
    - Read from a specific file: read_string_from_file(filename='/path/to/folder/filename.txt')
    """
    if filename is None:
        if folder_path is None:
            print("Please specify a folder path")
            return None

    # Construct the pattern with the folder path
    pattern = os.path.join(folder_path, 'response_str_*.txt')
    print(f"Searching in folder: {folder_path}")
    print(f"Pattern: {pattern}")

    # Find all files that match the pattern
    files = glob.glob(pattern)
    print(f"Found files: {files}")
    
    # Find the latest file based on creation time
    latest_file = max(files, key=os.path.getctime) if files else None
    
    if latest_file is None:
        print(f"No matching files found in {folder_path}")
        return None

    filename = latest_file

    with open(filename, 'r') as file:
        return file.read()






In [4]:
folder_path = 'Response String'
response_string = read_response_string(folder_path=folder_path)
create_document_from_string(response_string, 'test.docx')


Searching in folder: Response String
Pattern: Response String\response_str_*.txt
Found files: ['Response String\\response_str_20230812114711.txt', 'Response String\\response_str_20230812121737.txt', 'Response String\\response_str_20230812121836.txt', 'Response String\\response_str_20230812122020.txt', 'Response String\\response_str_20230812123106.txt', 'Response String\\response_str_20230812123602.txt', 'Response String\\response_str_20230812124201.txt', 'Response String\\response_str_20230812185057.txt', 'Response String\\response_str_20230812190805.txt', 'Response String\\response_str_20230812190851.txt', 'Response String\\response_str_20230812191013.txt', 'Response String\\response_str_20230812191039.txt', 'Response String\\response_str_20230812191150.txt', 'Response String\\response_str_20230812191201.txt', 'Response String\\response_str_20230812191225.txt', 'Response String\\response_str_20230812191252.txt', 'Response String\\response_str_20230812191835.txt', 'Response String\\res

  return self._get_style_id_from_style(self[style_name], style_type)
