In [35]:
# Sample paragraph (a description of Bionic Reading) kept as demo text.
# The leading and trailing newlines are part of the string value.
s = """
A revolutionary way for guiding the eyes through text using artificial fixation spots to make reading easier. As a result, the reader's attention is drawn solely to the highlighted starting letters, leaving the word to be completed by the brain center. Bionic Reading attempts to foster greater in-depth reading and understanding of textual content in a digital environment dominated by shallow kinds of reading.
"""


请帮我对句子 s 进行 bionic reading 处理。
所谓 bionic reading，就是将每个单词的前一半字母加粗。你需要借助一个 PDF 库，将处理后的这句话导出为一个 PDF 文件。

In [36]:
import spacy
from reportlab.lib.pagesizes import A5
from reportlab.lib.units import inch
from reportlab.pdfgen import canvas

# Load the English spaCy pipeline (en_core_web_sm) once at module level;
# the functions defined below in this cell reach it through the name `nlp`.
nlp = spacy.load("en_core_web_sm")

def is_english(text, min_ascii_ratio=0.9):
    """Heuristically decide whether ``text`` looks like English.

    spaCy's ``token.lang_`` reports the language of the loaded pipeline's
    vocabulary, not a language detected from the text, so comparing it to
    ``'en'`` accepts every input when an English model is loaded. This
    version inspects the text itself: it passes when at least
    ``min_ascii_ratio`` of the alphabetic characters are ASCII letters.

    This is a script check, not true language identification: other
    languages written in the Latin alphabet will also pass.

    Args:
        text: The string to inspect.
        min_ascii_ratio: Minimum share (0..1) of alphabetic characters that
            must be ASCII for the text to be accepted.

    Returns:
        True if the text passes the check, or contains no alphabetic
        characters at all (nothing to reject); False otherwise.
    """
    letters = [ch for ch in text if ch.isalpha()]
    if not letters:
        # Empty / punctuation-only input: there is no evidence against it.
        return True
    ascii_count = sum(1 for ch in letters if ord(ch) < 128)
    return ascii_count / len(letters) >= min_ascii_ratio

def _bionic_units(tokens, plain_words):
    """Group spaCy tokens into unbreakable units of ``(text, is_bold)`` segments.

    A unit is a run of tokens with no whitespace between them (for example a
    word plus its trailing punctuation), so line wrapping never separates
    them. Tokens whose lowercase text is in ``plain_words`` are emitted as a
    single non-bold segment; every other token is split so that its first
    ``len // 2`` characters are bold.
    """
    unit = []
    for token in tokens:
        if token.is_space:
            # Whitespace tokens (e.g. newlines) draw nothing but end the unit.
            if unit:
                yield unit
                unit = []
            continue
        word = token.text
        if word.lower() in plain_words:
            unit.append((word, False))
        else:
            half = len(word) // 2
            if half:  # one-character tokens have no bold part
                unit.append((word[:half], True))
            unit.append((word[half:], False))
        if token.whitespace_:
            # A trailing space in the source text closes the unit.
            yield unit
            unit = []
    if unit:
        yield unit


def process_text_to_pdf(input_file, output_file):
    """Render the text in ``input_file`` as a bionic-reading PDF.

    Each word has its first half drawn in Helvetica-Bold and the rest in
    Helvetica; a small set of function words is left entirely in the regular
    face. Text is laid out on A5 pages with word wrapping, one blank line
    between sentences, and a page break whenever the next line would fall
    below the bottom margin.

    Args:
        input_file: Path of a UTF-8 text file to read.
        output_file: Path of the PDF file to write.

    Raises:
        ValueError: If ``is_english`` rejects the text.
        OSError: If ``input_file`` cannot be opened.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        text = f.read()

    if not is_english(text):
        raise ValueError("The text is not in English.")

    plain_words = {'a', 'of', 'the', 'and', 'in', 'to'}  # never bolded
    fonts = {True: "Helvetica-Bold", False: "Helvetica"}
    font_size = 12
    line_height = 14
    side_margin = 0.5 * inch
    vertical_margin = 1 * inch

    # Set up the page geometry.
    c = canvas.Canvas(output_file, pagesize=A5)
    width, height = A5
    right_edge = width - side_margin
    top = height - vertical_margin
    space_width = c.stringWidth(" ", fonts[False], font_size)
    x_position, y_position = side_margin, top

    doc = nlp(text)

    for sent in doc.sents:
        drew_something = False
        for unit in _bionic_units(sent, plain_words):
            # Measure each segment in the font it will actually be drawn in.
            unit_width = sum(
                c.stringWidth(segment, fonts[bold], font_size)
                for segment, bold in unit
            )

            # Wrap before the unit if it would cross the right margin
            # (unless it is already the first thing on the line).
            if x_position > side_margin and x_position + unit_width > right_edge:
                x_position = side_margin
                y_position -= line_height

            # Break the page lazily, right before drawing, so that long
            # sentences paginate and no empty trailing page is produced.
            if y_position < vertical_margin:
                c.showPage()
                y_position = top

            for segment, bold in unit:
                c.setFont(fonts[bold], font_size)
                c.drawString(x_position, y_position, segment)
                x_position += c.stringWidth(segment, fonts[bold], font_size)
            x_position += space_width
            drew_something = True

        if drew_something:
            # Start the next sentence at the left margin, one blank line down.
            x_position = side_margin
            y_position -= 2 * line_height

    c.save()

# Example run: convert test.txt into output.pdf. The input file must already
# exist, because process_text_to_pdf opens it for reading.
input_txt, output_pdf = 'test.txt', 'output.pdf'
process_text_to_pdf(input_file=input_txt, output_file=output_pdf)


In [39]:
import spacy
from docx import Document
from docx.shared import Pt

# Load the English spaCy pipeline (en_core_web_sm) once at module level;
# the functions defined below in this cell reach it through the name `nlp`.
nlp = spacy.load("en_core_web_sm")

def is_english(text, min_ascii_ratio=0.9):
    """Heuristically decide whether ``text`` looks like English.

    spaCy's ``token.lang_`` reports the language of the loaded pipeline's
    vocabulary, not a language detected from the text, so comparing it to
    ``'en'`` accepts every input when an English model is loaded. This
    version inspects the text itself: it passes when at least
    ``min_ascii_ratio`` of the alphabetic characters are ASCII letters.

    This is a script check, not true language identification: other
    languages written in the Latin alphabet will also pass.

    Args:
        text: The string to inspect.
        min_ascii_ratio: Minimum share (0..1) of alphabetic characters that
            must be ASCII for the text to be accepted.

    Returns:
        True if the text passes the check, or contains no alphabetic
        characters at all (nothing to reject); False otherwise.
    """
    letters = [ch for ch in text if ch.isalpha()]
    if not letters:
        # Empty / punctuation-only input: there is no evidence against it.
        return True
    ascii_count = sum(1 for ch in letters if ord(ch) < 128)
    return ascii_count / len(letters) >= min_ascii_ratio

def process_text_to_word(input_file, output_file):
    """Render the text in ``input_file`` as a bionic-reading Word document.

    Every sentence becomes its own paragraph. Within a sentence each word's
    first half (``len // 2`` characters) is written as a bold run and the
    remainder as a regular run; a small set of function words is left
    entirely regular. Spacing follows the source text, so punctuation stays
    attached to the word it follows.

    Args:
        input_file: Path of a UTF-8 text file to read.
        output_file: Path of the .docx file to write.

    Raises:
        ValueError: If ``is_english`` rejects the text.
        OSError: If ``input_file`` cannot be opened.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        text = f.read()

    if not is_english(text):
        raise ValueError("The text is not in English.")

    plain_words = {'a', 'of', 'the', 'and', 'in', 'to'}  # never bolded

    # Build the Word document.
    doc = Document()

    # Let spaCy split the text into sentences.
    doc_nlp = nlp(text)

    for sent in doc_nlp.sents:
        if not sent.text.strip():
            # Whitespace-only "sentences" would only yield empty paragraphs.
            continue

        paragraph = doc.add_paragraph()
        ends_with_space = True  # nothing written yet, so no separator needed

        for token in sent:
            if token.is_space:
                # Newline/tab tokens would turn into line breaks inside a
                # run; write a single separating space instead.
                if not ends_with_space:
                    paragraph.add_run(' ')
                    ends_with_space = True
                continue

            word = token.text
            trailing = token.whitespace_  # ' ' or '' exactly as in the source

            if word.lower() in plain_words:
                paragraph.add_run(word + trailing)
            else:
                half = len(word) // 2
                if half:  # one-character tokens have no bold part
                    bold_run = paragraph.add_run(word[:half])
                    bold_run.bold = True
                paragraph.add_run(word[half:] + trailing)

            ends_with_space = bool(trailing)

    doc.save(output_file)

# Example run: convert test.txt into output.docx. The input file must already
# exist, because process_text_to_word opens it for reading.
input_txt, output_docx = 'test.txt', 'output.docx'
process_text_to_word(input_file=input_txt, output_file=output_docx)


In [47]:
import spacy
from docx import Document
from docx.shared import Pt, RGBColor

# Load the English spaCy pipeline (en_core_web_sm) once at module level;
# the functions defined below in this cell reach it through the name `nlp`.
nlp = spacy.load("en_core_web_sm")

def is_english(text, min_ascii_ratio=0.9):
    """Heuristically decide whether ``text`` looks like English.

    spaCy's ``token.lang_`` reports the language of the loaded pipeline's
    vocabulary, not a language detected from the text, so comparing it to
    ``'en'`` accepts every input when an English model is loaded. This
    version inspects the text itself: it passes when at least
    ``min_ascii_ratio`` of the alphabetic characters are ASCII letters.

    This is a script check, not true language identification: other
    languages written in the Latin alphabet will also pass.

    Args:
        text: The string to inspect.
        min_ascii_ratio: Minimum share (0..1) of alphabetic characters that
            must be ASCII for the text to be accepted.

    Returns:
        True if the text passes the check, or contains no alphabetic
        characters at all (nothing to reject); False otherwise.
    """
    letters = [ch for ch in text if ch.isalpha()]
    if not letters:
        # Empty / punctuation-only input: there is no evidence against it.
        return True
    ascii_count = sum(1 for ch in letters if ord(ch) < 128)
    return ascii_count / len(letters) >= min_ascii_ratio

def process_text_to_word(input_file, output_file):
    """Render the text in ``input_file`` as a gray-toned bionic-reading .docx.

    Every sentence becomes its own paragraph. Within a sentence each word's
    first half (``len // 2`` characters) is written as a bold run in the
    default color, while the remainder of the word is written in gray. A
    small set of function words is written entirely in gray with no bold
    part. Spacing follows the source text, so punctuation stays attached to
    the word it follows.

    Args:
        input_file: Path of a UTF-8 text file to read.
        output_file: Path of the .docx file to write.

    Raises:
        ValueError: If ``is_english`` rejects the text.
        OSError: If ``input_file`` cannot be opened.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        text = f.read()

    if not is_english(text):
        raise ValueError("The text is not in English.")

    plain_words = {'a', 'of', 'the', 'and', 'in', 'to'}  # never bolded
    gray = RGBColor(96, 96, 96)

    # Build the Word document.
    doc = Document()

    # Let spaCy split the text into sentences.
    doc_nlp = nlp(text)

    for sent in doc_nlp.sents:
        if not sent.text.strip():
            # Whitespace-only "sentences" would only yield empty paragraphs.
            continue

        paragraph = doc.add_paragraph()
        ends_with_space = True  # nothing written yet, so no separator needed

        for token in sent:
            if token.is_space:
                # Newline/tab tokens would turn into line breaks inside a
                # run; write a single separating space instead.
                if not ends_with_space:
                    paragraph.add_run(' ')
                    ends_with_space = True
                continue

            word = token.text
            trailing = token.whitespace_  # ' ' or '' exactly as in the source

            if word.lower() in plain_words:
                run = paragraph.add_run(word + trailing)
                run.font.color.rgb = gray
            else:
                half = len(word) // 2
                if half:  # one-character tokens have no bold part
                    bold_run = paragraph.add_run(word[:half])
                    bold_run.bold = True

                # The second half is de-emphasized in gray.
                run = paragraph.add_run(word[half:] + trailing)
                run.font.color.rgb = gray

            ends_with_space = bool(trailing)

    doc.save(output_file)

# Example run: convert test.txt into output.docx. The input file must already
# exist, because process_text_to_word opens it for reading.
input_txt, output_docx = 'test.txt', 'output.docx'
process_text_to_word(input_file=input_txt, output_file=output_docx)
