Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tiny script for Chinese kindle users #290

Open
Maxinsomnia opened this issue Jun 8, 2024 · 0 comments
Open

Tiny script for Chinese kindle users #290

Maxinsomnia opened this issue Jun 8, 2024 · 0 comments
Labels
enhancement New feature or request

Comments

@Maxinsomnia
Copy link

Is your feature request related to a problem? Please describe.
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
An error occurs when I sync from My Clippings.txt. The cause is the language: the clippings file from a Chinese Kindle is written in Chinese.

Describe the solution you'd like
A clear and concise description of what you want to happen.
I wrote a script that converts the highlight and bookmark records into English. After running the script, the plugin works.

Describe alternatives you've considered
A clear and concise description of any alternative solutions or features you've considered.
You could paste this script somewhere in the README in case other Chinese users need help. Alternatively, you could integrate it into the workflow and convert the file automatically when Chinese is detected.

Additional context
Add any other context or screenshots about the feature request here.
Here is the code below. Btw, thanks for your efforts. I love it:

import re
from datetime import datetime

def chinese_day_to_english(day):
    """Translate a Chinese weekday name into its English equivalent.

    Args:
        day: Weekday name such as '星期一'.

    Returns:
        The English weekday name, or an empty string when ``day`` is not one
        of the seven names listed below.
    """
    translations = dict((
        ('星期一', 'Monday'),
        ('星期二', 'Tuesday'),
        ('星期三', 'Wednesday'),
        ('星期四', 'Thursday'),
        ('星期五', 'Friday'),
        ('星期六', 'Saturday'),
        ('星期日', 'Sunday'),
    ))
    if day in translations:
        return translations[day]
    return ''

def convert_chinese_to_english_hl(chinese_str):
    """Convert a Chinese Kindle highlight header into its English form.

    The input must start with a header such as
    ``您在位置 #123-125的标注 | 添加于 2024年6月8日星期六 上午10:30:15``,
    which becomes
    ``Your Highlight at location 123-125 | Added on Saturday, 08 Jun 2024 10:30:15``.

    Args:
        chinese_str: The Chinese highlight header line.

    Returns:
        The header rewritten in English with a 24-hour timestamp.

    Raises:
        TypeError: If ``chinese_str`` is not a string.
        ValueError: If the text does not match the expected header format,
            or the extracted fields do not form a valid date and time.
    """
    if not isinstance(chinese_str, str):
        raise TypeError("The str should be a string.")

    # Groups: location range, year, month, day, weekday, 上午/下午 (AM/PM),
    # hour, minute, second.
    match = re.match(r"您在位置 #(\d+-\d+)的标注 \| 添加于 (\d{4})年(\d{1,2})月(\d{1,2})日(星期[一二三四五六日]) (上午|下午)(\d{1,2}):(\d{2}):(\d{2})", chinese_str)
    if not match:
        raise ValueError("String format is incorrect")

    (location, year, month, day, weekday_cn,
     period, hour, minute, second) = match.groups()

    # Convert the 12-hour clock to 24-hour: 下午 (PM) adds 12 except at noon,
    # and 上午 (AM) 12 o'clock is midnight.
    hour = int(hour)
    if period == '下午' and hour != 12:
        hour += 12
    elif period == '上午' and hour == 12:
        hour = 0

    weekday_en = chinese_day_to_english(weekday_cn)

    # Building a datetime validates the fields (it raises ValueError for
    # impossible dates or times).
    stamp = datetime(int(year), int(month), int(day), hour, int(minute), int(second))

    # Use a fixed English month table instead of strftime("%b"), whose output
    # depends on the process locale; the result must always be English.
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    date_en = (
        f"{stamp.day:02d} {month_names[stamp.month - 1]} {stamp.year:04d} "
        f"{stamp.hour:02d}:{stamp.minute:02d}:{stamp.second:02d}"
    )

    return f"Your Highlight at location {location} | Added on {weekday_en}, {date_en}"

def convert_chinese_to_english_bm(chinese_str):
    """Convert a Chinese Kindle bookmark header into its English form.

    The input must start with a header such as
    ``您在位置 #123的书签 | 添加于 2024年6月8日星期六 下午3:05:09``,
    which becomes
    ``Your Bookmark at location 123 | Added on Saturday, 08 Jun 2024 15:05:09``.

    Args:
        chinese_str: The Chinese bookmark header line.

    Returns:
        The header rewritten in English with a 24-hour timestamp.

    Raises:
        TypeError: If ``chinese_str`` is not a string.
        ValueError: If the text does not match the expected header format,
            or the extracted fields do not form a valid date and time.
    """
    if not isinstance(chinese_str, str):
        raise TypeError("The str should be a string.")

    # Groups: location, year, month, day, weekday, 上午/下午 (AM/PM),
    # hour, minute, second.
    match = re.match(r"您在位置 #(\d+)的书签 \| 添加于 (\d{4})年(\d{1,2})月(\d{1,2})日(星期[一二三四五六日]) (上午|下午)(\d{1,2}):(\d{2}):(\d{2})", chinese_str)
    if not match:
        raise ValueError("String format is incorrect")

    (location, year, month, day, weekday_cn,
     period, hour, minute, second) = match.groups()

    # Convert the 12-hour clock to 24-hour: 下午 (PM) adds 12 except at noon,
    # and 上午 (AM) 12 o'clock is midnight.
    hour = int(hour)
    if period == '下午' and hour != 12:
        hour += 12
    elif period == '上午' and hour == 12:
        hour = 0

    weekday_en = chinese_day_to_english(weekday_cn)

    # Building a datetime validates the fields (it raises ValueError for
    # impossible dates or times).
    stamp = datetime(int(year), int(month), int(day), hour, int(minute), int(second))

    # Use a fixed English month table instead of strftime("%b"), whose output
    # depends on the process locale; the result must always be English.
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    date_en = (
        f"{stamp.day:02d} {month_names[stamp.month - 1]} {stamp.year:04d} "
        f"{stamp.hour:02d}:{stamp.minute:02d}:{stamp.second:02d}"
    )

    return f"Your Bookmark at location {location} | Added on {weekday_en}, {date_en}"

def find_and_replace_all_hl(document):
    """Replace every Chinese highlight header in ``document`` with English.

    Args:
        document: Full text of a clippings file.

    Returns:
        A copy of ``document`` in which each highlight header matching the
        pattern below has been rewritten by ``convert_chinese_to_english_hl``.
        Text that does not match is left untouched.
    """
    pattern = r"您在位置 #(\d+-\d+)的标注 \| 添加于 (\d{4})年(\d{1,2})月(\d{1,2})日(星期[一二三四五六日]) (上午|下午)(\d{1,2}):(\d{2}):(\d{2})"

    # A single re.sub pass substitutes each match where it was found, rather
    # than rescanning the whole document from the start for every match.
    return re.sub(
        pattern,
        lambda found: convert_chinese_to_english_hl(found.group(0)),
        document,
    )

def find_and_replace_all_bm(document):
    """Replace every Chinese bookmark header in ``document`` with English.

    Args:
        document: Full text of a clippings file.

    Returns:
        A copy of ``document`` in which each bookmark header matching the
        pattern below has been rewritten by ``convert_chinese_to_english_bm``.
        Text that does not match is left untouched.
    """
    pattern = r"您在位置 #(\d+)的书签 \| 添加于 (\d{4})年(\d{1,2})月(\d{1,2})日(星期[一二三四五六日]) (上午|下午)(\d{1,2}):(\d{2}):(\d{2})"

    # A single re.sub pass substitutes each match where it was found, rather
    # than rescanning the whole document from the start for every match.
    return re.sub(
        pattern,
        lambda found: convert_chinese_to_english_bm(found.group(0)),
        document,
    )

def read_txt_file(file_path, num_lines=5):
    """Read a UTF-8 text file and return its entire content.

    Args:
        file_path: Path of the file to read.
        num_lines: Accepted but not used; the whole file is always read.

    Returns:
        The file content as a string, or ``None`` if reading failed (the
        error is printed to stdout).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except Exception as err:
        print(f"Error reading file: {err}")
    return None

def write_txt_file(file_path, content):
    """Write ``content`` to ``file_path`` as UTF-8, replacing any existing file.

    Failures are not raised; the error is printed to stdout instead.

    Args:
        file_path: Destination path.
        content: Text to write.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as sink:
            sink.write(content)
    except Exception as err:
        print(f"Error writing to file: {err}")
    return None

# Example usage: convert the clippings file at file_path and write the
# English version to output_file_path.
file_path = '/path/My Clippings.txt'  # Replace with your actual file path
output_file_path = '/path/My Clippings-new.txt'  # Converted copy is written here

# Reads the whole file; read_txt_file returns None if reading fails.
file_content = read_txt_file(file_path)
if file_content:  # Skipped when the read failed or the file is empty
    print(file_content)  # Echo the original content to stdout
    # Translate highlight headers first, then bookmark headers.
    english_document = find_and_replace_all_hl(file_content)
    english_document = find_and_replace_all_bm(english_document)
    write_txt_file(output_file_path, english_document)
@Maxinsomnia Maxinsomnia added the enhancement New feature or request label Jun 8, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

1 participant