# 1. Extract column text and URLs from an Excel file


In [4]:
import openpyxl
import re

def remove_parentheses_content(text):
    """
    Remove one trailing parenthesised group (and the whitespace around it).

    Only a group that closes at the very end of the string (ignoring trailing
    whitespace) is removed; parentheses earlier in the text are kept, and
    nested parentheses inside the trailing group are handled as one group.

    :param text: input string; falsy values (``None``, ``""``) are returned as-is
    :return: the text without its trailing parenthesised group, or the
             original text when it does not end with a balanced group
    """
    if not text:
        return text

    trimmed = text.rstrip()
    if not trimmed.endswith(')'):
        return text

    # Walk backwards to find the "(" that balances the final ")". A regex such
    # as r'\(.*?\)$' would instead start at the FIRST "(" in the string and
    # swallow everything after it.
    depth = 0
    for position in range(len(trimmed) - 1, -1, -1):
        char = trimmed[position]
        if char == ')':
            depth += 1
        elif char == '(':
            depth -= 1
            if depth == 0:
                return trimmed[:position].rstrip()

    # Unbalanced closing parenthesis: leave the text untouched.
    return text

def read_and_process_data(excel_file_path, column_name='Name', url_column='URL', start_row=6, end_row=63,
                          difficulty_column='Difficulty', first_problem_number=2):
    """
    Read question names, their hyperlinks and difficulties from the active
    sheet of an Excel workbook and derive the fields used by the generators.

    :param excel_file_path: path of the Excel file
    :param column_name: header text (row 1) of the column holding question names
    :param url_column: not consulted -- the URL is taken from the hyperlink
                       attached to the name cell; kept so existing calls still work
    :param start_row: first sheet row (1-based) to extract
    :param end_row: last sheet row (1-based, inclusive) to extract
    :param difficulty_column: header text (row 1) of the difficulty column
    :param first_problem_number: number given to the row at ``start_row``;
                                 following rows count up from it
    :return: list of dicts with the keys ``question_name``, ``source_name``,
             ``source_name_url``, ``file_name``, ``file_path``, ``difficulty``
    :raises ValueError: if a required header is missing from row 1
    """
    workbook = openpyxl.load_workbook(excel_file_path)
    sheet = workbook.active  # the workbook's active sheet

    # Map header text -> 0-based position within a row tuple, using row 1.
    header_positions = {}
    for position, header_cell in enumerate(next(sheet.iter_rows(min_row=1, max_row=1), ())):
        # setdefault keeps the first occurrence if a header text is duplicated
        header_positions.setdefault(header_cell.value, position)

    missing = [header for header in (column_name, difficulty_column) if header not in header_positions]
    if missing:
        raise ValueError(f"Column(s) {missing} not found in the header row of the sheet.")

    name_position = header_positions[column_name]
    difficulty_position = header_positions[difficulty_column]

    data_list = []

    for offset, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row)):

        # 1. name cell: display name, identifiers and link
        name_cell = row[name_position]

        if name_cell.value:
            # str() so numeric or date cells do not break the string handling
            question_name = remove_parentheses_content(str(name_cell.value)).strip()
        else:
            question_name = "No Name"

        number = f"{offset + first_problem_number:04d}"
        source_name = f"deep-ml-{number}-" + question_name.replace(' ', '_')

        # A hyperlink can exist without an external target, so check both.
        hyperlink = name_cell.hyperlink
        source_name_url = hyperlink.target if hyperlink and hyperlink.target else "No URL"

        file_name = f"{number}_{question_name}.ipynb".replace(' ', '_').lower()
        file_path = 'ipynb_codes/' + file_name

        # 2. difficulty cell
        difficulty_value = row[difficulty_position].value
        difficulty = str(difficulty_value).strip().capitalize() if difficulty_value else "No Difficulty"

        print('** debug **', (question_name, source_name, source_name_url, file_name, file_path, difficulty))
        data_list.append({
            'question_name': question_name,
            'source_name': source_name,
            'source_name_url': source_name_url,
            'file_name': file_name,
            'file_path': file_path,
            'difficulty': difficulty
        })

    return data_list

# Example usage
excel_file_path = 'deep-ml_2024.xlsx'  # replace with the actual Excel file path
data = read_and_process_data(excel_file_path)

# print('** debug **', data)


** debug ** ('Transpose of a Matrix', 'deep-ml-0002-Transpose_of_a_Matrix', 'https://www.deep-ml.com/problem/Transpose%20of%20a%20Matrix', '0002_transpose_of_a_matrix.ipynb', 'ipynb_codes/0002_transpose_of_a_matrix.ipynb', 'Easy')
** debug ** ('Reshape Matrix', 'deep-ml-0003-Reshape_Matrix', 'https://www.deep-ml.com/problem/Reshape%20Matrix', '0003_reshape_matrix.ipynb', 'ipynb_codes/0003_reshape_matrix.ipynb', 'Easy')
** debug ** ('Calculate Mean by Row or Column', 'deep-ml-0004-Calculate_Mean_by_Row_or_Column', 'https://www.deep-ml.com/problem/Calculate%20Mean%20by%20Row%20or%20Column', '0004_calculate_mean_by_row_or_column.ipynb', 'ipynb_codes/0004_calculate_mean_by_row_or_column.ipynb', 'Easy')
** debug ** ('Scalar Multiplication of a Matrix', 'deep-ml-0005-Scalar_Multiplication_of_a_Matrix', 'https://www.deep-ml.com/problem/Scalar%20Multiplication%20of%20a%20Matrix', '0005_scalar_multiplication_of_a_matrix.ipynb', 'ipynb_codes/0005_scalar_multiplication_of_a_matrix.ipynb', 'Easy')

# 2. Generate chapter02_xlsx_to_md.md

In [5]:
import os

def generate_md_file(data, output_file="chapter02_xlsx_to_md.md", include_table=False):
    """
    Write one HTML ``<tr>`` per item of ``data`` into a Markdown file.

    By default only the rows are written, so the output is meant to be pasted
    into an existing ``<table>``. Pass ``include_table=True`` to also emit the
    surrounding ``<table>``/``<thead>``/``<tbody>`` markup.

    :param data: list of dicts with the keys ``question_name``, ``source_name``,
                 ``source_name_url``, ``file_name``, ``file_path``, ``difficulty``
    :param output_file: name of the Markdown file to (over)write
    :param include_table: when true, wrap the rows in a complete table with
                          the Status / Question Name / Source / Ipynb
                          Implementation / Difficulty / Complexity header
    """
    import html  # local import so this block does not depend on the file's import cell

    difficulty_colors = {
        "easy": "#28a745",    # green
        "medium": "#ffc107",  # yellow
        "hard": "#dc3545",    # red
    }
    fallback_color = "#6c757d"  # grey for anything else

    lines = []

    if include_table:
        lines.extend([
            "<table>\n",
            "  <thead>\n",
            "    <tr>\n",
            "      <th rowspan=\"3\">Status</th>\n",
            "      <th rowspan=\"3\">Question Name</th>\n",
            "      <th rowspan=\"3\">Source</th>\n",
            "      <th rowspan=\"3\">Ipynb Implementation</th>\n",
            "      <th rowspan=\"3\">Difficulty</th>\n",
            "      <th colspan=\"4\">Complexity</th>\n",
            "    </tr>\n",
            "    <tr>\n",
            "      <th colspan=\"2\">Training</th>\n",
            "      <th colspan=\"2\">Inference</th>\n",
            "    </tr>\n",
            "    <tr>\n",
            "      <th>Time</th>\n",
            "      <th>Space</th>\n",
            "      <th>Time</th>\n",
            "      <th>Space</th>\n",
            "    </tr>\n",
            "  </thead>\n",
            "  <tbody>\n",
        ])

    for item in data:
        difficulty = str(item['difficulty'])
        difficulty_color = difficulty_colors.get(difficulty.lower(), fallback_color)

        # Escape everything that comes from the spreadsheet: element text only
        # needs &, <, > handled, attribute values also need quotes handled.
        question_name = html.escape(str(item['question_name']), quote=False)
        source_name = html.escape(str(item['source_name']), quote=False)
        file_name = html.escape(str(item['file_name']), quote=False)
        difficulty_text = html.escape(difficulty, quote=False)
        source_name_url = html.escape(str(item['source_name_url']), quote=True)
        file_path = html.escape(str(item['file_path']), quote=True)

        lines.extend([
            "    <tr>\n",
            "      <td>☐</td>\n",
            f"      <td>{question_name}</td>\n",
            f"      <td><a href=\"{source_name_url}\">{source_name}</a></td>\n",
            f"      <td><a href=\"{file_path}\">{file_name}</a></td>\n",
            f"      <td><span style=\"color:{difficulty_color}\">{difficulty_text}</span></td>\n",
            # Four empty complexity placeholders: training time/space, inference time/space.
            "      <td>O()</td>\n",
            "      <td>O()</td>\n",
            "      <td>O()</td>\n",
            "      <td>O()</td>\n",
            "    </tr>\n",
        ])

    if include_table:
        lines.extend([
            "  </tbody>\n",
            "</table>\n",
        ])

    # Explicit UTF-8: the status cell contains a non-ASCII character, which
    # fails to encode under some platform default encodings.
    with open(output_file, "w", encoding="utf-8") as md_file:
        md_file.writelines(lines)

# Example usage
generate_md_file(data)
print("Markdown file has been generated: chapter02_xlsx_to_md.md")

Markdown file has been generated: chapter02_xlsx_to_md.md


In [6]:
 def generate_bash_script(data, bash_output_file="chapter02_copy_template_files.sh"):
    """
    根据 data 生成一个 Bash 文件，并将每条数据的 file_path 写入以复制模板文件。
    
    :param data: 包含所有问题的列表，每项包含 file_path 信息
    :param bash_output_file: 输出的 Bash 文件名
    """
    with open(bash_output_file, "w") as bash_file:
        # 编写 Bash 头部和模板文件路径
        bash_file.write("#!/bin/bash\n\n")
        bash_file.write("# Define the template file path\n")
        bash_file.write("TEMPLATE_FILE=\"../src/Chapter02/ipynb_codes/0000_tmplate.ipynb\"\n\n")
        
        # 写入复制命令
        bash_file.write("echo \"Starting to copy template files...\"\n\n")
        for item in data:
            # 生成目标文件的相对路径
            # print('** debug **', item['file_path'])
            relative_path = f"../src/Chapter02/{item['file_path']}"
            bash_file.write(f"mkdir -p \"$(dirname \"{relative_path}\")\"\n")
            bash_file.write(f"cp \"$TEMPLATE_FILE\" \"{relative_path}\"\n")
            bash_file.write(f"echo \"Copied to {relative_path}\"\n")
        
        bash_file.write("\necho \"All files have been copied.\"\n")


# Example usage: write the copy script for the rows held in `data`
generate_bash_script(data)
print("Bash script file has been generated: chapter02_copy_template_files.sh")



Bash script file has been generated: chapter02_copy_template_files.sh
