In [None]:
""" 
The script defines a function split_pdf_to_smaller that splits a PDF into smaller parts, each containing no more than a specified number of pages. The function works as follows:

1. It takes two arguments: file_path (the path to the PDF file you want to split) and max_pages (the maximum number of pages each split PDF should contain). The default value for max_pages is 250.

2. The function uses PdfReader from PyPDF2 to read the PDF file and determine the total number of pages.

3. It loops over the pages in increments of max_pages. For each iteration, it creates a PdfWriter object and an output filename.

4. Within each block of up to max_pages pages, it adds every page of that block to the PdfWriter object.

5. It writes the pages to the output file with the PdfWriter.write method.

6. Finally, the script calls the function split_pdf_to_smaller with a specific file path to execute the splitting.
"""
import os
from PyPDF2 import PdfReader, PdfWriter

def split_pdf_to_smaller(file_path, max_pages=250):
    """Split a PDF into consecutive parts of at most ``max_pages`` pages each.

    Every part is written to ``<file_path without extension>_part_<n>.pdf``,
    where ``n`` starts at 1. The output files are opened in ``'wb'`` mode, so
    existing files with the same names are overwritten.

    Args:
        file_path: Path of the PDF file to split.
        max_pages: Maximum number of pages per output file. Must be >= 1.

    Raises:
        ValueError: If ``max_pages`` is less than 1.
    """
    # Validate before reading the PDF: a step of 0 would otherwise fail inside
    # range() with an unrelated-looking message, and a negative step would
    # silently produce no output files at all.
    if max_pages < 1:
        raise ValueError(f"max_pages must be a positive integer, got {max_pages!r}")

    reader = PdfReader(file_path)
    total_pages = len(reader.pages)

    # The base name is the same for every part, so compute it once.
    base_name = os.path.splitext(file_path)[0]

    # `start` is the index of the first page of each part; parts are numbered from 1.
    for part_number, start in enumerate(range(0, total_pages, max_pages), start=1):
        writer = PdfWriter()

        # The last part may be shorter than max_pages, hence the min().
        for page_number in range(start, min(start + max_pages, total_pages)):
            writer.add_page(reader.pages[page_number])

        output_filename = f"{base_name}_part_{part_number}.pdf"
        with open(output_filename, 'wb') as output_pdf:
            writer.write(output_pdf)

split_pdf_to_smaller('C:/Users/admin/Desktop/(新曲线·心理学丛书) 戴维·迈尔斯 - 社会心理学（第11版）-人民邮电出版社 (2016).pdf') # NOTE: hard-coded example path; change it to the PDF you want to split before running this cell.

In [1]:
""" 
这段代码的主要功能是从CSV文件中读取数据，然后将“单词”列的内容按照每30个进行分组，并将这些分组内容输出到一个txt文件中。因此，你可以根据这个功能来命名这段代码。以下是一些命名建议：
group_and_write_words
csv_to_grouped_txt
extract_and_group_words
word_group_exporter
"""

import pandas as pd

# Load the CSV file.
df = pd.read_csv('C:/Users/admin/Desktop/1.csv')

# Select the '单词' column (the values that get grouped below).
column_b = df['单词']

# Split the column into consecutive groups of GROUP_SIZE entries;
# the last group may be shorter.
GROUP_SIZE = 30
groups = [column_b[i:i + GROUP_SIZE] for i in range(0, len(column_b), GROUP_SIZE)]

# Write one comma-separated line per group, each followed by a separator line,
# and finish with a summary line giving the number of groups.
# The encoding is set explicitly because the output contains Chinese text:
# the platform default encoding may be unable to represent it (UnicodeEncodeError)
# or may produce a file that differs from machine to machine.
with open('output.txt', 'w', encoding='utf-8') as f:
    for group in groups:
        f.write(','.join(map(str, group)) + '\n')
        f.write('-----\n')  # marker line separating consecutive groups
    f.write("一共输出了{}组".format(len(groups)))

In [13]:
"""Accroding to pre-setting order to arrange your lists."""
import os
import pandas as pd

# Workbook to reorder and the folder that contains it.
# NOTE(review): the path looks like a placeholder — point it at a real file.
original_file_path = r"C:...\test.xlsx"
original_file_dir = os.path.split(original_file_path)[0]

# Categories in the order they should appear in every sheet.
custom_order = [
    # Financing and insurance
    "Banking / Financing Costs",
    "Insurance Costs - Insurance premiums for employees working in Singapore",
    "Insurance Costs - Others",
    # Manpower
    "Manpower Costs - Trading Professional",
    "Manpower Costs - Others",
    # Premises, logistics and processing
    "Storage / Warehousing Costs",
    "Rental Costs (Offices & other premises, machinery & equipment)",
    "Freight Costs",
    "Processing Costs",
    "Depreciation Costs",
    # Services and everything else
    "Entertainment",
    "Legal Services",
    "Accounting and Secretariat Services",
    "Other Operating Costs",
    "Indirect Taxes (E.g. Property tax, foreign workers levy, licence fees, stamp duties)",
]

# Reorder every sheet of the workbook so that its rows follow custom_order.
# The first column of each sheet is compared against the category names.
# Rows whose first-column value is not in custom_order are not carried over.
rearranged_sheets = {}

# Open the workbook in a context manager so the file handle is always released,
# even if reading one of the sheets fails.
with pd.ExcelFile(original_file_path) as xls:
    for sheet_name in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)
        key_column = df.columns[0]
        # Filler values for a category that is absent from the sheet:
        # one 0 for every column after the first.
        zero_fill = [0] * (len(df.columns) - 1)

        # Collect the pieces in order and concatenate once per sheet, instead of
        # growing a DataFrame with pd.concat on every iteration.
        pieces = []
        for category in custom_order:
            matching_rows = df[df[key_column] == category]
            if matching_rows.empty:
                # Missing category: add a placeholder row of zeros.
                matching_rows = pd.DataFrame([[category] + zero_fill], columns=df.columns)
            pieces.append(matching_rows)

        if pieces:
            new_df = pd.concat(pieces, ignore_index=True)
        else:
            # pd.concat rejects an empty list; keep the sheet's header only.
            new_df = pd.DataFrame(columns=df.columns)

        rearranged_sheets[sheet_name] = new_df

# Write every rearranged sheet to a new workbook in the same folder as the original.
new_file_path = os.path.join(original_file_dir, 'amend_test.xlsx')

# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
# The context manager saves and closes the workbook on exit, and also releases
# the file if writing one of the sheets raises.
with pd.ExcelWriter(new_file_path, engine='openpyxl') as writer:
    for sheet_name, df in rearranged_sheets.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)


FileNotFoundError: [Errno 2] No such file or directory: 'C:...\\test.xlsx'