In [None]:
import openpyxl
import os

# Characters that are invalid (or act as path separators) in file names on
# common platforms, plus ASCII control characters; each is mapped to "_".
_UNSAFE_FILENAME_CHARS = {ord(ch): "_" for ch in '<>:"/\\|?*'}
_UNSAFE_FILENAME_CHARS.update({code: "_" for code in range(32)})


def _safe_filename_part(value):
    """Return ``str(value)`` with file-name-unsafe characters replaced by ``_``."""
    return str(value).translate(_UNSAFE_FILENAME_CHARS)


def split_excel(input_file, output_folder, header_row, split_column):
    """Split the active sheet of a workbook into one file per split-column value.

    Rows ``1..header_row`` are copied to the top of every output workbook.
    Each following row is routed to the workbook for the value it holds in
    ``split_column``. Only cell values are copied (no styles, widths or
    merged ranges). Output files are named ``split_<value>.xlsx`` inside
    ``output_folder`` and are written in the order the values first appear.

    Args:
        input_file: Path of the workbook to read.
        output_folder: Directory that receives the generated files; it is
            created if it does not exist.
        header_row: 1-based index of the last header row. ``0`` means the
            sheet has no header rows.
        split_column: 1-based index of the column whose values define the
            split.

    Returns:
        None. Progress and any error are reported with ``print``; exceptions
        are not propagated to the caller.
    """
    try:
        # Reject arguments that would otherwise be silently misread:
        # split_column=0 would become index -1, i.e. the *last* column.
        if header_row < 0:
            raise ValueError(f"header_row must be >= 0, got {header_row}")
        if split_column < 1:
            raise ValueError(f"split_column must be >= 1, got {split_column}")

        os.makedirs(output_folder, exist_ok=True)

        # Load the Excel file
        wb = openpyxl.load_workbook(input_file)
        sheet = wb.active

        if split_column > sheet.max_column:
            raise ValueError(
                f"split_column {split_column} is beyond the last used "
                f"column ({sheet.max_column})"
            )

        # Header rows, as plain value tuples. header_row == 0 is handled
        # here instead of being passed to iter_rows as max_row=0.
        if header_row:
            header_rows = list(
                sheet.iter_rows(min_row=1, max_row=header_row, values_only=True)
            )
        else:
            header_rows = []

        split_column_index = split_column - 1

        # Single pass over the data: group rows by their split-column value.
        # A dict keeps first-seen order, so the output order is deterministic.
        rows_by_value = {}
        for row in sheet.iter_rows(min_row=header_row + 1, values_only=True):
            rows_by_value.setdefault(row[split_column_index], []).append(row)

        # File stems already handed out, case-folded so that values differing
        # only by case do not overwrite each other on case-insensitive
        # file systems.
        used_stems = set()

        for value, rows in rows_by_value.items():
            base_stem = f"split_{_safe_filename_part(value)}"
            stem = base_stem
            suffix = 2
            # Distinct values can share a string form (1 vs "1", "a/b" vs
            # "a_b" after sanitising); give later ones a numeric suffix.
            while stem.casefold() in used_stems:
                stem = f"{base_stem}_{suffix}"
                suffix += 1
            used_stems.add(stem.casefold())

            filename = os.path.join(output_folder, f"{stem}.xlsx")
            print("Generating file:", filename)

            # Create a new Workbook for each unique value
            wb_new = openpyxl.Workbook()
            sheet_new = wb_new.active
            sheet_new.title = "Sheet1"

            # On a fresh sheet append() fills rows 1, 2, ... in order, so the
            # header rows land first and the data rows directly after them.
            for header_values in header_rows:
                sheet_new.append(header_values)
            for row in rows:
                sheet_new.append(row)

            # Save the split file
            wb_new.save(filename)
            print("File saved:", filename)

        print("All files split successfully.")

    except Exception as e:
        # Best-effort contract kept from the original: report, do not raise.
        print(f"An error occurred: {e}")

# Example configuration
header_row = 2      # Last header row (1-based); rows 1..header_row are repeated in every output file
split_column = 2    # Column whose values decide the split (1-based)
output_folder = r'/Users/gejundeng/Desktop/拆分文件夹'   # Directory that receives the split files
input_file = r'/Users/gejundeng/Desktop/xxxxx.xlsx'    # Workbook to split

# Run the split with the configuration above
split_excel(
    input_file=input_file,
    output_folder=output_folder,
    header_row=header_row,
    split_column=split_column,
)