# Code

In [1]:
# Install the pdfrw library used to read and write PDF form fields
!pip -q install pdfrw  # Quietly installs pdfrw using pip (for Google Colab environment)

# --- Mount Google Drive ---
from google.colab import drive, files
drive.mount('/content/drive')

import os, re, zipfile
import pandas as pd
from datetime import datetime
from pytz import timezone
from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfObject

# --- Function to fill a PDF form and check checkboxes ---
def fill_pdf(input_pdf_path, output_pdf_path, data, checkboxes_to_check):
    pdf = PdfReader(input_pdf_path)  # Load the PDF form template

    # Ensure form fields refresh automatically
    if not pdf.Root.AcroForm:
        pdf.Root.AcroForm = PdfDict(NeedAppearances=PdfObject("true"))
    else:
        pdf.Root.AcroForm.update({PdfName("NeedAppearances"): PdfObject("true")})

    # Loop through each page
    for page in pdf.pages:
        annotations = page.Annots
        if not annotations:
            continue

        # --- 1️⃣ Fill text fields first ---
        for annot in annotations:
            if annot.Subtype == PdfName.Widget and annot.T:
                key = annot.T.to_unicode().strip()
                if key in data:
                    annot.V = data[key]
                    annot.AP = None

        # --- 2️⃣ Check checkboxes next ---
        for annot in annotations:
            if annot.Subtype == PdfName.Widget and annot.T:
                key = annot.T.to_unicode().strip()
                if key in checkboxes_to_check:
                    annot.V = PdfName("Yes")
                    annot.AS = PdfName("Yes")

    PdfWriter().write(output_pdf_path, pdf)  # Write the modified PDF


# --- File paths ---
excel_path = "/content/drive/MyDrive/Colab_files/Data_change_portfolio/personal_data_change.xlsx"
input_pdf_path = "/content/drive/MyDrive/Colab_files/Data_change_portfolio/data_form_editable.pdf"
output_folder = "filled_forms"
os.makedirs(output_folder, exist_ok=True)

# --- Timestamp for ZIP filename (Italy time) ---
italy_tz = timezone("Europe/Rome")
timestamp = datetime.now(italy_tz).strftime("%Y%m%d_%H%M")
zip_filename = f"fulfilled_forms_{timestamp}.zip"
zipf = zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED)

# --- Load Excel ---
df = pd.read_excel(excel_path)

# --- Mapping for checkbox fields ---
checkbox_map = {
    "Gender_change": "chk_gender",
    "Name_change": "chk_name",
    "Surname_change": "chk_surname",
    "Date of Birth_change": "chk_date_of_birth",
    "Place of birth_change": "chk_place_of_birth",
    "Country of birth_change": "chk_country_of_birth",
    "Email_change": "chk_email",
}

# --- Function to check changed fields ---
def get_changed_fields(row):
    fields_map = {
        "Gender_change": "Gender",
        "Name_change": "Name",
        "Surname_change": "Surname",
        "Date of Birth_change": "Date of Birth",
        "Place of birth_change": "Place of birth",
        "Country of birth_change": "Country of birth",
        "Email_change": "Email",
    }
    changed = []
    for col, name in fields_map.items():
        value = str(row.get(col, "")).strip().upper()
        if value == "ON":
            changed.append(name)
    return changed

# --- Loop through each row in Excel ---
for index, row in df.iterrows():
    name = str(row["Name"]).strip()
    surname = str(row["Surname"]).strip()

    changed_fields = get_changed_fields(row)
    field_suffix = "_".join(changed_fields).replace(" ", "_") if changed_fields else "NO_CHANGES"

    safe_name = re.sub(r"[^a-zA-Z0-9]", "_", f"{surname}_{name}_change_request_{field_suffix}")
    output_pdf_path = os.path.join(output_folder, f"{safe_name}.pdf")

    # Birth date formatting
    birth_date_raw = row.get("Date of Birth", pd.NaT)
    birth_date = pd.to_datetime(birth_date_raw, dayfirst=True, errors="coerce")
    formatted_birth_date = birth_date.strftime("%d/%d/%Y") if pd.notnull(birth_date) else ""

    # Current date for PDF
    formatted_current_date = datetime.now(italy_tz).strftime("%d %B %Y")

    # Dictionary of PDF fields
    data = {
        "txt_director": "John Smith",
        "txt_exam_center_city": "Naples",
        "txt_exam_center_country": "Italy",
        "txt_institute_city": "Naples",
        "txt_location": "Naples",
        "txt_today_date": formatted_current_date,
        "txt_city": "Naples",
        "txt_client_code": str(row["Client code"]).zfill(8),
        "txt_exam_code": str(row["Exam code"]),
        "txt_gender": str(row["Gender"]),
        "txt_name": str(row["Name"]),
        "txt_surname": str(row["Surname"]),
        "txt_place_of_birth": str(row["Place of birth"]),
        "txt_date_of_birth": formatted_birth_date,
        "txt_country_of_birth": str(row["Country of birth"]),
        "txt_email": str(row["Email"]),
    }

    # Checkboxes that should be CHECKED
    checkboxes_to_check = [
        pdf_field for excel_col, pdf_field in checkbox_map.items()
        if str(row.get(excel_col, "")).strip().upper() == "ON"
    ]

    # ⚠️ Alert if candidate has NO changes
    if not checkboxes_to_check:
        print(f"⚠️ ALERT: Candidate {surname} {name} has NO changes (all '_change' columns are empty).")

    # Try filling PDF
    try:
        fill_pdf(input_pdf_path, output_pdf_path, data, checkboxes_to_check)
        zipf.write(output_pdf_path, os.path.basename(output_pdf_path))
        print(f"✔️ Row {index} processed: {os.path.basename(output_pdf_path)}")
    except Exception as e:
        print(f"❌ Error in row {index}: {e}")

# --- Finalize ZIP ---
zipf.close()
print(f"\n✅ All forms zipped as: {zip_filename}")

# --- Auto-download the ZIP file in Colab ---
files.download(zip_filename)


# --- Optional: Print PDF form field names ---
def print_pdf_field_names(pdf_path):
    pdf = PdfReader(pdf_path)
    field_names = set()
    for page in pdf.pages:
        annotations = page.Annots
        if annotations:
            for annot in annotations:
                if annot.Subtype == PdfName.Widget and annot.T:
                    resolved_annot = annot.resolve()
                    if resolved_annot:
                        name = resolved_annot.T.to_unicode().strip()
                        field_names.add(name)
    for name in sorted(field_names):
        print(name)


# --- Generate and display email message chunks in English ---
email_data = []  # List to hold formatted candidate data

for index, row in df.iterrows():  # Loop again through the spreadsheet
    name = str(row['Name']).strip().upper()  # Get uppercase first name
    surname = str(row['Surname']).strip().upper()  # Get uppercase surname
    full_name = f"{surname} {name}"  # Combine into full name
    changed_fields = get_changed_fields(row)  # Get changed fields
    changed_text = ", ".join(changed_fields) if changed_fields else "No changes"  # Join into a string
    email_data.append((surname, name, full_name, changed_text))  # Add to list for sorting

# Sort alphabetically by surname, then name
email_data.sort()

# Split candidates into chunks of 10 per message
chunk_size = 10
chunks = [email_data[i:i + chunk_size] for i in range(0, len(email_data), chunk_size)]

# Loop through each group and print the email message
for i, chunk in enumerate(chunks):
    subject_names = [entry[2] for entry in chunk]  # Get just the full names
    subject = f"Issues: {', '.join(subject_names)}"  # Email subject line

    body_lines = [f"- {entry[2]} ({entry[3]})" for entry in chunk]  # Create message body lines
    body = (
        "Good morning,\n"
        "I kindly ask you to update the data of the following candidates:\n\n"
        + "\n".join(body_lines)
    )  # Final email body

    print(f"\n--- Message {i + 1} ---")  # Header
    print(subject)  # Print subject line
    print("\n" + body)  # Print message body

zsh:1: command not found: pip


ModuleNotFoundError: No module named 'google'

In [None]:
change_columns = [col for col in df.columns if '_change' in col]

all_nan_rows = df[df[change_columns].isnull().all(axis=1)]

if not all_nan_rows.empty:
    print("Found rows with all 'change' columns as NaN:")
    display(all_nan_rows)
else:
    print("No rows found with all 'change' columns as NaN.")

Found rows with all 'change' columns as NaN:


Unnamed: 0,Client code,Exam code,Gender,Name,Surname,Date of Birth,Place of birth,Country of birth,Email,Gender_change,Name_change,Surname_change,Date of Birth_change,Place of birth_change,Country of birth_change,Email_change
25,73214592,28901259,,STEVEN,SMITH,34405,NEW ORLEANS,USA,steven.smith@example.com,,,,,,,


#Debug

In [None]:
df.head(1000)

Unnamed: 0,Client code,Exam code,Gender,Name,Surname,Date of Birth,Place of birth,Country of birth,Email,Gender_change,Name_change,Surname_change,Date of Birth_change,Place of birth_change,Country of birth_change,Email_change
0,73214567,28901234,M,LIAM,JOHNSON,35261,NEW YORK,USA,liam.johnson@example.com,,,ON,,ON,,
1,73214568,28901235,F,OLIVIA,DAVIS,36060,LOS ANGELES,USA,olivia.davis@example.com,,ON,,,,,
2,73214569,28901236,M,NOAH,BROWN,36624,CHICAGO,USA,noah.brown@example.com,,,,ON,,,
3,73214570,28901237,F,EMMA,WILSON,35464,HOUSTON,USA,emma.wilson@example.com,,ON,,,,,ON
4,73214571,28901238,M,ELIJAH,MOORE,35022,PHILADELPHIA,USA,elijah.moore@example.com,ON,,,,,,
5,73214572,28901239,F,AVA,TAYLOR,36336,PHOENIX,USA,ava.taylor@example.com,,,,,,ON,
6,73214573,28901240,M,LEO,ANDERSON,36905,SAN ANTONIO,USA,leo.anderson@example.com,,ON,,,,,
7,73214574,28901241,F,MIA,THOMAS,34188,SAN DIEGO,USA,mia.thomas@example.com,ON,,ON,,,,
8,73214575,28901242,M,LUCAS,JACKSON,34637,DALLAS,USA,lucas.jackson@example.com,,,,,ON,,
9,73214576,28901243,F,ISABELLA,WHITE,33675,SAN JOSE,USA,isabella.white@example.com,,,ON,,,,ON


In [None]:
# --- Print PDF form field names to identify exact keys ---
def print_pdf_field_names(pdf_path):
    pdf = PdfReader(pdf_path)
    field_names = set()
    for page in pdf.pages:
        annotations = page.Annots
        if annotations:
            for annot in annotations:
                if annot.Subtype == PdfName.Widget and annot.T:
                    name = annot.T.to_unicode().strip()
                    field_names.add(name)
    for name in sorted(field_names):
        print(name)

# --- Print PDF field names for verification ---
input_pdf_path = "/content/drive/MyDrive/Colab files/Data change portfolio/data_form_editable.pdf"

print_pdf_field_names(input_pdf_path)

PdfParseError: Could not read PDF file /content/drive/MyDrive/Colab files/Data change portfolio/data_form_editable.pdf

In [None]:
# --- Print PDF field names for verification ---
print_pdf_field_names("/content/drive/MyDrive/Colab files/Data change portfolio/data_form_editable.pdf")

chk_country_of_birth
chk_date_of_birth
chk_email
chk_gender
chk_name
chk_place_of_birth
chk_surname
txt_client_code
txt_country_of_birth
txt_date_of_birth
txt_director
txt_email
txt_exam_center_city
txt_exam_center_country
txt_exam_code
txt_gender
txt_institute_city
txt_location
txt_name
txt_place_of_birth
txt_surname
txt_today_date


In [None]:
df.head(1000)

Unnamed: 0,Client code,Exam code,Gender,Name,Surname,Date of Birth,Place of birth,Country of birth,Email,Gender_change,Name_change,Surname_change,Date of Birth_change,Place of birth_change,Country of birth_change,Email_change
0,73214567,28901234,M,LIAM,JOHNSON,35261,NEW YORK,USA,liam.johnson@example.com,,,ON,,ON,,
1,73214568,28901235,F,OLIVIA,DAVIS,36060,LOS ANGELES,USA,olivia.davis@example.com,,ON,,,,,
2,73214569,28901236,M,NOAH,BROWN,36624,CHICAGO,USA,noah.brown@example.com,,,,ON,,,
3,73214570,28901237,F,EMMA,WILSON,35464,HOUSTON,USA,emma.wilson@example.com,,ON,,,,,ON
4,73214571,28901238,M,ELIJAH,MOORE,35022,PHILADELPHIA,USA,elijah.moore@example.com,ON,,,,,,
5,73214572,28901239,F,AVA,TAYLOR,36336,PHOENIX,USA,ava.taylor@example.com,,,,,,ON,
6,73214573,28901240,M,LEO,ANDERSON,36905,SAN ANTONIO,USA,leo.anderson@example.com,,ON,,,,,
7,73214574,28901241,F,MIA,THOMAS,34188,SAN DIEGO,USA,mia.thomas@example.com,ON,,ON,,,,
8,73214575,28901242,M,LUCAS,JACKSON,34637,DALLAS,USA,lucas.jackson@example.com,,,,,ON,,
9,73214576,28901243,F,ISABELLA,WHITE,33675,SAN JOSE,USA,isabella.white@example.com,,,ON,,,,ON


In [None]:
print(df.columns.tolist())

['Client code', 'Exam code', 'Gender', 'Name', 'Surname', 'Date of Birth', 'Place of birth', 'Country of birth', 'Email', 'Gender_change', 'Name_change', 'Surname_change', 'Date of Birth_change', 'Place of birth_change', 'Country of birth_change', 'Email_change']


In [None]:
# Print all PDF form field names to identify exact keys ---
def print_pdf_field_names(pdf_path):
    pdf = PdfReader(pdf_path)
    field_names = set()
    for page in pdf.pages:
        annotations = page.Annots
        if annotations:
            for annot in annotations:
                if annot.Subtype == PdfName.Widget and annot.T:
                    name = annot.T.to_unicode().strip()
                    field_names.add(name)
    for name in sorted(field_names):
        print(name)

# --- Print PDF field names for verification ---
# The call to the function is moved here after the function is defined.
input_pdf_path = "/content/Data Form-2.pdf"

print_pdf_field_names(input_pdf_path)

In [None]:
# Print all PDF form field names to identify exact keys ---
def print_pdf_field_names(pdf_path):
    pdf = PdfReader(pdf_path)
    field_names = set()
    for page in pdf.pages:
        annotations = page.Annots
        if annotations:
            for annot in annotations:
                if annot.Subtype == PdfName.Widget and annot.T:
                    name = annot.T.to_unicode().strip()
                    field_names.add(name)
    for name in sorted(field_names):
        print(name)

# --- Print PDF field names for verification ---
# The call to the function is moved here after the function is defined.
input_pdf_path = "/content/drive/MyDrive/Colab files/Data change portfolio/Data Form-4-2.pdf"

print_pdf_field_names(input_pdf_path)

In [None]:
import os
import glob

# List all files in the current directory
all_files = glob.glob('*')
print("Files in current directory before removal:")
for file in all_files:
    print(file)

# Remove all files
for file in all_files:
    try:
        os.remove(file)
        print(f"Removed: {file}")
    except OSError as e:
        print(f"Error removing {file}: {e}")

# Verify removal
all_files_after = glob.glob('*')
print("\nFiles in current directory after removal:")
if not all_files_after:
    print("No files remaining.")
else:
    for file in all_files_after:
        print(file)

Files in current directory before removal:
single_field_pdfs
drive
filled_forms
sample_data
Error removing single_field_pdfs: [Errno 21] Is a directory: 'single_field_pdfs'
Error removing drive: [Errno 21] Is a directory: 'drive'
Error removing filled_forms: [Errno 21] Is a directory: 'filled_forms'
Error removing sample_data: [Errno 21] Is a directory: 'sample_data'

Files in current directory after removal:
single_field_pdfs
drive
filled_forms
sample_data


In [None]:
from pdfrw import PdfReader

pdf_path = "/content/drive/MyDrive/Colab files/Data change portfolio/Data Form-4-2.pdf"
pdf = PdfReader(pdf_path)

acroform = pdf.Root.AcroForm
if acroform:
    print("✅ PDF has AcroForm")
    print(acroform)
else:
    print("❌ PDF has no AcroForm")

✅ PDF has AcroForm
{'/Fields': [(12, 0)], '/DR': (13, 0)}


In [None]:
from pdfrw import PdfReader, PdfName

pdf = PdfReader("/content/drive/MyDrive/Colab files/Data change portfolio/Data Form-4-2.pdf")

from pdfrw import PdfReader, PdfName



def list_fields(fields):
    for fld in fields:
        # fld is already a PdfDict
        name = fld.get(PdfName.T)  # safely get the field name
        if name:
            # convert PdfString to unicode
            if hasattr(name, "to_unicode"):
                name = name.to_unicode()
            name = name.strip()
        else:
            name = "Unnamed"
        print(name, fld.get(PdfName.Subtype))

        # check for nested fields
        kids = fld.get(PdfName.Kids)
        if kids:
            list_fields(kids)

# Start recursion from root AcroForm
list_fields(pdf.Root.AcroForm.Fields)




hello None
Unnamed /Widget


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Create a simple DataFrame
data = {'Category': ['A', 'B', 'C', 'D', 'E'],
        'Value': [10, 25, 15, 30, 20]}
df_plot = pd.DataFrame(data)

# Create a bar plot using seaborn
plt.figure(figsize=(8, 6))
sns.barplot(x='Category', y='Value', data=df_plot)
plt.title('Simple Bar Plot using Seaborn')
plt.xlabel('Category')
plt.ylabel('Value')
plt.show()

# OLIA

In [10]:
# Install the pdfrw library used to read and write PDF form fields
!pip -q install pdfrw  # Quietly installs pdfrw using pip (for Google Colab environment)

# --- Mount Google Drive ---
from google.colab import drive, files
drive.mount('/content/drive')

import os, re, zipfile
import pandas as pd
from datetime import datetime
from pytz import timezone
from pdfrw import PdfReader, PdfWriter, PdfDict, PdfName, PdfObject

# --- Function to fill a PDF form and check checkboxes ---
def fill_pdf(input_pdf_path, output_pdf_path, data, checkboxes_to_check):
    pdf = PdfReader(input_pdf_path)  # Load the PDF form template

    # Ensure form fields refresh automatically
    if not pdf.Root.AcroForm:
        pdf.Root.AcroForm = PdfDict(NeedAppearances=PdfObject("true"))
    else:
        pdf.Root.AcroForm.update({PdfName("NeedAppearances"): PdfObject("true")})

    # Loop through each page
    for page in pdf.pages:
        annotations = page.Annots
        if not annotations:
            continue

        # --- 1️⃣ Fill text fields first ---
        for annot in annotations:
            if annot.Subtype == PdfName.Widget and annot.T:
                key = annot.T.to_unicode().strip()
                if key in data:
                    annot.V = data[key]
                    annot.AP = None

        # --- 2️⃣ Check checkboxes next ---
        for annot in annotations:
            if annot.Subtype == PdfName.Widget and annot.T:
                key = annot.T.to_unicode().strip()
                if key in checkboxes_to_check:
                    annot.V = PdfName("Yes")
                    annot.AS = PdfName("Yes")

    PdfWriter().write(output_pdf_path, pdf)  # Write the modified PDF


# --- File paths ---
excel_path = "/content/drive/MyDrive/Colab_files/Data_change_portfolio/personal_data_change.xlsx"
input_pdf_path = "/content/drive/MyDrive/Colab_files/Data_change_portfolio/data_form_editable.pdf"
output_folder = "filled_forms"
os.makedirs(output_folder, exist_ok=True)

# --- Timestamp for ZIP filename (Italy time) ---
italy_tz = timezone("Europe/Rome")
timestamp = datetime.now(italy_tz).strftime("%Y%m%d_%H%M")
zip_filename = f"fulfilled_forms_{timestamp}.zip"
zipf = zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED)

# --- Load Excel ---
df = pd.read_excel(excel_path)

# --- Mapping for checkbox fields ---
checkbox_map = {
    "Gender_change": "chk_gender",
    "Name_change": "chk_name",
    "Surname_change": "chk_surname",
    "Date of Birth_change": "chk_date_of_birth",
    "Place of birth_change": "chk_place_of_birth",
    "Country of birth_change": "chk_country_of_birth",
    "Email_change": "chk_email",
}

# --- Function to check changed fields ---
def get_changed_fields(row):
    fields_map = {
        "Gender_change": "Gender",
        "Name_change": "Name",
        "Surname_change": "Surname",
        "Date of Birth_change": "Date of Birth",
        "Place of birth_change": "Place of birth",
        "Country of birth_change": "Country of birth",
        "Email_change": "Email",
    }
    changed = []
    for col, name in fields_map.items():
        value = str(row.get(col, "")).strip().upper()
        if value == "ON":
            changed.append(name)
    return changed

# --- Loop through each row in Excel ---
for index, row in df.iterrows():
    name = str(row["Name"]).strip()
    surname = str(row["Surname"]).strip()

    changed_fields = get_changed_fields(row)
    field_suffix = "_".join(changed_fields).replace(" ", "_") if changed_fields else "NO_CHANGES"

    safe_name = re.sub(r"[^a-zA-Z0-9]", "_", f"{surname}_{name}_change_request_{field_suffix}")
    output_pdf_path = os.path.join(output_folder, f"{safe_name}.pdf")

    # Birth date formatting
    birth_date_raw = row.get("Date of Birth", pd.NaT)
    birth_date = pd.to_datetime(birth_date_raw, dayfirst=True, errors="coerce")
    formatted_birth_date = birth_date.strftime("%d/%m/%Y") if pd.notnull(birth_date) else ""

    # Current date for PDF
    formatted_current_date = datetime.now(italy_tz).strftime("%d/%m/%Y")

    # Dictionary of PDF fields
    data = {
        "txt_director": "JOHN SMITH",
        "txt_exam_center_city": "NAPLES",
        "txt_exam_center_country": "ITALY",
        "txt_institute_city": "NAPLES",
        "txt_location": "NAPLES",
        "txt_today_date": formatted_current_date,
        "txt_city": "NAPLES",
        "txt_client_code": str(row["Client code"]).zfill(8),
        "txt_exam_code": str(row["Exam code"]),
        "txt_gender": str(row["Gender"]),
        "txt_name": str(row["Name"]),
        "txt_surname": str(row["Surname"]),
        "txt_place_of_birth": str(row["Place of birth"]),
        "txt_date_of_birth": formatted_birth_date,
        "txt_country_of_birth": str(row["Country of birth"]),
        "txt_email": str(row["Email"]),
    }

    # Checkboxes that should be CHECKED
    checkboxes_to_check = [
        pdf_field for excel_col, pdf_field in checkbox_map.items()
        if str(row.get(excel_col, "")).strip().upper() == "ON"
    ]

    # ⚠️ Alert if candidate has NO changes
    if not checkboxes_to_check:
        print(f"⚠️ ALERT: Candidate {surname} {name} has NO changes (all '_change' columns are empty).")

    # Try filling PDF
    try:
        fill_pdf(input_pdf_path, output_pdf_path, data, checkboxes_to_check)
        zipf.write(output_pdf_path, os.path.basename(output_pdf_path))
        print(f"✔️ Row {index} processed: {os.path.basename(output_pdf_path)}")
    except Exception as e:
        print(f"❌ Error in row {index}: {e}")

# --- Finalize ZIP ---
zipf.close()
print(f"\n✅ All forms zipped as: {zip_filename}")


# --- Optional: Print PDF form field names ---
def print_pdf_field_names(pdf_path):
    pdf = PdfReader(pdf_path)
    field_names = set()
    for page in pdf.pages:
        annotations = page.Annots
        if annotations:
            for annot in annotations:
                if annot.Subtype == PdfName.Widget and annot.T:
                    resolved_annot = annot.resolve()
                    if resolved_annot:
                        name = resolved_annot.T.to_unicode().strip()
                        field_names.add(name)
    for name in sorted(field_names):
        print(name)


# --- Generate and display email message chunks in English ---
email_data = []  # List to hold formatted candidate data

for index, row in df.iterrows():  # Loop again through the spreadsheet
    name = str(row['Name']).strip().upper()  # Get uppercase first name
    surname = str(row['Surname']).strip().upper()  # Get uppercase surname
    full_name = f"{surname} {name}"  # Combine into full name
    changed_fields = get_changed_fields(row)  # Get changed fields
    changed_text = ", ".join(changed_fields) if changed_fields else "No changes"  # Join into a string
    email_data.append((surname, name, full_name, changed_text))  # Add to list for sorting

# Sort alphabetically by surname, then name
email_data.sort()

# Split candidates into chunks of 10 per message
chunk_size = 10
chunks = [email_data[i:i + chunk_size] for i in range(0, len(email_data), chunk_size)]

# Loop through each group and print the email message
for i, chunk in enumerate(chunks):
    subject_names = [entry[2] for entry in chunk]  # Get just the full names
    subject = f"Issues: {', '.join(subject_names)}"  # Email subject line

    body_lines = [f"- {entry[2]} ({entry[3]})" for entry in chunk]  # Create message body lines
    body = (
        "Good morning,\n"
        "I kindly ask you to update the data of the following candidates:\n\n"
        + "\n".join(body_lines)
    )  # Final email body

    print(f"\n--- Message {i + 1} ---")  # Header
    print(subject)  # Print subject line
    print("\n" + body)  # Print message body



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✔️ Row 0 processed: Adams_Zoey_change_request_Gender.pdf
✔️ Row 1 processed: Anderson_Leo_change_request_Name.pdf
✔️ Row 2 processed: Baker_Gabriel_change_request_Surname.pdf
✔️ Row 3 processed: Bianchi_Chiara_change_request_Gender_Country_of_birth.pdf
✔️ Row 4 processed: Brown_Noah_change_request_Date_of_Birth.pdf
✔️ Row 5 processed: Clark_Ethan_change_request_Place_of_birth.pdf
✔️ Row 6 processed: Garcia_Benjamin_change_request_Country_of_birth.pdf
✔️ Row 7 processed: Hall_Owen_change_request_Gender_Country_of_birth_Email.pdf
✔️ Row 8 processed: Harris_Mason_change_request_Surname.pdf
✔️ Row 9 processed: Jackson_Lucas_change_request_Place_of_birth.pdf
✔️ Row 10 processed: Johnson_Liam_change_request_Surname_Country_of_birth.pdf
✔️ Row 11 processed: King_Grace_change_request_Surname_Email.pdf
✔️ Row 12 processed: Kowalska_Zofia_change_request_Surname_Email.p

  return self._open_to_write(zinfo, force_zip64=force_zip64)


✔️ Row 33 processed: nan_nan_change_request_NO_CHANGES.pdf
⚠️ ALERT: Candidate nan nan has NO changes (all '_change' columns are empty).
✔️ Row 34 processed: nan_nan_change_request_NO_CHANGES.pdf
⚠️ ALERT: Candidate nan nan has NO changes (all '_change' columns are empty).
✔️ Row 35 processed: nan_nan_change_request_NO_CHANGES.pdf

✅ All forms zipped as: fulfilled_forms_20250907_1047.zip

--- Message 1 ---
Issues: ADAMS ZOEY, ANDERSON LEO, BAKER GABRIEL, BIANCHI CHIARA, BROWN NOAH, CLARK ETHAN, GARCIA BENJAMIN, HALL OWEN, HARRIS MASON, JACKSON LUCAS

Good morning,
I kindly ask you to update the data of the following candidates:

- ADAMS ZOEY (Gender)
- ANDERSON LEO (Name)
- BAKER GABRIEL (Surname)
- BIANCHI CHIARA (Gender, Country of birth)
- BROWN NOAH (Date of Birth)
- CLARK ETHAN (Place of birth)
- GARCIA BENJAMIN (Country of birth)
- HALL OWEN (Gender, Country of birth, Email)
- HARRIS MASON (Surname)
- JACKSON LUCAS (Place of birth)

--- Message 2 ---
Issues: JOHNSON LIAM, KING GR