In [1]:
import gspread
from google.oauth2.service_account import Credentials
import re

In [2]:
# Authenticate as a service account and build the gspread client.
# The key file path is relative to the notebook's working directory.
SCOPES = ['https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file(
    'credentials.json',
    scopes=SCOPES,
)
client = gspread.authorize(creds)

In [3]:
# Open the workbook by its title.
spreadsheet = client.open("AGIMBA Contacts 25-26")

# Worksheets to harvest addresses from. Each entry is
#   (worksheet title, column number passed to col_values(), number of leading
#    column entries to skip before reading addresses)
sheets = (
    ("AGMB Roster", 6, 1),
    ("AGIMBA Org", 2, 1),
    ("AG Staff", 2, 1),
    ("AGIMBA Contacts", 3, 1),
)

# Destination worksheet that receives the combined address list.
sheet_email_lists = spreadsheet.worksheet("EmailLists")

In [4]:
# split emails if they put two in a cell
def split_emails(raw_list):
    """Break every cell string into its candidate address tokens.

    Each entry of ``raw_list`` is split on any run of characters that cannot
    appear in an email address (whitespace, commas, semicolons, brackets, ...),
    so a cell holding several addresses yields several tokens.

    Args:
        raw_list: iterable of strings, one per spreadsheet cell.

    Returns:
        A flat list of the non-empty tokens, in their original order. Tokens
        are not validated here.
    """
    # Tokens can only contain characters from the allowed class, so they never
    # carry surrounding whitespace; dropping empty strings is all that is needed.
    return [
        token
        for cell in raw_list
        for token in re.split(r'[^a-zA-Z0-9._%+\-@]+', cell)
        if token
    ]

# Basic email pattern: username@domain.tld
# This is a shape check only, not full RFC validation.
EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$")
def is_valid_email(email):
    """Return True when ``email`` has the basic ``user@domain.tld`` shape.

    Args:
        email: the candidate address string.

    Returns:
        bool: True if the whole string matches ``EMAIL_REGEX``, else False.
    """
    # fullmatch rather than match: with match(), the trailing `$` also matches
    # just before a final newline, so "user@example.com\n" would be accepted.
    return EMAIL_REGEX.fullmatch(email) is not None

In [5]:
# Gather every valid address from the configured worksheets into one set.
# Addresses are lowercased so that duplicates differing only in case collapse.
emails_combined = set()
for sheet_name, column_index, skip_count in sheets:
    column_cells = spreadsheet.worksheet(sheet_name).col_values(column_index)
    # Drop the leading entries, then split cells that hold more than one address.
    candidates = split_emails(column_cells[skip_count:])
    emails_combined |= {addr.lower() for addr in candidates if is_valid_email(addr)}

In [6]:
# Build one single-cell row per address. The addresses were lowercased when
# they were collected, so the set is already unique case-insensitively;
# sorting gives a stable, alphabetical order.
update_list = [[email] for email in sorted(emails_combined)]

# Clear old data in column A from row 2 downward (row 1 is left untouched)
sheet_email_lists.batch_clear(['A2:A'])

# Write the new list starting at A2. Skip the call when there is nothing to
# write: the computed range would otherwise be the inverted 'A2:A1'.
if update_list:
    sheet_email_lists.update(range_name=f'A2:A{len(update_list)+1}', values=update_list)

print(f"Wrote {len(update_list)} unique emails to '{sheet_email_lists.title}' sheet.")

Wrote 96 unique emails to 'EmailLists' sheet.
