In [1]:
import gspread
from google.oauth2.service_account import Credentials
import re
import configparser

In [2]:
# Return the source-sheet definitions found in the config
def source_sheets(contact_config):
    """Collect the source-sheet definitions from the contacts config.

    Every option whose name contains ``source_sheet`` must hold a value of the
    form ``<sheet title>,<column>,<row>``. The last two comma-separated fields
    are parsed as integers; everything before them is the sheet title.

    Args:
        contact_config: Mapping of option name to raw string value.

    Returns:
        A tuple of ``(sheet_title, column, row)`` tuples, in the mapping's
        iteration order.

    Raises:
        ValueError: If a matching value has fewer than three fields or a
            non-integer column/row.
    """
    sheet_info = []
    for key, value in contact_config.items():
        if 'source_sheet' not in key:
            continue
        # Split from the right so a sheet title that itself contains commas
        # is kept intact instead of breaking the three-way unpacking.
        sheet, column, row = value.rsplit(',', 2)
        # int() tolerates surrounding spaces; the title needs an explicit strip.
        sheet_info.append((sheet.strip(), int(column), int(row)))
    return tuple(sheet_info)

# Break each cell into candidate addresses (people sometimes put several in one cell)
def split_emails(raw_list):
    """Tokenise every entry of ``raw_list`` into candidate email strings.

    Any run of characters that cannot appear in an email address is treated as
    a separator. Empty tokens are discarded; the tokens are NOT validated here.

    Args:
        raw_list: Iterable of strings (one per cell).

    Returns:
        A flat list of non-empty tokens, in input order.
    """
    separator = re.compile(r'[^a-zA-Z0-9._%+\-@]+')
    return [
        token.strip()
        for cell in raw_list
        for token in separator.split(cell)
        if token.strip()
    ]

# Basic email pattern: username@domain, where the domain is two or more
# dot-separated, non-empty labels (so no "a@b..com" and no trailing dot).
# The end is anchored with \Z rather than $, because $ also matches just
# before a trailing newline.
EMAIL_REGEX = re.compile(r"^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+\Z")
def is_valid_email(email):
    """Return True when ``email`` looks like a plain ``user@domain.tld`` address.

    This is a deliberately simple syntactic check, not full RFC validation.

    Args:
        email: Candidate address string.

    Returns:
        bool: True if the whole string matches ``EMAIL_REGEX``.
    """
    # fullmatch guarantees the entire string is consumed by the pattern.
    return EMAIL_REGEX.fullmatch(email) is not None

In [3]:
# Load the [contacts] section of config.ini into a plain dict
cp = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse, so fail here with a clear message rather than with an
# unrelated KeyError on the 'contacts' lookup below.
if not cp.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable in the working directory")
config = dict(cp['contacts'])

# Setup auth and client
SCOPES = ['https://www.googleapis.com/auth/drive']
creds = Credentials.from_service_account_file(config.get('credentials'), scopes=SCOPES)
client = gspread.authorize(creds)

In [4]:
# Open the spreadsheet by title, or by URL when the configured value is a link.
# client.open() only looks a workbook up by its title, so URLs need open_by_url().
workbook_ref = config.get('workbook')
if workbook_ref and workbook_ref.startswith(('http://', 'https://')):
    spreadsheet = client.open_by_url(workbook_ref)
else:
    spreadsheet = client.open(workbook_ref)

# (sheet title, column, row) triples describing where to read addresses from
sheets = source_sheets( config )

In [5]:
# Gather the valid addresses from every source sheet, lower-cased and de-duplicated
emails_combined = set()
for sheet, column, row in sheets:
    column_cells = spreadsheet.worksheet(sheet).col_values(column)
    # `row` acts as a slice offset: the first `row` cells of the column are skipped
    emails = split_emails(column_cells[row:])
    emails_combined.update(map(str.lower, filter(is_valid_email, emails)))

In [None]:
# Build the list of single-cell rows the sheet update expects
update_list = [[email] for email in sorted(emails_combined)]

# Clear old data
email_range = config.get('email_range')
sheet_email_lists = spreadsheet.worksheet(config.get('email_sheet'))
sheet_email_lists.batch_clear([email_range])

# Write the new data. The range is closed by appending the last row number to
# the configured range.
# NOTE(review): this assumes email_range is open-ended and starts on row 2
# (e.g. 'A2:A') — confirm against config.ini.
# The range is pulled into a variable because reusing the f-string's own quote
# character inside a replacement field is a SyntaxError before Python 3.12.
if update_list:
    sheet_email_lists.update(range_name=f'{email_range}{len(update_list)+1}', values=update_list)
# With nothing to write the update is skipped: the computed range would be
# inverted (e.g. 'A2:A1') and the clear above has already emptied the column.

print(f"Wrote {len(update_list)} unique emails to '{sheet_email_lists.title}' sheet.")

In [None]:
# Scratch check: display the title of the target worksheet object
sheet_email_lists.title

In [10]:
# Scratch check: display the configured target range for the email list
config.get('email_range')

'A2:A'