In [99]:
import gspread
from google.oauth2.service_account import Credentials
from gspread_dataframe import set_with_dataframe
import configparser
import pandas as pd

## Read in your config and open spreadsheet

In [100]:
def combine_sections(config, *sections):
    """Flatten the named config sections into a single plain dict.

    Sections are applied in the order given, so a key that appears in
    several sections ends up with the value from the last one listed.
    """
    return {
        option: value
        for name in sections
        for option, value in config[name].items()
    }

# get config
cp = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini was not
# loaded. Fail here rather than with an opaque KeyError on the section lookup.
if not cp.read('config.ini'):
    raise FileNotFoundError("config.ini not found or unreadable")
config = combine_sections(cp, "sheets", "clearances")

# Setup auth and client
SCOPES = [config.get('scopes'),]
creds = Credentials.from_service_account_file(config.get('credentials'), scopes=SCOPES)
client = gspread.authorize(creds)

# Open your spreadsheet by name or URL
spreadsheet = client.open(config.get('workbook'))

## Generate your list of approved volunteers

In [101]:
# Read the clearance table; the first row of the range is the header row.
data = spreadsheet.worksheet(config.get('sheet_clearances') ).get_values("A4:G")
keys = data[0]
rows = data[1:]

# Index 3 holds the verified flag, compared as the literal text "TRUE".
# Filter `rows`, not `data`: otherwise the header row is classified as a
# not-cleared volunteer. Rows too short to carry the flag count as not cleared.
cleared_volunteers = [row for row in rows if len(row) > 3 and row[3] == "TRUE"]
cleared_volunteers_df = pd.DataFrame( [dict(zip(keys, row)) for row in cleared_volunteers] )

not_cleared_volunteers = [row for row in rows if len(row) <= 3 or row[3] != "TRUE"]
not_cleared_volunteers_df = pd.DataFrame( [dict(zip(keys, row)) for row in not_cleared_volunteers] )

# Join key "first|last": each part is trimmed, lower-cased and reduced to the
# letters a-z (spaces, apostrophes, hyphens and accented letters are removed).
for frame in (cleared_volunteers_df, not_cleared_volunteers_df):
    frame["full_name"] = (
        frame["First Name"]
            .str.strip()
            .str.lower()
            .str.replace(r"[^a-z]", "", regex=True)
        + "|" +
        frame["Last Name"]
            .str.strip()
            .str.lower()
            .str.replace(r"[^a-z]", "", regex=True)
    )

cleared_volunteers_df[cleared_volunteers_df["First Name"] == "Heather"]


Unnamed: 0,First Name,Last Name,Email,Verified,Clearence Expiration,Employee,Notes,full_name
42,Heather,Miller,mwm29k@gmail.com,True,,True,,heather|miller
43,Heather,Muah,hling@ateina.com.lr,True,3/17/2030,,Same as heather ling,heather|muah
44,Heather,Ling,hling@ateina.com.lr,True,3/17/2030,,Same as heather muah,heather|ling
46,Heather,O'Neill,hlf1277@yahoo.com,True,10/3/2026,,,heather|oneill


## Read in the SignUpGenius report

In [102]:
def format_phone(num):
    """Render a phone number as ``###.###.####`` when it has exactly ten digits.

    Missing values and whitespace-only strings come back as ``""``. Any other
    input is reduced to its digits and returned unformatted when the digit
    count is not ten.
    """
    if pd.isna(num) or not num.strip():
        return ""  # blanks stay blank
    digits = "".join(ch for ch in num if ch.isdigit())
    if len(digits) != 10:
        return digits  # not a 10-digit number: hand back the bare digits
    return ".".join((digits[:3], digits[3:6], digits[6:]))

In [103]:
# Load the signup export; every column is read as text so phone numbers and
# timestamps are parsed explicitly below.
sug_report = 'away_volunteers.csv'
df = pd.read_csv(
    sug_report,
    usecols=["Location", "First Name", "Last Name", "Email", "Phone", "Item", "Item Comment", "Sign Up", "Start Date/Time (mm/dd/yyyy)", "Sign Up Comment", "Sign Up Timestamp"],
    dtype=str
)
df["Phone"] = df["Phone"].apply(format_phone)

# get good start date/time columns (unparseable values become NaT)
df["start_date_time"] = pd.to_datetime(
    df["Start Date/Time (mm/dd/yyyy)"],
    format="%m/%d/%Y %I:%M %p",
    errors="coerce"
)
df["start_date"] = df["start_date_time"].dt.date
df["start_time"] = df["start_date_time"].dt.time
df = df.drop(columns=["Start Date/Time (mm/dd/yyyy)", "start_date_time"])

# get rid of rows with no signups
df = df.dropna(subset=["Sign Up Timestamp"])

df["signup_timestamp"] = pd.to_datetime(
    df["Sign Up Timestamp"],
    format="%m/%d/%Y %I:%M:%S %p",
    errors="coerce"
)

# clean up email
df["Email"] = df["Email"].str.strip().str.lower()

# Build the "first|last" join key with the same rule used for the clearance
# lists: trim, lower-case, then keep only the letters a-z. Without the last
# step a name such as "O'Neill" cannot match its clearance-side key "oneill".
df["full_name"] = (
    df["First Name"].fillna("").str.strip().str.lower().str.replace(r"[^a-z]", "", regex=True)
    + "|" +
    df["Last Name"].fillna("").str.strip().str.lower().str.replace(r"[^a-z]", "", regex=True)
)

volunteer_df = df
# Shift label is taken from the start of the item comment, when present.
volunteer_df["Shift"] = volunteer_df["Item Comment"].str.extract(r"^(Shift [12])", expand=False)

## Build List of Cleared Volunteers

In [104]:
# build a pattern for roles that don't need clearance
non_cleared_roles = [
    "Fry Trailer",
    "Booth Volunteer",
    "Kona Ice",
    "AGMB Spiritwear Sale"
]
non_cleared_roles_pat = "|".join(non_cleared_roles)

# Get a unique list of volunteers signed up for roles that do need clearance.
# Sorting Phone/Email descending first means the row kept per person is one
# that has contact details when any of their rows do (NaN sorts last).
unique_volunteers = (
    volunteer_df.loc[
        ~volunteer_df["Item"].str.contains(non_cleared_roles_pat, na=False),
        ['First Name', 'Last Name', 'Email', 'Phone'],
    ]
    .sort_values(by=["Phone", "Email"], ascending=False)
    .drop_duplicates(subset=["First Name", "Last Name"], keep="first")
    .sort_values(by=["Last Name", "First Name"], ascending=True)
    .reset_index(drop=True)
)

# create helper columns to join on: "first|last", letters a-z only
unique_volunteers["full_name"] = (
    unique_volunteers["First Name"]
        .str.lower()
        .str.replace(r"[^a-z]", "", regex=True)
        .str.strip()
    + "|" +
    unique_volunteers["Last Name"]
        .str.lower()
        .str.replace(r"[^a-z]", "", regex=True)
        .str.strip()
)

# Create blank column
unique_volunteers["volunteer_cleared"] = "no-information"
unique_volunteers["match_method"] = "no-match"


def _sheet_emails(frame):
    """Return the frame's non-blank emails, trimmed and lower-cased.

    Signup emails are lower-cased when the report is loaded, so the sheet side
    has to be normalised the same way for the comparison to be meaningful.
    """
    emails = frame["Email"].str.strip().str.lower()
    return emails[emails != ""]


# Matching passes, applied in order; a volunteer is only touched by the first
# pass that matches, so "not cleared" results take priority over "cleared".
# (match_method, volunteer_cleared, column on unique_volunteers, known values)
match_passes = [
    ("name", "no", "full_name", not_cleared_volunteers_df["full_name"]),
    ("email", "no", "Email", _sheet_emails(not_cleared_volunteers_df)),
    ("name", "yes", "full_name", cleared_volunteers_df["full_name"]),
    # Compare Email with Email: the original compared full_name against the
    # sheet's emails, so this pass could never match.
    ("email", "yes", "Email", _sheet_emails(cleared_volunteers_df)),
]
for method, status, column, known_values in match_passes:
    mask = (
        (unique_volunteers["match_method"] == "no-match")
        & (unique_volunteers[column].isin(known_values))
    )
    unique_volunteers.loc[mask, ["match_method", "volunteer_cleared"]] = [method, status]

# Save the status list to Google Sheets. The full_name helper column stays on
# unique_volunteers and is only dropped from the copy that is written out.
sheet_status_list = "volunteer_list_w_status"
try:
    sheet_status = spreadsheet.worksheet(sheet_status_list)
except gspread.exceptions.WorksheetNotFound:
    # Only a missing tab should trigger creation; auth, quota and network
    # errors must surface instead of being masked by a second API call.
    sheet_status = spreadsheet.add_worksheet(title=sheet_status_list, rows=100, cols=20)

# Clear old values first so a shorter list does not leave stale rows behind.
sheet_status.batch_clear(["A:M"])

save_df = unique_volunteers.drop(columns=["full_name"])
set_with_dataframe(sheet_status, save_df.sort_values(by=["Last Name", "First Name"], ascending=True).reset_index(drop=True), row=1, col=1)
# +1 row for the header written by set_with_dataframe
sheet_status.resize(rows=len(save_df)+1, cols=len(save_df.columns))


{'spreadsheetId': '1RI6uHPJ6R7oFwAF5WNWnmaYdxuSzVBsbmVcm5VvAxJ4',
 'replies': [{}]}

## Add the clearance information and save to Google Sheets

### Add has_clearances column to the volunteer_df

In [105]:
def _clean_name_part(names):
    """Lower-case a name column and keep only the letters a-z; missing values become ''."""
    return (
        names.fillna("")
        .str.lower()
        .str.replace(r"[^a-z]", "", regex=True)
        .str.strip()
    )


# mark as N/A if the role doesn't require clearances; everything else starts
# empty and is filled in by the checks below
volunteer_df["has_clearances"] = volunteer_df["Item"].str.contains(non_cleared_roles_pat, na=False).map(
    {True: "N/A", False: None}
)

# give me only my cleared volunteers
cleared = unique_volunteers[unique_volunteers["volunteer_cleared"].str.lower() == "yes"].copy()

# generate cleared emails
cleared_emails = set(cleared["Email"].dropna().str.strip().str.lower())

# generate cleared list of full_names ("first|last", letters a-z only)
cleared["full_name"] = (
    _clean_name_part(cleared["First Name"])
    + "|" +
    _clean_name_part(cleared["Last Name"])
)
cleared_names = set(cleared["full_name"])

# Build the signup-side key with the same rule as cleared_names. The stored
# volunteer_df["full_name"] is normalised differently upstream, so comparing it
# directly misses names that contain apostrophes, hyphens or spaces.
signup_name_key = (
    _clean_name_part(volunteer_df["First Name"])
    + "|" +
    _clean_name_part(volunteer_df["Last Name"])
)

# Check on name first to see if cleared
volunteer_df.loc[
    volunteer_df["has_clearances"].isna() & signup_name_key.isin(cleared_names),
    "has_clearances"
] = "yes"

# check on email next to see if cleared
volunteer_df.loc[
    volunteer_df["has_clearances"].isna() & volunteer_df["Email"].isin(cleared_emails),
    "has_clearances"
] = "yes"

# all other empty is not cleared
volunteer_df["has_clearances"] = volunteer_df["has_clearances"].fillna("no")

### Save to Google Sheets

In [106]:
# Write the per-signup role list to its own worksheet, creating it if needed.
sheet = "volunteer_roles"
try:
    sheet_status = spreadsheet.worksheet(sheet)
except gspread.exceptions.WorksheetNotFound:
    # Only a missing tab should trigger creation; other API errors must surface.
    sheet_status = spreadsheet.add_worksheet(title=sheet, rows=100, cols=20)

# Clear old values first so a shorter list does not leave stale rows behind.
sheet_status.batch_clear(["A:M"])
col_order = [
    "start_time",
    "start_date",
    "has_clearances",
    "First Name",
    "Last Name",
    "Item",
    "Shift",
    "Email",
    "Phone",

]
# Selecting col_order already leaves out every helper column, so no separate
# drop() is needed.
save_df = volunteer_df[col_order]
set_with_dataframe(sheet_status, save_df.sort_values(by=["start_date", "Shift", "Item", "start_time", "Last Name", "First Name"], ascending=True).reset_index(drop=True), row=1, col=1)
# +1 row for the header written by set_with_dataframe
sheet_status.resize(rows=len(save_df)+1, cols=len(save_df.columns))

{'spreadsheetId': '1RI6uHPJ6R7oFwAF5WNWnmaYdxuSzVBsbmVcm5VvAxJ4',
 'replies': [{}]}

In [107]:
# Spot check: rows whose start_date renders as 2025-10-24.
save_df.loc[
    save_df["start_date"]
    .astype(str)
    .str.contains("2025-10-24", na=False)
]

Unnamed: 0,start_time,start_date,has_clearances,First Name,Last Name,Item,Shift,Email,Phone
496,16:00:00,2025-10-24,yes,Tascha,Peterman,Bus Transportation for Prop/Pit Assist,,tascha.peterman@gmail.com,
497,16:00:00,2025-10-24,yes,Eric,Peterman,Bus Transportation for Prop/Pit Assist,,crazekayakr@gmail.com,
499,16:00:00,2025-10-24,yes,Katy,Griffith,Bus/General Chaperone,,katyrosegriffith@gmail.com,
500,16:00:00,2025-10-24,yes,Laura,Chrobak,Bus/General Chaperone,,thechrobaks@gmail.com,
501,16:00:00,2025-10-24,yes,Amy,Brunnquell,Bus/General Chaperone,,alefranc@yahoo.com,
502,16:00:00,2025-10-24,yes,Robert,Ruddy,Bus/General Chaperone,,agsd@ruddy.net,
503,16:00:00,2025-10-24,yes,Marcia,Rapone,Bus/General Chaperone,,marcia131@hotmail.com,
504,16:00:00,2025-10-24,yes,Jesseca,Wolf,Bus/General Chaperone,,jwolf625@gmail.com,
505,16:00:00,2025-10-24,yes,Michael,Hill,Driver and Navigator,,myshadow70@gmail.com,
506,16:00:00,2025-10-24,yes,Michael,Lindsay,Driver and Navigator,,mlindsay29@gmail.com,


In [114]:
# Number of signup rows per person, busiest first. The second reset_index()
# keeps the pre-sort position as an "index" column.
(
    volunteer_df
    .groupby(["Last Name", "First Name"])
    .size()
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
    .reset_index()
)

Unnamed: 0,index,Last Name,First Name,count
0,86,Peterman,Tascha,29
1,18,Brunnquell,Amy,24
2,85,Peterman,Eric,23
3,109,Wolf,Jesseca,19
4,58,Laughlin,Harmony,17
...,...,...,...,...
110,107,White,Roarke,1
111,110,Wolf,Tammy,1
112,111,Yu,Christine,1
113,113,muah,anaia,1


In [119]:
# Every distinct signup item, kept for reference when editing the list below.
unique_items = volunteer_df['Item'].unique()

# Items left out of the volunteer ranking: time-slot entries and
# food/supply contributions.
exclude_items = [
    # time slots
    '5:45PM-6:00PM',
    '6:00PM - 6:15PM',
    '6:15PM-6:30PM',
    '6:30PM-6:45PM',
    '6:45PM-7:00PM',
    '7:00-7:15pm',
    '7:15-7:30PM',
    '7:30-7:45PM',
    '7:45-8:00PM',
    '8:00-8:15PM',
    '8:15-8:30PM',
    # food and supplies
    'Brownies',
    'Cookie tray',
    'Dessert-sized paper plates',
    'Dinner-sized paper plates',
    'Entrees/ Main Dishes',
    'Flavored Seltzer',
    'Fruit tray / fruit salad',
    'Napkins',
    'Plastic Forks',
    'Plastic knives',
    'Plastic Spoons',
    'Rice Krispy treats',
    'Rolls',
    'Salad / veggie tray',
    'Soda',
    'Water',
]

exclude_items

['5:45PM-6:00PM',
 '6:00PM - 6:15PM',
 '6:15PM-6:30PM',
 '6:30PM-6:45PM',
 '6:45PM-7:00PM',
 '7:00-7:15pm',
 '7:15-7:30PM',
 '7:30-7:45PM',
 '7:45-8:00PM',
 '8:00-8:15PM',
 '8:15-8:30PM',
 'Brownies',
 'Cookie tray',
 'Dessert-sized paper plates',
 'Dinner-sized paper plates',
 'Entrees/ Main Dishes',
 'Flavored Seltzer',
 'Fruit tray / fruit salad',
 'Napkins',
 'Plastic Forks',
 'Plastic knives',
 'Plastic Spoons',
 'Rice Krispy treats',
 'Rolls',
 'Salad / veggie tray',
 'Soda',
 'Water']

In [125]:
# Rank people by number of signups, ignoring the excluded items.
vol_rank = (
    volunteer_df.loc[~volunteer_df["Item"].isin(exclude_items)]
    .groupby(["Last Name", "First Name"])
    .size()
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
    .reset_index(drop=True)
)
vol_rank.head(20)

Unnamed: 0,Last Name,First Name,count
0,Peterman,Tascha,29
1,Peterman,Eric,23
2,Brunnquell,Amy,21
3,Wolf,Jesseca,19
4,Laughlin,Harmony,17
5,Feller,Wendy,15
6,Hill,Michael,15
7,Griffith,Katy,15
8,Scheibe,Elizabeth,15
9,Stephenson,Julie,14


In [126]:
# Summarise vol_rank: row count, columns, non-null counts and dtypes.
vol_rank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Last Name   72 non-null     object
 1   First Name  72 non-null     object
 2   count       72 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 1.8+ KB


In [111]:
# Write the full signup table, including has_clearances, to a local Parquet file.
volunteer_df.to_parquet("output.parquet", engine="pyarrow")

In [112]:
import duckdb
import polars as pl
# NOTE(review): no visible cell writes this CSV (the notebook writes
# output.parquet and Google Sheets), so this query may be reading a stale
# export — confirm where volunteers_roles.csv comes from.
csv_file = "volunteers_roles.csv"

# Run a query directly on the CSV: rows whose has_clearances is neither
# 'yes' nor 'N/A'
result = duckdb.query(f"""
    SELECT *
    FROM read_csv_auto('{csv_file}')
    where has_clearances not in ('yes', 'N/A')
""").pl()  # .pl() returns the result as a Polars DataFrame

result

column0,start_time,start_date,has_clearances,First Name,Last Name,Item,Shift,Email,Phone
i64,time,date,str,str,str,str,str,str,str
71,12:00:00,2025-09-20,"""no""","""Rebecca""","""Tabbutt""","""Pit Assist""",,"""rebeccatabbutt@comcast.net""","""484.888.1840"""
172,12:00:00,2025-10-04,"""no""","""Heather""","""Muah""","""Bus/General Chaperone""",,"""hling@ateina.com.lr""","""215.520.1437"""
237,12:00:00,2025-10-11,"""no""","""Jason""","""Feller""","""Prop Assist""",,"""jason.r.feller@gmail.com""","""240.672.4552"""
330,12:00:00,2025-11-01,"""no""","""Jason""","""Feller""","""Bus Transportation for Prop/Pi…",,"""jason.r.feller@gmail.com""","""240.672.4552"""
331,12:00:00,2025-11-01,"""no""","""Scott""","""Dixon""","""Bus Transportation for Prop/Pi…",,,
349,12:00:00,2025-11-01,"""no""","""Scott""","""Dixon""","""Prop Assist""",,,
350,12:00:00,2025-11-01,"""no""","""Jason""","""Feller""","""Prop Assist""",,"""jason.r.feller@gmail.com""","""240.672.4552"""
