In [24]:
import os
import re
import tkinter as tk
from tkinter import filedialog

import pandas as pd

def extract_eid(email):
    """Return the portion of an email address that precedes the first '@'.

    Anything that is not a string, or a string that contains no '@', is
    handed back unchanged.
    """
    if not isinstance(email, str):
        return email
    local_part, at_sign, _ = email.partition('@')
    # partition() yields an empty separator when no '@' is present.
    return local_part if at_sign else email

def determine_event_type(name):
    """Classify an event as "401k", "ESPP", "401k/ESPP" or "Other" from its name.

    Matching is case-insensitive. Every keyword except the abbreviation "nj"
    is matched as a plain substring. "nj" only counts when it is not embedded
    in a longer word, so names such as "Benjamin" or "Enjoy" are not mistaken
    for a new-joiner (401k) event.

    Args:
        name: Event name (the caller passes the attendee file's base name).

    Returns:
        One of "401k/ESPP", "401k", "ESPP" or "Other".
    """
    name_lower = name.lower()
    keywords_401k = ["401k", "401(k)", "summer camp", "summer", "new joiner", "retirement"]
    keywords_espp = ["espp", "summer camp", "summer", "new joiner"]

    # "nj" is short enough to occur inside ordinary words, so require a
    # non-letter (or the string boundary) on both sides of it.
    has_nj = re.search(r"(?<![a-z])nj(?![a-z])", name_lower) is not None

    is_401k = has_nj or any(keyword in name_lower for keyword in keywords_401k)
    is_espp = any(keyword in name_lower for keyword in keywords_espp)

    if is_401k and is_espp:
        return "401k/ESPP"
    elif is_401k:
        return "401k"
    elif is_espp:
        return "ESPP"
    else:
        return "Other"

def extract_event_date(name):
    """Return the first whitespace-delimited token of ``name``.

    Presumably event files are named "<date> <title>.csv", which makes the
    leading token the event date; no date validation is performed here.
    Raises IndexError when ``name`` is empty or contains only whitespace.
    """
    leading_token = name.split(maxsplit=1)[0]
    return leading_token

def process_file(file_path):
    """Load one attendee CSV and return its unique participants tagged with event details.

    The event name, type and date are all derived from the file's base name.

    Args:
        file_path: Path to a CSV file that contains a 'Participant Id' column.

    Returns:
        A DataFrame with the columns 'Participant Id', 'Event Name',
        'Event Type' and 'Event Date', holding one row per distinct non-null
        participant id. If the file cannot be read or processed, the error is
        printed (naming the file) and an empty frame with the same columns is
        returned so that a multi-file run can continue.
    """
    output_columns = ['Participant Id', 'Event Name', 'Event Type', 'Event Date']
    file_name = os.path.basename(file_path)
    try:
        attendees = (
            pd.read_csv(file_path)
            .dropna(subset=['Participant Id'])
            .drop_duplicates(subset='Participant Id', keep='first')
            # assign() builds a new frame instead of writing into a filtered
            # slice, which can emit SettingWithCopyWarning on some pandas versions.
            .assign(**{
                'Event Name': file_name,
                'Event Type': determine_event_type(file_name),
                'Event Date': extract_event_date(file_name),
            })
        )
        return attendees[output_columns]
    except Exception as e:
        # Best-effort by design: report which file failed and keep going.
        print(f"An error occurred while processing {file_name}: {e}")
        return pd.DataFrame(columns=output_columns)

def update_attendee_tracker(filtered_data, file_path="Attendee-Database.csv"):
    """Append newly processed attendees to the stored attendee database.

    Args:
        filtered_data: DataFrame of new attendee rows with at least the
            columns 'Participant Id' and 'Event Name'.
        file_path: Location of the database CSV. Defaults to
            "Attendee-Database.csv" in the current working directory.

    Returns:
        The existing database rows (if the file exists) followed by the new
        rows, with a fresh integer index. A participant is kept only once per
        event name, so importing the same attendee file again does not
        duplicate rows. The result is not written back to disk here.
    """
    if os.path.exists(file_path):
        wc_db = pd.read_csv(file_path)
    else:
        wc_db = pd.DataFrame(columns=["Participant Id", "Event Name", "Event Type", "Event Date"])
    wc_db = pd.concat([wc_db, filtered_data], ignore_index=True)
    # keep='first' retains the row already in the database over a re-imported one.
    return wc_db.drop_duplicates(
        subset=["Participant Id", "Event Name"], keep="first", ignore_index=True
    )

def main():
    """Prompt for attendee CSV files, merge them into the attendee database and save it.

    Workflow:
      1. Show a file picker restricted to CSV files; exit early if nothing is chosen.
      2. Process every selected file and combine the results.
      3. Append them to the stored database via update_attendee_tracker.
      4. Recompute, per attendee (keyed by the part of 'Participant Id' before
         '@'), the first/last event date, the event count and a
         'First Time' / 'Returning' status.
      5. Overwrite "Attendee-Database.csv" and print the first rows.
    """
    root = tk.Tk()
    root.withdraw()
    try:
        file_dialog = tk.Toplevel(root)
        file_dialog.title("Select one or more Attendee Forms")
        file_dialog.grab_set()  # Make the file dialog window modal
        file_dialog.focus_force()  # Bring the file dialog window to the front

        file_paths = filedialog.askopenfilenames(
            master=file_dialog,
            title="Select one or more Attendee Forms",
            filetypes=[("CSV files", "*.csv")]
        )
    finally:
        # Destroying the root also destroys its Toplevel children, so the
        # hidden windows are released even if the dialog raises.
        root.destroy()

    if not file_paths:
        print("No files were selected. Exiting the script.")
        return

    # Concatenate once instead of growing a DataFrame inside the loop.
    all_filtered_attendees = pd.concat(
        [process_file(file_path) for file_path in file_paths],
        ignore_index=True,
    )

    attendee_tracker = update_attendee_tracker(all_filtered_attendees)
    attendee_tracker['EID Extract'] = attendee_tracker['Participant Id'].apply(extract_eid)

    # The stored database may already carry these columns from an earlier run;
    # drop them so the merge below does not create _x/_y duplicates.
    attendee_tracker = attendee_tracker.drop(
        columns=['First Event Attended', 'Last Event Attended'], errors='ignore'
    )

    # Earliest and latest event per attendee. 'Event Date' values are compared
    # as stored; this assumes a sortable format such as zero-padded YYYY.MM.DD.
    attendee_dates = (
        attendee_tracker.groupby('EID Extract')['Event Date']
        .agg(['min', 'max'])
        .reset_index()
        .rename(columns={'min': 'First Event Attended', 'max': 'Last Event Attended'})
    )

    # Merge to main DataFrame
    attendee_tracker = pd.merge(attendee_tracker, attendee_dates, how='left', on='EID Extract')

    attendee_tracker['Event Count'] = (
        attendee_tracker.groupby('EID Extract')['EID Extract'].transform('count')
    )
    # Exactly one recorded event means a first-time attendee.
    attendee_tracker['Status'] = (
        attendee_tracker['Event Count'].eq(1).map({True: 'First Time', False: 'Returning'})
    )

    attendee_tracker.to_csv("Attendee-Database.csv", index=False)
    print("Final Attendee Tracker:")
    print(attendee_tracker.head())

# Run the workflow when this code is executed directly (as a notebook cell or
# a script) rather than imported as a module.
if __name__ == "__main__":
    main()

Final Attendee Tracker:
                     Participant Id                     Event Name Event Type  \
0          beth.mckim@accenture.com  2023.09.08 Retirement 101.csv       401k   
1     malikul.muhamad@accenture.com  2023.09.08 Retirement 101.csv       401k   
2            e.garner@accenture.com  2023.09.08 Retirement 101.csv       401k   
3      pamela.kittman@accenture.com  2023.09.08 Retirement 101.csv       401k   
4  stephanie.griparis@accenture.com  2023.09.08 Retirement 101.csv       401k   

   Event Date         EID Extract  Event Count      Status  \
0  2023.09.08          beth.mckim            2   Returning   
1  2023.09.08     malikul.muhamad            1  First Time   
2  2023.09.08            e.garner            1  First Time   
3  2023.09.08      pamela.kittman            6   Returning   
4  2023.09.08  stephanie.griparis            5   Returning   

  First Event Attended Last Event Attended  
0           2023.09.08          2023.10.13  
1           2023.09.08    

In [23]:
# RESET DATABASE
# WARNING: destructive. Running this cell removes every row from the attendee
# database file; only the header line is kept.
file_path = "Attendee-Database.csv"

try:
    # Read the CSV file, including only the header
    df = pd.read_csv(file_path, nrows=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No usable database yet (missing or zero-byte file): fall back to the
    # base columns that update_attendee_tracker uses for a new database.
    df = pd.DataFrame(columns=["Participant Id", "Event Name", "Event Type", "Event Date"])

# Write the empty DataFrame (with headers only) back to the CSV, overwriting the original file
df.to_csv(file_path, index=False)

# Print the number of rows remaining in the DataFrame
print(f"Number of rows remaining in the DataFrame: {len(df)}")

Number of rows remaining in the DataFrame: 0
