In [17]:
import tkinter as tk
from tkinter import filedialog
import pandas as pd
import os

def extract_eid(email):
    return email.split('@')[0] if '@' in email else email

def determine_event_type(name):
    if "ESPP" in name:
        return "ESPP"
    elif any(keyword in name for keyword in ["401", "401k", "401(k)"]):
        return "401k"
    else:
        return "Other"

def extract_event_date(name):
    return name.split()[0]

def process_file(file_path):
    file_name = os.path.basename(file_path)
    df = pd.read_csv(file_path)

    if 'Role' not in df.columns or 'Participant Id' not in df.columns:
        print(f"Required columns missing in file: {file_name}. Skipping this file.")
        return pd.DataFrame()

    df = df[df['Participant Id'].notna() & (df['Role'] == 'Attendee')]
    df = df[['Participant Id']]  # Select only the Participant Id column
    df['Event Name'] = file_name
    df['Event Type'] = determine_event_type(file_name)
    df['Event Date'] = extract_event_date(file_name)
    return df

def update_attendee_tracker(filtered_data):
    file_path = "Attendee-Tracker.csv"
    if os.path.exists(file_path):
        wc_db = pd.read_csv(file_path)
    else:
        wc_db = pd.DataFrame(columns=["Participant Id", "Event Name", "Event Type", "Event Date"])
    wc_db = pd.concat([wc_db, filtered_data], ignore_index=True)
    wc_db.to_csv(file_path, index=False)
    return wc_db

In [18]:
def main():
    root = tk.Tk()
    root.withdraw()
    file_paths = filedialog.askopenfilenames(
        title="Select one or more Attendee Report files",
        filetypes=[("CSV files", "*.csv")]
    )
    root.destroy()

    if not file_paths:
        print("No files were selected. Exiting the script.")
        return

    all_filtered_attendees = pd.DataFrame()
    for file_path in file_paths:
        filtered_attendees = process_file(file_path)
        all_filtered_attendees = pd.concat([all_filtered_attendees, filtered_attendees], ignore_index=True)

    attendee_tracker = update_attendee_tracker(all_filtered_attendees)
    attendee_tracker['EID Extract'] = attendee_tracker['Participant Id'].apply(extract_eid)
    attendee_tracker['Event Count'] = attendee_tracker.groupby('EID Extract')['EID Extract'].transform('count')
    attendee_tracker['Status'] = attendee_tracker['Event Count'].apply(lambda x: 'First Time' if x == 1 else 'Returning')
    
    first_event = attendee_tracker.groupby('EID Extract')['Event Date'].min().rename('First Event Attended')
    last_event = attendee_tracker.groupby('EID Extract')['Event Date'].max().rename('Last Event Attended')
    
    attendee_tracker = attendee_tracker.merge(first_event, on='EID Extract').merge(last_event, on='EID Extract')
    
    print("Final Attendee Tracker:")
    print(attendee_tracker.head())  # Display the first few rows of the final DataFrame

In [19]:
if __name__ == "__main__":
    main()


Required columns missing in file: 2023.10.27 ExecSpeaker Dan Finn .csv. Skipping this file.
Required columns missing in file: 2023.11.10 GoodPlansBadTimes.csv. Skipping this file.
Required columns missing in file: 2023.12.08 ESPP 401 Attendees.csv. Skipping this file.
Final Attendee Tracker:
                  Participant Id                     Event Name Event Type  \
0       beth.mckim@accenture.com  2023.09.08 Retirement 101.csv      Other   
1       beth.mckim@accenture.com  2023.09.08 Retirement 101.csv      Other   
2  malikul.muhamad@accenture.com  2023.09.08 Retirement 101.csv      Other   
3         e.garner@accenture.com  2023.09.08 Retirement 101.csv      Other   
4   pamela.kittman@accenture.com  2023.09.08 Retirement 101.csv      Other   

   Event Date      EID Extract  Event Count     Status First Event Attended  \
0  2023.09.08       beth.mckim            6  Returning           2023.09.08   
1  2023.09.08       beth.mckim            6  Returning           2023.09.08   
2

In [16]:
# RESET DATABASE
file_path = "Attendee-Tracker.csv"

# Read the CSV file, including only the header
df = pd.read_csv(file_path, nrows=0)

# Write the empty DataFrame (with headers only) back to the CSV, overwriting the original file
df.to_csv(file_path, index=False)

# Print the number of rows remaining in the DataFrame
print(f"Number of rows remaining in the DataFrame: {len(df)}")

Number of rows remaining in the DataFrame: 0
