In [None]:
import tkinter as tk
from tkinter import simpledialog, messagebox
import pandas as pd
import os
import subprocess
import sys
from datetime import datetime


def get_user_inputs():
    """Prompt the user for the data pull type, the month and the year.

    Three tkinter dialogs are shown in sequence. Text answers are matched
    against the allowed values ignoring surrounding whitespace and letter
    case, and the canonical spelling ('ESPP', '401k', 'April', 'October')
    is what gets returned. Any invalid answer shows an error box and
    restarts the sequence from the first question.

    Returns:
        tuple: ``(data_pull_type, month_option, year)`` where the first two
        are canonical strings and ``year`` is an int between 1900 and next
        year inclusive.

    Raises:
        SystemExit: if the user cancels any of the dialogs. Previously a
            cancel returned ``None``, failed validation and re-prompted
            forever, leaving no way to leave the loop.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the main window

    valid_event_types = {'ESPP', '401k'}
    valid_months = {'April', 'October'}

    try:
        while True:
            raw_type = simpledialog.askstring("Data Pull Type", "Enter data pull type (ESPP or 401k):")
            if raw_type is None:  # dialog cancelled or closed
                raise SystemExit("Data pull cancelled by user.")
            data_pull_type = _match_choice(raw_type, valid_event_types)
            if data_pull_type is None:
                messagebox.showerror("Invalid Input", "Please enter a valid data pull type (ESPP or 401k).")
                continue

            raw_month = simpledialog.askstring("Month Option", "Choose month (April or October):")
            if raw_month is None:
                raise SystemExit("Data pull cancelled by user.")
            month_option = _match_choice(raw_month, valid_months)
            if month_option is None:
                messagebox.showerror("Invalid Input", "Please enter a valid month (April or October).")
                continue

            year = simpledialog.askinteger("Year", "Enter the year:")
            if year is None:
                raise SystemExit("Data pull cancelled by user.")
            current_year = datetime.now().year
            if 1900 <= year <= current_year + 1:
                break
            messagebox.showerror("Invalid Input", "Please enter a valid year.")
    finally:
        # Tear the hidden root down on every exit path, including cancel.
        root.destroy()

    return data_pull_type, month_option, year


def _match_choice(raw, valid_options):
    """Return the member of *valid_options* equal to *raw* ignoring case and
    surrounding whitespace, or ``None`` when nothing matches."""
    wanted = raw.strip().casefold()
    for option in valid_options:
        if option.casefold() == wanted:
            return option
    return None

def calculate_date_range(month_option, year):
    """Return the ``(start_date, end_date)`` strings for a pull window.

    "April" selects 16 October of the previous year through 15 April of
    ``year``; any other value is treated as October and selects 16 April
    through 15 October of ``year``. Dates are formatted ``YYYY-MM-DD``
    (the year itself is not zero-padded).
    """
    is_april = month_option == "April"
    window_start = f"{year - 1}-10-16" if is_april else f"{year}-04-16"
    window_end = f"{year}-04-15" if is_april else f"{year}-10-15"
    return window_start, window_end

def filter_attendees(data_pull_type, start_date, end_date, attendee_db):
    """Select the attendees of one event type inside a date window.

    Rows of ``attendee_db`` are kept when 'Event Type' equals
    ``data_pull_type`` and 'Event Date' lies between ``start_date`` and
    ``end_date`` (both inclusive). Only the 'EID Extract', 'Event Type'
    and 'Event Date' columns are returned, and only the first row for each
    'EID Extract' value survives.

    NOTE(review): the date comparison uses whatever dtype 'Event Date' has;
    for plain strings this is lexicographic, which presumably relies on the
    column being ISO ``YYYY-MM-DD`` - confirm against the database file.
    """
    wanted_columns = ['EID Extract', 'Event Type', 'Event Date']

    type_matches = attendee_db['Event Type'] == data_pull_type
    event_dates = attendee_db['Event Date']
    in_window = (event_dates >= start_date) & (event_dates <= end_date)

    selected = attendee_db.loc[type_matches & in_window, wanted_columns]
    # Keep one row per attendee id (the first occurrence wins).
    return selected.drop_duplicates(subset=['EID Extract'])

def create_unique_filename(base_filename):
    """Return a path derived from ``base_filename`` that does not exist yet.

    ``base_filename`` itself is returned when nothing exists at that path.
    Otherwise ``_1``, ``_2``, ... is inserted before the extension until an
    unused name is found.
    """
    stem, suffix = os.path.splitext(base_filename)
    candidate = base_filename
    counter = 0

    while os.path.exists(candidate):
        counter += 1
        candidate = f"{stem}_{counter}{suffix}"

    return candidate

def save_and_open_csv(data, data_pull_type, month_option, year):
    """Write ``data`` to a fresh CSV file and open it with the OS default app.

    The file is named ``DataPull-<type>.<month>.<year>.csv``; a numeric
    suffix is added by ``create_unique_filename`` when that name is taken.
    The row index is not written.
    """
    target = create_unique_filename(
        f"DataPull-{data_pull_type}.{month_option}.{year}.csv"
    )
    data.to_csv(target, index=False)

    try:
        os.startfile(target)  # For Windows
    except AttributeError:
        # os.startfile is missing here, so fall back to the platform's
        # command-line opener: 'open' on MacOS, 'xdg-open' otherwise (Linux).
        opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
        subprocess.call([opener, target])

def main():
    """Run one data pull: prompt, filter the attendee database, export a CSV.

    The user is asked for the pull type, month and year; the matching date
    window is computed; rows are read from ``Attendee-Database.csv`` (a
    relative path, so it is looked up in the current working directory),
    filtered, and written to a new CSV that is then opened. If the database
    file is missing an error box is shown and nothing is written.
    """
    data_pull_type, month_option, year = get_user_inputs()
    start_date, end_date = calculate_date_range(month_option, year)

    attendee_db_path = "Attendee-Database.csv"
    if not os.path.exists(attendee_db_path):
        messagebox.showerror("File Not Found", "Attendee database file not found. Please check the file path.")
        return

    attendee_db = pd.read_csv(attendee_db_path)
    attendees = filter_attendees(data_pull_type, start_date, end_date, attendee_db)
    save_and_open_csv(attendees, data_pull_type, month_option, year)

if __name__ == "__main__":
    main()


In [2]:
import wx
import pandas as pd
import os
import subprocess
import sys
from datetime import datetime

class DataPullApp(wx.App):
    """wx application that performs the whole data pull during start-up.

    All work happens inside ``OnInit``: the user is prompted for the pull
    parameters, the attendee database is filtered, and the result is written
    to a CSV file that is then opened. No frame is ever created.
    """

    def OnInit(self):
        """Run the data pull and tell wx whether to keep the app alive.

        Returns False when the user cancels a prompt and also after a
        successful export (``processing_complete`` set), so the app does not
        stay running once the job is done. Returns True only when
        processing did not complete (e.g. the database file was missing).

        NOTE(review): when this returns False, constructing the app ends
        with ``SystemExit: OnInit returned false, exiting...`` - that is
        the normal exit of this tool, not a failure.
        """
        self.processing_complete = False
        data_pull_type, month_option, year = self.get_user_inputs()
        if not data_pull_type or not month_option or not year:
            return False

        start_date, end_date = self.calculate_date_range(month_option, year)
        self.process_data(data_pull_type, month_option, year, start_date, end_date)

        if self.processing_complete:
            return False
        return True

    def get_user_inputs(self):
        """Ask for the pull type, month and year.

        Returns:
            tuple: ``(data_pull_type, month_option, year)``, or
            ``(None, None, None)`` as soon as any dialog is cancelled.
        """
        valid_event_types = ['ESPP', '401k']
        valid_months = ['April', 'October']

        data_pull_type = self.get_user_choice(
            "Select data pull type", valid_event_types, "Data Pull Type")
        if data_pull_type is None:
            return None, None, None

        month_option = self.get_user_choice(
            "Select month", valid_months, "Month Option")
        if month_option is None:
            return None, None, None

        # Pre-fill with the current year instead of a hard-coded value that
        # goes stale; the accepted range is 1900 through next year.
        current_year = datetime.now().year
        year = wx.GetNumberFromUser("Enter the year:", "Year", "Data Pull Year", current_year, 1900, current_year + 1)
        if year == -1:  # value this code treats as "cancelled / invalid"
            wx.MessageBox("Please enter a valid year.", "Invalid Input", wx.OK | wx.ICON_ERROR)
            return None, None, None

        return data_pull_type, month_option, year

    def get_user_choice(self, message, choices, title="Choose an option"):
        """Show a single-choice dialog and return the selected string.

        Args:
            message: Prompt text shown in the dialog.
            choices: List of strings to choose from.
            title: Dialog caption; defaults to the original fixed caption.

        Returns:
            The selected string, or None if the dialog was not confirmed.
        """
        dlg = wx.SingleChoiceDialog(None, message, title, choices)
        try:
            if dlg.ShowModal() == wx.ID_OK:
                return dlg.GetStringSelection()
            return None
        finally:
            # Destroy on both the OK and the cancel path so the dialog
            # never outlives the call.
            dlg.Destroy()

    def get_user_input(self, title, message):
        """Show a text-entry dialog and return the entered text.

        Returns:
            The entered string, or None if the dialog was not confirmed.
        """
        dlg = wx.TextEntryDialog(None, message, title)
        try:
            if dlg.ShowModal() == wx.ID_OK:
                return dlg.GetValue()
            return None
        finally:
            dlg.Destroy()

    def calculate_date_range(self, month_option, year):
        """Return ``(start_date, end_date)`` strings for the pull window.

        "April" covers 16 October of the previous year through 15 April of
        ``year``; anything else is treated as October and covers 16 April
        through 15 October of ``year``.
        """
        if month_option == "April":
            start_date = f"{year - 1}-10-16"
            end_date = f"{year}-04-15"
        else:  # October
            start_date = f"{year}-04-16"
            end_date = f"{year}-10-15"
        return start_date, end_date

    def process_file(self, file_path):
        """Load one event CSV and reduce it to one row per participant.

        Rows without a 'Participant Id' are dropped, duplicates of the same
        id are removed, and 'Event Name', 'Event Type' and 'Event Date'
        columns are derived from the file name.

        Returns:
            A DataFrame with columns 'Participant Id', 'Event Name',
            'Event Type', 'Event Date'; an empty DataFrame when the file
            has no 'Participant Id' column.
        """
        file_name = os.path.basename(file_path)
        df = pd.read_csv(file_path)

        if 'Participant Id' not in df.columns:
            print(f"Required column 'Participant Id' missing in file: {file_name}. Skipping this file.")
            return pd.DataFrame()

        df = df[df['Participant Id'].notna()]
        # Copy so the column assignments below act on an independent frame
        # rather than on a slice of the frame read from disk.
        df = df.drop_duplicates(subset=['Participant Id']).copy()
        df['Event Name'] = file_name
        df['Event Type'] = self.determine_event_type(file_name)
        df['Event Date'] = self.extract_event_date(file_name)
        return df[['Participant Id', 'Event Name', 'Event Type', 'Event Date']]

    def determine_event_type(self, name):
        """Classify ``name`` as "ESPP", "401k" or "Other" by substring."""
        if "ESPP" in name:
            return "ESPP"
        # "401" also matches the longer spellings "401k" and "401(k)".
        if "401" in name:
            return "401k"
        return "Other"

    def extract_event_date(self, name):
        """Return the first whitespace-separated token of ``name``.

        NOTE(review): presumably file names start with the event date -
        confirm. Raises IndexError when ``name`` is empty or only
        whitespace.
        """
        return name.split()[0]

    def process_data(self, data_pull_type, month_option, year, start_date, end_date):
        """Filter ``Attendee-Database.csv`` and export the result.

        Shows an error box and returns without exporting when the database
        file does not exist; in that case ``processing_complete`` stays
        False.
        """
        attendee_db_path = "Attendee-Database.csv"
        if not os.path.exists(attendee_db_path):
            wx.MessageBox("Attendee database file not found. Please check the file path.", "Error", wx.ICON_ERROR)
            return

        attendee_db = pd.read_csv(attendee_db_path)
        attendees = self.filter_attendees(data_pull_type, start_date, end_date, attendee_db)
        self.save_and_open_csv(attendees, data_pull_type, month_option, year)

    def filter_attendees(self, data_pull_type, start_date, end_date, attendee_db):
        """Return unique attendees of ``data_pull_type`` inside the window.

        Keeps rows whose 'Event Type' equals ``data_pull_type`` and whose
        'Event Date' is between ``start_date`` and ``end_date`` inclusive,
        restricted to 'EID Extract', 'Event Type' and 'Event Date', with
        only the first row per 'EID Extract'.
        """
        filtered_attendees = attendee_db[
            (attendee_db['Event Type'] == data_pull_type) &
            (attendee_db['Event Date'] >= start_date) &
            (attendee_db['Event Date'] <= end_date)
        ][['EID Extract', 'Event Type', 'Event Date']]
        return filtered_attendees.drop_duplicates(subset=['EID Extract'])

    def save_and_open_csv(self, data, data_pull_type, month_option, year):
        """Write ``data`` to a new CSV, open it, and mark processing done."""
        base_filename = f"DataPull-{data_pull_type}.{month_option}.{year}.csv"
        unique_filename = self.create_unique_filename(base_filename)

        data.to_csv(unique_filename, index=False)
        try:
            os.startfile(unique_filename)  # For Windows
        except AttributeError:
            if sys.platform == 'darwin':  # For MacOS
                subprocess.call(['open', unique_filename])
            else:  # For Linux
                subprocess.call(['xdg-open', unique_filename])

        self.processing_complete = True  # Set the flag to True after processing

    def create_unique_filename(self, base_filename):
        """Return ``base_filename``, or a ``_N``-suffixed variant of it,
        such that no file exists at the returned path."""
        version = 1
        filename, file_extension = os.path.splitext(base_filename)
        new_filename = base_filename

        while os.path.exists(new_filename):
            new_filename = f"{filename}_{version}{file_extension}"
            version += 1

        return new_filename

if __name__ == "__main__":
    app = DataPullApp()
    app.MainLoop()


SystemExit: OnInit returned false, exiting...

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


pip install wxPython

pip install openpyxl (required for writing to Excel files)