In [1]:
import os
import pandas as pd  # type: ignore
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter.ttk import Progressbar

In [2]:
class LabelFillerApp:
    """Application object behind the "Label Filler" Tkinter window.

    Holds the paths of the CP reference CSV files together with the user's
    current label-file and save-location choices.

    NOTE(review): ``__init__`` calls ``self.setup_ui()`` and
    ``self.check_files()``, which are not defined inside this class body as
    shown here; they must be available as methods before the class is
    instantiated.
    """

    # Folder searched for the CP files when no ``cp_files_path`` is passed.
    DEFAULT_CP_FILES_PATH = r"C:\Users\mshah\The Royalty Network\Share - Documents\MASTER ADMINISTRATOR FILE\DATA APPS\Monil Data Base (Don't Touch)"
    # File names expected inside the CP folder, in the order they are read.
    CP_FILE_NAMES = ("CP_1.csv", "CP_2.csv")

    def __init__(self, root, cp_files_path=None):
        """Store the window, resolve the CP file paths and build the UI.

        Args:
            root: Tk root (or toplevel) window hosting the application; its
                title is set to "Label Filler".
            cp_files_path: Optional folder (str or path-like) containing the
                CP CSV files. When omitted, ``DEFAULT_CP_FILES_PATH`` is used,
                which keeps the previous hard-coded behaviour.
        """
        self.root = root
        self.root.title("Label Filler")  # Set the title of the application window

        # Allow the machine-specific default to be overridden by the caller.
        if cp_files_path is None:
            cp_files_path = self.DEFAULT_CP_FILES_PATH
        self.cp_files_path = os.fspath(cp_files_path)
        self.cp_files = [
            os.path.join(self.cp_files_path, file_name)
            for file_name in self.CP_FILE_NAMES
        ]

        self.label_file = None  # Path of the chosen label file (None until selected)
        self.save_location = None  # Output directory (None until selected)

        self.setup_ui()  # Create the widgets
        self.check_files()  # Show CP file availability and set the Run button state

In [3]:
def setup_ui(self):
        """Create every widget of the main window and place it on the grid.

        Layout by grid row: 0-1 label-file caption and name, 2-3 CP-file
        caption and list, 4 the two chooser buttons, 5 the save location,
        6 the Run button (created disabled), 7 the progress bar (hidden right
        after placement) and 8 the status line.

        Widgets that are updated later are stored on ``self``:
        ``label_file_label``, ``cp_files_label``, ``save_location_label``,
        ``run_button``, ``progress`` and ``status_label``.
        """
        window = self.root
        pad = {"padx": 10, "pady": 5}  # Spacing shared by all widgets except the Run button
        both_columns = {"column": 0, "columnspan": 2}  # Stretch across the two grid columns

        # Label file: caption plus the name of the current selection.
        tk.Label(window, text="Label File:").grid(row=0, column=0, sticky='w', **pad)
        self.label_file_label = tk.Label(window, text="No file selected")
        self.label_file_label.grid(row=1, sticky='we', **both_columns, **pad)

        # CP files: caption plus a (initially empty) listing.
        tk.Label(window, text="CP Files:").grid(row=2, column=0, sticky='w', **pad)
        self.cp_files_label = tk.Label(window, text="")
        self.cp_files_label.grid(row=3, sticky='we', **both_columns, **pad)

        # Chooser buttons sit side by side on row 4.
        label_button = tk.Button(window, text="Choose Label File", command=self.choose_label_file)
        label_button.grid(row=4, column=0, **pad)
        save_button = tk.Button(window, text="Choose Save Location", command=self.choose_save_location)
        save_button.grid(row=4, column=1, **pad)

        # Currently selected output folder.
        self.save_location_label = tk.Label(window, text="No location selected")
        self.save_location_label.grid(row=5, sticky='we', **both_columns, **pad)

        # Run button starts disabled; it uses a larger vertical padding.
        self.run_button = tk.Button(window, text="Run", command=self.process_files, state=tk.DISABLED)
        self.run_button.grid(row=6, padx=10, pady=10, **both_columns)

        # Progress bar is placed once so its grid slot is remembered, then hidden.
        self.progress = Progressbar(window, orient=tk.HORIZONTAL, length=200, mode='determinate')
        self.progress.grid(row=7, **both_columns, **pad)
        self.progress.grid_remove()

        # Status line shown under the progress bar.
        self.status_label = tk.Label(window, text="")
        self.status_label.grid(row=8, **both_columns, **pad)

In [4]:
def check_files(self):
        """Refresh the CP-file listing and the Run button state.

        Each path in ``self.cp_files`` is shown by base name, one per line,
        with " (Not Found)" appended when the path does not exist. The Run
        button is enabled only when every CP file exists and a label file has
        been selected; otherwise it is disabled.
        """
        exists_flags = [os.path.exists(path) for path in self.cp_files]
        base_names = [os.path.basename(path) for path in self.cp_files]

        # One display line per CP file, flagging the ones that are missing.
        listing = [
            name if present else f"{name} (Not Found)"
            for name, present in zip(base_names, exists_flags)
        ]
        self.cp_files_label.config(text="\n".join(listing))

        # Running needs all CP files on disk plus a chosen label file.
        ready = all(exists_flags) and self.label_file
        self.run_button.config(state=tk.NORMAL if ready else tk.DISABLED)

In [5]:
def choose_label_file(self):
        """Ask the user for a CSV label file and remember the selection.

        Opens a file dialog restricted to ``*.csv``. When a path comes back,
        it is stored in ``self.label_file``, its base name is shown in
        ``self.label_file_label`` and ``check_files`` is re-run. A falsy
        result (nothing chosen) leaves everything untouched.
        """
        chosen_path = filedialog.askopenfilename(filetypes=[("CSV files", "*.csv")])
        if not chosen_path:
            return  # Nothing selected: keep the previous choice

        self.label_file = chosen_path
        self.label_file_label.config(text=os.path.basename(chosen_path))
        self.check_files()  # The Run button state depends on the label file

In [6]:
def choose_save_location(self):
        """Ask the user for an output directory and remember the selection.

        When the directory dialog returns a path, it is stored in
        ``self.save_location``, shown in ``self.save_location_label`` and
        ``check_files`` is re-run. A falsy result (nothing chosen) leaves
        everything untouched.
        """
        chosen_dir = filedialog.askdirectory()
        if not chosen_dir:
            return  # Nothing selected: keep the previous choice

        self.save_location = chosen_dir
        self.save_location_label.config(text=chosen_dir)
        self.check_files()  # Refresh the file listing and Run button state

In [7]:
def process_files(self):
        """Fill missing 'Label Name' values in the label file and save the result.

        Every existing file in ``self.cp_files`` is read in chunks and reduced
        to an ISRC -> 'Main Album Label' lookup (the first occurrence of each
        ISRC wins; rows without an ISRC are ignored). ``self.label_file`` is
        then streamed in chunks: an empty 'Label Name' takes the looked-up
        label, or 'No Match Found' when there is none. Column names of all
        files are stripped of surrounding whitespace. The result is written to
        ``final_data_file.csv`` inside ``self.save_location``.

        Returns nothing and raises nothing to the caller: a missing save
        location and any processing error are reported through message boxes.
        The progress bar is shown while working and hidden again afterwards.
        """
        if not self.save_location:
            messagebox.showerror("Error", "Please select a save location.")  # Error if no save location
            return

        self.progress.grid()  # Show the progress bar
        self.progress['value'] = 0  # Initialize progress value
        self.status_label.config(text="Processing files...")  # Update status

        try:
            lookup_columns = ('ISRC', 'Main Album Label')

            # Collect the CP chunks in a list and concatenate once; growing a
            # DataFrame with concat inside the loop re-copies all earlier rows.
            cp_chunks = []
            for cp_file in self.cp_files:
                if not os.path.exists(cp_file):
                    continue
                self.status_label.config(text=f"Reading {os.path.basename(cp_file)}...")
                self.update()  # Update the UI
                # The context manager closes the file even if a chunk fails to parse.
                with pd.read_csv(cp_file, chunksize=100000, on_bad_lines='skip') as cp_reader:
                    for cp_chunk in cp_reader:
                        # Strip before concatenating so "ISRC " and "ISRC" align.
                        cp_chunk.columns = cp_chunk.columns.str.strip()
                        # Only the lookup columns are ever used; drop the rest early.
                        present = [col for col in lookup_columns if col in cp_chunk.columns]
                        cp_chunks.append(cp_chunk[present])

            merged_data = pd.concat(cp_chunks, ignore_index=True) if cp_chunks else pd.DataFrame()

            self.progress['value'] = 50  # Update progress
            self.status_label.config(text="Merging data...")  # Update status

            if 'ISRC' not in merged_data.columns:
                raise KeyError("'ISRC' column missing in CP files.")  # Check for necessary columns
            if 'Main Album Label' not in merged_data.columns:
                raise KeyError("'Main Album Label' column missing in CP files.")

            # Build the lookup once (it does not depend on the label chunk).
            # Rows without an ISRC are dropped so that label rows with a
            # missing ISRC cannot "match" them.
            isrc_to_label = (
                merged_data.dropna(subset=['ISRC'])
                .drop_duplicates(subset=['ISRC'])
                .set_index('ISRC')['Main Album Label']
            )

            processed_chunks = []
            with pd.read_csv(self.label_file, chunksize=100000) as label_reader:
                for chunk in label_reader:
                    self.status_label.config(text="Processing label data...")
                    self.update()  # Update the UI

                    # Strip whitespace from column names
                    chunk.columns = chunk.columns.str.strip()
                    absent = [col for col in ('ISRC', 'Label Name') if col not in chunk.columns]
                    if absent:
                        raise KeyError(f"Label file is missing required column(s): {', '.join(absent)}")

                    # Assign the filled column back instead of a chained
                    # ``fillna(..., inplace=True)``, which does not update the
                    # frame under pandas Copy-on-Write.
                    chunk['Label Name'] = (
                        chunk['Label Name']
                        .fillna(chunk['ISRC'].map(isrc_to_label))
                        .fillna('No Match Found')
                    )
                    processed_chunks.append(chunk)

            if processed_chunks:
                final_label_data = pd.concat(processed_chunks, ignore_index=True)
            else:
                final_label_data = pd.DataFrame()

            output_file = os.path.join(self.save_location, 'final_data_file.csv')  # Define output file path
            final_label_data.to_csv(output_file, index=False)  # Save the final label data to a CSV file

            self.progress['value'] = 100  # Complete progress
            self.status_label.config(text="Processing complete!")  # Update status
            messagebox.showinfo("Success", f"Files processed and saved at {self.save_location}.")  # Success message
        except Exception as e:
            messagebox.showerror("Error", str(e))  # Report any error that occurs during processing
        finally:
            self.progress.grid_remove()  # Hide the progress bar after processing

In [8]:
def update(self):
        """Let the root window run its pending idle tasks.

        Thin wrapper around ``self.root.update_idletasks()``; it is called
        between processing steps after the status text has been changed.
        """
        window = self.root
        window.update_idletasks()

In [None]:
if __name__ == "__main__":
    # Entry point: build the window, attach the application, then run the GUI.
    root = tk.Tk()  # Create the main application window
    app = LabelFillerApp(root)  # Builds the widgets and runs the initial CP-file check
    root.mainloop()  # Start the Tkinter event loop (blocks while the window is open)