In [2]:
import openpyxl
import os
from datetime import datetime
from collections import defaultdict

# Zero-based positions of the 'Form1' columns this report reads.
_COL_ID = 0         # Column A -> "ID"
_COL_DATE = 2       # Column C -> "Date Of Call"
_COL_SUBMITTER = 3  # Column D -> "Submitter"
_COL_REVIEW = 7     # Column H -> "Type of Review"
_COL_AGENT = 8      # Column I -> "Name (Agent)"
_COL_TL = 9         # Column J -> "Name of TL"
_COL_PHONE = 10     # Column K -> written out under the "CID" header


def _phone_sort_key(row):
    """Return a sort key for a row based on its column K value.

    Numeric values sort before everything else, by value; any other value
    sorts by its string form. Homogeneous data keeps the ordering a plain
    value sort would give, while mixed numeric/text cells no longer raise
    TypeError when compared.
    """
    value = row[_COL_PHONE].value
    if isinstance(value, (int, float)):
        return (0, value, "")
    return (1, 0, str(value))


# Function to check for duplicate numbers in column K and copy specified columns to a new sheet
def check_for_duplicates(master_file, output_file):
    """Write this month's duplicated column-K numbers to a new workbook.

    Reads the 'Form1' sheet of ``master_file`` and keeps rows whose column C
    holds a ``datetime`` in the current month and year, whose column H is
    "Monitor" or "Additional Monitor", and whose column K is non-empty.
    Rows are grouped by their column K value; a group is reported when it
    has more than one row and at least one of those rows was submitted
    (column D) by one of the valid submitters. Reported rows are sorted by
    column K and columns A, D, C, H, I, J, K are copied, in that order, to a
    sheet named "Duplicates MTD" in a new workbook saved at ``output_file``.

    Args:
        master_file: Path of the workbook containing the 'Form1' sheet.
        output_file: Path where the report workbook is saved.

    Returns:
        None. Progress and errors are reported with ``print``; exceptions
        (including ``PermissionError``) are caught here and not re-raised.
    """
    try:
        print("Loading the Master workbook...")

        # Load the 'Master.xlsx' workbook and its 'Form1' sheet
        master_wb = openpyxl.load_workbook(master_file)
        master_sheet = master_wb['Form1']  # Using 'Form1' as the sheet name

        # Create a new workbook for the output file
        output_wb = openpyxl.Workbook()
        output_sheet = output_wb.active
        output_sheet.title = "Duplicates MTD"

        # Add headers to the new sheet
        headers = [
            "ID", "Submitter", "Date Of Call", "Type of Review", "Name (Agent)", "Name of TL", "CID"
        ]
        for col_idx, header in enumerate(headers, start=1):
            output_sheet.cell(row=1, column=col_idx, value=header)

        print("Processing rows for duplicates in column K...")

        # Read the clock once so month and year always come from the same instant
        now = datetime.now()
        current_period = (now.year, now.month)

        # Set of valid submitters
        valid_submitters = {
            "jansen.machado@365roi.com",
            "justin.dennis@365roi.com",
            "daniel.suarez@365roi.com"
        }
        monitored_review_types = ("Monitor", "Additional Monitor")

        # Column K value -> every qualifying row carrying that value.
        # NOTE(review): grouping uses the raw cell value, so a number stored
        # as text and the same number stored numerically land in different
        # groups -- confirm whether they should be treated as duplicates.
        phone_numbers = defaultdict(list)

        # max_col guarantees every row tuple reaches column K even when the
        # sheet's used range is narrower, so the indexing below cannot fail.
        data_rows = master_sheet.iter_rows(
            min_row=2,
            max_row=master_sheet.max_row,
            max_col=_COL_PHONE + 1,
        )
        for row in data_rows:
            date_value = row[_COL_DATE].value
            if not isinstance(date_value, datetime):
                continue
            if (date_value.year, date_value.month) != current_period:
                continue
            if row[_COL_REVIEW].value not in monitored_review_types:
                continue
            phone_number = row[_COL_PHONE].value
            if phone_number:
                phone_numbers[phone_number].append(row)

        # Keep only groups that are real duplicates and involve a valid submitter
        filtered_rows = []
        for rows in phone_numbers.values():
            if len(rows) < 2:
                continue
            if any(row[_COL_SUBMITTER].value in valid_submitters for row in rows):
                filtered_rows.extend(rows)

        # Sort filtered rows by column K (smallest to largest); the key
        # tolerates a mix of numeric and text cells.
        filtered_rows.sort(key=_phone_sort_key)

        # Columns A, D, C, H, I, J, K, in header order
        columns_to_copy = [
            _COL_ID, _COL_SUBMITTER, _COL_DATE, _COL_REVIEW,
            _COL_AGENT, _COL_TL, _COL_PHONE,
        ]

        # Write filtered rows to the new sheet (row 1 holds the headers)
        for row_idx, row in enumerate(filtered_rows, start=2):
            for col_idx, master_col_idx in enumerate(columns_to_copy, start=1):
                cell_value = row[master_col_idx].value
                # "Date Of Call" is written as short-date text (MM/DD/YYYY)
                if master_col_idx == _COL_DATE and isinstance(cell_value, datetime):
                    cell_value = cell_value.strftime('%m/%d/%Y')
                output_sheet.cell(row=row_idx, column=col_idx, value=cell_value)

        # Save the output workbook with the new sheet
        output_wb.save(output_file)

        print(f"Output successfully written to {output_file}, sheet 'Duplicates MTD'")

    except PermissionError:
        print("Permission denied: Make sure the file is not open or being used by another program.")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem instead of raising
        print(f"An error occurred: {e}")

# Both workbooks are looked up in the directory the script is run from
folder_path = os.getcwd()

# Build the input and output workbook paths inside that directory
master_file, output_file = (
    os.path.join(folder_path, workbook_name)
    for workbook_name in ('Master.xlsx', 'Dup_V1.xlsx')
)

# Generate the duplicate-number report
check_for_duplicates(master_file, output_file)

Loading the Master workbook...
Processing rows for duplicates in column K...
Output successfully written to C:\Users\Remote\Documents\Automation\Dup_V1.xlsx, sheet 'Duplicates MTD'
