In [32]:
#Import Modules

import os
import pandas as pd
import numpy as np
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import column_index_from_string
import re

In [33]:
#Functions

#Check if an Excel file has more than 1 sheet
def check_single_sheet(file_path):
    """Report whether the Excel workbook at ``file_path`` has exactly one sheet.

    The outcome is printed, not returned:
      * a ✅ line naming the sheet when the workbook has exactly one sheet;
      * a ❌ line listing every sheet name when it has any other number;
      * a ❌ "Failed to load Excel file" line when any exception is raised
        while opening or inspecting the workbook.

    Args:
        file_path: Path of the Excel workbook to inspect.

    Returns:
        None.
    """
    try:
        # Load the Excel workbook
        wb = openpyxl.load_workbook(file_path, read_only=True)
        try:
            # Get all sheet names
            sheet_names = wb.sheetnames

            # Check if there is only one sheet
            if len(sheet_names) != 1:
                print(f"❌ Error: File {file_path} should only have 1 sheet, but found {len(sheet_names)} sheets: {sheet_names}")
            else:
                print(f"✅ File {file_path} loaded successfully. Sheet name: {sheet_names[0]}")
        finally:
            # openpyxl requires read-only workbooks to be closed explicitly, so
            # release the file even if something above raised. Kept inside the
            # outer try so a failing close() is reported instead of propagating.
            wb.close()

    except Exception as e:
        print(f"❌ Failed to load Excel file: {e}")


In [34]:
#Errors and Warnings Explained

#Errors must be fixed before the automation is run
#Warnings should be investigated, but fixing them is not required for the automation to run

In [35]:
#Search in the folder 'Data Files' for 4 application comparative files, and assign them to 4 variables chronologically

# Rank assigned to each term name; extract_term_and_year returns it as the
# second element of its result tuple.
term_order = {"Winter": 1, "Spring": 2, "Summer": 3, "Fall": 4}

def extract_term_and_year(filename):
    """Pull the term and year out of a report filename.

    Searches for the first "<term><optional whitespace><4-digit year>"
    occurrence, matching the term name case-insensitively.

    Returns:
        A ``(year, term_rank)`` tuple, where ``term_rank`` is looked up in
        ``term_order``, or ``None`` when the filename contains no such
        occurrence.
    """
    found = re.search(r"(Winter|Spring|Summer|Fall)\s*(\d{4})", filename, re.IGNORECASE)
    if found is None:
        return None

    term_name, year_text = found.groups()
    # capitalize() maps e.g. "FALL" / "fall" onto the "Fall" key used in term_order
    return (int(year_text), term_order[term_name.capitalize()])

def get_chronological_application_comparative_files(folder_path):
    """Locate the four 'Application Comparative' workbooks and order them by term.

    A file qualifies when its name ends in ".xlsx" (any case), contains
    "application(s) comparative" (case-insensitive, optional whitespace), and
    ``extract_term_and_year`` finds a term and year in it. Only the direct
    entries of ``folder_path`` are examined.

    Args:
        folder_path: Directory to search.

    Returns:
        A list of four paths (``folder_path`` joined with each file name),
        sorted by year and then by term rank.

    Raises:
        ValueError: If the number of qualifying files is not exactly 4.
    """
    matched_files = []

    for file in os.listdir(folder_path):
        # Excel typically leaves a "~$<name>.xlsx" owner/lock file next to a
        # workbook that is currently open. It carries the same name pattern as
        # the real report, so without this guard it would be counted as a fifth
        # file and trigger the ValueError below.
        if file.startswith("~$"):
            continue
        if not file.lower().endswith(".xlsx"):
            continue
        if re.search(r"applications?\s*comparative", file, re.IGNORECASE):
            term_info = extract_term_and_year(file)
            if term_info:
                matched_files.append((term_info, os.path.join(folder_path, file)))

    if len(matched_files) != 4:
        raise ValueError(f"Expected exactly 4 'Application Comparative' files, found {len(matched_files)}.")

    # term_info is (year, term_rank); sort on it only, never on the path
    matched_files.sort(key=lambda x: (x[0][0], x[0][1]))

    return [f[1] for f in matched_files]

# Folder that is searched for the report workbooks
data_folder = "Data Files"

# The finder returns exactly four paths, earliest term first
[
    term_1_file_application_comparative_filename,
    term_2_file_application_comparative_filename,
    term_3_file_application_comparative_filename,
    term_4_file_application_comparative_filename,
] = get_chronological_application_comparative_files(data_folder)

# Show which file landed in which slot so the ordering can be eyeballed
print(
    f"Term 1 Application Comparative: {term_1_file_application_comparative_filename}",
    f"Term 2 Application Comparative: {term_2_file_application_comparative_filename}",
    f"Term 3 Application Comparative: {term_3_file_application_comparative_filename}",
    f"Term 4 Application Comparative: {term_4_file_application_comparative_filename}",
    sep="\n",
)

Term 1 Application Comparative: Data Files\zTestingData-Application Comparative -Summer 2025 as of June 02, 2025.xlsx
Term 2 Application Comparative: Data Files\zTestingData-Application Comparative -Fall 2025 as of June 02, 2025.xlsx
Term 3 Application Comparative: Data Files\zTestingData-Application Comparative -Winter 2026 as of June 02, 2025.xlsx
Term 4 Application Comparative: Data Files\zTestingData-Application Comparative -Spring 2026 as of June 02, 2025.xlsx


In [36]:
#Search in the folder 'Data Files' for 4 application details files, and assign them to 4 variables chronologically

# NOTE(review): term_order and extract_term_and_year are also defined in the
# 'Application Comparative' cell; this cell redefines them with the same
# behaviour so it can be run on its own. Consider keeping a single definition.

# Rank assigned to each term name; used below to order terms within a year.
term_order = {"Winter": 1, "Spring": 2, "Summer": 3, "Fall": 4}

def extract_term_and_year(filename):
    """Return ``(year, term_rank)`` parsed from ``filename``, or ``None``.

    Searches for the first "<term><optional whitespace><4-digit year>"
    occurrence; the term name is matched case-insensitively and its rank is
    looked up in ``term_order``.
    """
    match = re.search(r"(Winter|Spring|Summer|Fall)\s*(\d{4})", filename, re.IGNORECASE)
    if match:
        # capitalize() maps e.g. "FALL" / "fall" onto the "Fall" key in term_order
        term = match.group(1).capitalize()
        year = int(match.group(2))
        return (year, term_order[term])
    return None

def get_chronological_applications_reports_details_files(folder_path):
    """Locate the four 'Applications Reports Details' workbooks, ordered by term.

    A file qualifies when its name ends in ".xlsx" (any case), contains
    "application(s) report(s) detail(s)" (case-insensitive, optional
    whitespace), and ``extract_term_and_year`` finds a term and year in it.
    Only the direct entries of ``folder_path`` are examined.

    Args:
        folder_path: Directory to search.

    Returns:
        A list of four paths (``folder_path`` joined with each file name),
        sorted by year and then by term rank.

    Raises:
        ValueError: If the number of qualifying files is not exactly 4.
    """
    matched_files = []

    for file in os.listdir(folder_path):
        # Excel typically leaves a "~$<name>.xlsx" owner/lock file next to a
        # workbook that is currently open. It carries the same name pattern as
        # the real report, so without this guard it would be counted as a fifth
        # file and trigger the ValueError below.
        if file.startswith("~$"):
            continue
        if not file.lower().endswith(".xlsx"):
            continue
        if re.search(r"applications?\s*reports?\s*details?", file, re.IGNORECASE):
            term_info = extract_term_and_year(file)
            if term_info:
                matched_files.append((term_info, os.path.join(folder_path, file)))

    if len(matched_files) != 4:
        # Name the report type this function actually searches for
        raise ValueError(f"Expected exactly 4 'Applications Reports Details' files, found {len(matched_files)}.")

    # term_info is (year, term_rank); sort on it only, never on the path
    matched_files.sort(key=lambda x: (x[0][0], x[0][1]))

    return [f[1] for f in matched_files]

# Folder that is searched for the report workbooks
data_folder = "Data Files"

# The finder returns exactly four paths, earliest term first
[
    term_1_file_application_details_filename,
    term_2_file_application_details_filename,
    term_3_file_application_details_filename,
    term_4_file_application_details_filename,
] = get_chronological_applications_reports_details_files(data_folder)

# Show which file landed in which slot so the ordering can be eyeballed
print(
    f"Term 1 Applications Reports Details: {term_1_file_application_details_filename}",
    f"Term 2 Applications Reports Details: {term_2_file_application_details_filename}",
    f"Term 3 Applications Reports Details: {term_3_file_application_details_filename}",
    f"Term 4 Applications Reports Details: {term_4_file_application_details_filename}",
    sep="\n",
)

Term 1 Applications Reports Details: Data Files\zTestingData-Applications Reports Details-IR-New-Summer 2025 as of June 02, 2025.xlsx
Term 2 Applications Reports Details: Data Files\zTestingData-Applications Reports Details-IR-New-Fall 2025 as of June 02, 2025.xlsx
Term 3 Applications Reports Details: Data Files\zTestingData-Applications Reports Details-IR-New-Winter 2026 as of June 02, 2025.xlsx
Term 4 Applications Reports Details: Data Files\zTestingData-Applications Reports Details-IR-New-Spring 2026 as of June 02, 2025.xlsx


In [37]:
# Run the single-sheet check on each comparative workbook, in term order
for _comparative_workbook_path in (
    term_1_file_application_comparative_filename,
    term_2_file_application_comparative_filename,
    term_3_file_application_comparative_filename,
    term_4_file_application_comparative_filename,
):
    check_single_sheet(_comparative_workbook_path)

✅ File Data Files\zTestingData-Application Comparative -Summer 2025 as of June 02, 2025.xlsx loaded successfully. Sheet name: Application Comparative
✅ File Data Files\zTestingData-Application Comparative -Fall 2025 as of June 02, 2025.xlsx loaded successfully. Sheet name: Application Comparative
✅ File Data Files\zTestingData-Application Comparative -Winter 2026 as of June 02, 2025.xlsx loaded successfully. Sheet name: Application Comparative
✅ File Data Files\zTestingData-Application Comparative -Spring 2026 as of June 02, 2025.xlsx loaded successfully. Sheet name: Application Comparative


In [38]:
# check_single_sheet reports its result through its own print calls and has no
# return value, so wrapping the call in print() only appended a stray
# "testing / None" line to the output. Call it directly instead.
check_single_sheet(term_4_file_application_comparative_filename)

✅ File Data Files\zTestingData-Application Comparative -Spring 2026 as of June 02, 2025.xlsx loaded successfully. Sheet name: Application Comparative
testing
 None


In [39]:
#Data check term_1_file_application_comparative

# Collectors for the errors and warnings found in each input file: one errors
# list and one warnings list per term, for both the comparative and the details
# workbook. Every name is bound to its own independent empty list.
term_1_file_application_comparative_errors, term_1_file_application_comparative_warnings = [], []
term_1_file_application_details_errors, term_1_file_application_details_warnings = [], []

term_2_file_application_comparative_errors, term_2_file_application_comparative_warnings = [], []
term_2_file_application_details_errors, term_2_file_application_details_warnings = [], []

term_3_file_application_comparative_errors, term_3_file_application_comparative_warnings = [], []
term_3_file_application_details_errors, term_3_file_application_details_warnings = [], []

term_4_file_application_comparative_errors, term_4_file_application_comparative_warnings = [], []
term_4_file_application_details_errors, term_4_file_application_details_warnings = [], []

