# In[26]:
#Import Modules

import pandas as pd
import numpy as np
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import column_index_from_string
import re

#Load files. The file names must match exactly the names of the application data files. Manual Input Required.
#Each term has two workbooks: an "Application Comparative" file and an "Applications Reports Details" file.
#Term numbering used by the variable names below: term_1 = Summer 2025, term_2 = Fall 2025, term_3 = Winter 2026, term_4 = Spring 2026.
#NOTE(review): the comparative files are dated "May 16, 2025", the details files "May 01, 2025", and the extraction date below is "May 25, 2025" - confirm these differences are intended.

term_1_file_application_comparative = "Application Comparative-Summer 2025 as of May 16, 2025.xlsx"
term_1_file_application_details = "Applications Reports Details-IR-New-Summer 2025 as of May 01, 2025 (Fake Data).xlsx"

term_2_file_application_comparative = "Application Comparative-Fall 2025 as of May 16, 2025.xlsx"
term_2_file_application_details = "Applications Reports Details-IR-New-Fall 2025 as of May 01, 2025 (Fake Data).xlsx"

term_3_file_application_comparative = "Application Comparative-Winter 2026 as of May 16, 2025.xlsx"
term_3_file_application_details = "Applications Reports Details-IR-New-Winter 2026 as of May 01, 2025 (Fake Data).xlsx"

term_4_file_application_comparative = "Application Comparative-Spring 2026 as of May 16, 2025.xlsx"
term_4_file_application_details = "Applications Reports Details-IR-New-Spring 2026 as of May 01, 2025 (Fake Data).xlsx"

#Manually define variables. Manual Input Required: update all three values for every new data extraction.
variable_Extraction_date = "May 25, 2025" #Enter the date of the data extraction. Format: "Month DD, YYYY"
variable_range_applications_academic_year = "2025-26 & 2026-27" #Enter the academic year(s) covered by the application data. Format: "YYYY-YY" or "YYYY-YY & YYYY-YY"
variable_current_academic_year_file_name = "2025-26" #Enter the academic year used in the output file name. Format: "YYYY-YY"

#Program code lists (every list must match the programs on the template, using the codes from Power Campus)
#One list per program area; the lists are merged into program_ID_all at the end.

#Business Administration programs on the template
program_ID_Business_Administration = [
    "BABUSC",
    "BAACTD",
    "BABUSD",
    "BAESPM",
    "BAHRMD",
    "GSCMD",
    "DIGMD",
    "BAATCO",
    "BABSCO",
    "NAO",
    "ALO",
    "HOSMNG",
    "BAHRMC",
    "OFAC",
]

#Education and Academic Upgrading programs on the template
program_ID_Education_and_Academic_Upgrading = [
    "EAP",
    "HSAFA",
    "HSAD",
    "HSGD",
    "HSNP",
    "UTELEM",
    "UTSEC",
]

#University Studies and Environmental Science programs on the template
program_ID_University_Studies_and_Environmental_Science = [
    "ENVTD",
    "ENVNC",
    "UTARTS",
    "UTCOMM",
    "UTENG",
    "UTSCGS",
    "UTSCWK",
    "OPEN",
    "GOVNSC",
    "COMSCC",
    "COMSCD",
    "GNARTC",
    "GNSCIC",
    "ANCPDC",
    "NSPDC",
]

#Language Instruction for Newcomers to Canada programs on the template
program_ID_Language_Instruction_for_Newcomers_to_Canada = [
    "LINC13",
]

#Childhood Studies and Human Services programs on the template
program_ID_Childhood_Studies_and_Human_Services = [
    "AELCC",
    "ECCERT",
    "ECDIP",
    "CYCD",
    "ECEAC",
    "SOCWKD",
]

#Nursing and Allied Health programs on the template
program_ID_Nursing_and_Allied_Health = [
    "HCAC",
    "PCPC",
    "ACP",
    "PNR",
    "BSCN",
    "BSCACN",
    "BSCAEE",
    "BSCAFT",
    "IENCN",
]

#Pre-employment programs on the template
program_ID_Pre_employment = [
    "PAELEC",
    "PAWELD",
    "PAHET",
    "PAPLUM",
]

#Power Engineering programs on the template
program_ID_Power_Engineering = [
    "POWCM4",
    "POWCM3",
    "POWCP4",
    "POWCO4",
    "POWCO3",
    "PPET",
]

#Single list holding every program code, in the same order as the lists above
program_ID_all = [
    *program_ID_Business_Administration,
    *program_ID_Education_and_Academic_Upgrading,
    *program_ID_University_Studies_and_Environmental_Science,
    *program_ID_Language_Instruction_for_Newcomers_to_Canada,
    *program_ID_Childhood_Studies_and_Human_Services,
    *program_ID_Nursing_and_Allied_Health,
    *program_ID_Pre_employment,
    *program_ID_Power_Engineering,
]


#All possible Population values. Write them fully capitalized here, regardless of how they are capitalized in your data.
population_values = ["DOMESTIC", "INTERNATIONAL"]

#All possible Year values, stored as text
year_values = [str(year_number) for year_number in (1, 2, 3)]
    
#Function for dividing numerator by denominator, giving the result as a decimal value.
def division_calculator_decimal(numerator, denominator):
    """Return numerator / denominator, or None when no ratio can be computed.

    None is returned when the denominator is None or equal to 0, and when
    the two values cannot be divided (the division raises TypeError).
    A numerator of None is treated as 0.
    """
    if denominator in (None, 0):
        return None

    dividend = 0 if numerator is None else numerator
    try:
        quotient = dividend / denominator
    except TypeError:
        # Operands that do not support division yield "no value" instead of an error.
        return None
    return quotient

# In[27]:
#Program Specific Applications Calculations Term 2

# Open the Term 2 application details workbook so its sheet names can be inspected
wb = openpyxl.load_workbook(term_2_file_application_details, data_only=True)

# Find the "Clean Data" sheet, ignoring letter case and any spaces in the sheet name.
# The first matching sheet wins; if the loop finishes without a match, stop with an error.
matched_sheet_name = None
for sheet_name in wb.sheetnames:
    normalized = "".join(sheet_name.split(" ")).lower()
    if normalized == "cleandata":
        matched_sheet_name = sheet_name
        break
else:
    raise ValueError(f"No sheet matching 'Clean Data' found. Available sheets: {wb.sheetnames}")

# Read the matched sheet into a pandas DataFrame
df_term_2_details = pd.read_excel(
    term_2_file_application_details,
    sheet_name=matched_sheet_name,
)

#Dictionary that collects the Term 2 variables for every program in program_ID_all (not only Business Administration)
term_2_variable_dictionary = {}

# Helper to avoid repetitive filtering
def base_filter(df, program, pop, year):
    """Return the rows of df that belong to one program, population and year.

    Parameters:
        df: DataFrame with 'CURRICULUM', 'Population' and 'Year' columns.
        program: program code to match; spaces are removed from the
            'CURRICULUM' values and they are upper-cased before comparing.
        pop: population to match; spaces are removed from the 'Population'
            values and they are upper-cased before comparing.
        year: year to match, as text (for example '1').

    Returns:
        A DataFrame containing only the matching rows.
    """
    curriculum = df['CURRICULUM'].str.replace(' ', '', regex=False).str.upper()
    population = df['Population'].str.replace(' ', '', regex=False).str.upper()

    # A numeric Year column that contains blanks is read as floats, so a year
    # of 1 becomes the text '1.0' and would never equal '1'. Dropping a
    # trailing '.0' keeps such rows; values that already matched are unchanged.
    year_text = (
        df['Year']
        .astype(str)
        .str.strip()
        .str.replace(r'\.0+$', '', regex=True)
    )

    return df[(curriculum == program) & (population == pop) & (year_text == year)]

# Row selectors for each Term 2 status count, listed in the order the keys are stored.
# Each entry pairs a dictionary key suffix with a function that narrows the rows
# already filtered by program, population and year down to the rows counted for that status.
term_2_status_selectors = (
    ("Applications", lambda rows: rows),
    ("Accepted", lambda rows: rows[
        rows['Accepted'].isin(['ACPT', 'ACLN', 'COND'])
        & ~rows['Deferred Application'].isin(['ACPT', 'COND'])
    ]),
    # Counted when 'Payment Pending' holds any value (neither missing nor an empty string)
    ("Payment_Pending", lambda rows: rows[
        rows['Payment Pending'].notna() & (rows['Payment Pending'] != '')
    ]),
    ("Denied", lambda rows: rows[rows['Denied'].isin(['APEX', 'DENY', 'NTSL'])]),
    ("Declined", lambda rows: rows[rows['Declined'].isin(['CCAN', 'DECL'])]),
    ("Pending", lambda rows: rows[rows['Pending'].isin(['APRO', 'NODE'])]),
    ("Waitlisted", lambda rows: rows[rows['Waitlisted'].isin(['CDMT', 'RQMT', 'WAIT'])]),
    ("Enrolled", lambda rows: rows[rows['Enrollment'].isin(['ENRO'])]),
)

# Loop through each program, population, and year
for program in program_ID_all:
    for pop in population_values:
        for year in year_values:
            key_base = f"term_2_{program}_{pop}_Year_{year}"
            df_filtered = base_filter(df_term_2_details, program, pop, year)

            # Store one count per status; a count of zero is stored as None
            for suffix, select_rows in term_2_status_selectors:
                subset = select_rows(df_filtered)
                term_2_variable_dictionary[f"{key_base}_{suffix}"] = len(subset) or None

            # Conversion rate = enrolled / accepted (None when nothing was accepted)
            enrolled = term_2_variable_dictionary[f"{key_base}_Enrolled"]
            accepted = term_2_variable_dictionary[f"{key_base}_Accepted"]
            term_2_variable_dictionary[f"{key_base}_Conversion_Rate"] = division_calculator_decimal(
                enrolled, accepted
            )


# In[29]:
#Spot check: show the Term 2 accepted count for BAESPM, Domestic, Year 1 (None when no rows matched)
term_2_variable_dictionary['term_2_BAESPM_DOMESTIC_Year_1_Accepted']