In [None]:
# Library list🤖
import glob, logging, warnings, polars as pl, datetime, os, zipfile, xml.dom.minidom
from datetime import datetime as dt, time as t, timedelta
import pandas as pd, numpy as np, sqlalchemy as sa, xlsxwriter
from sqlalchemy import create_engine, text
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from polars.exceptions import ColumnNotFoundError, PanicException
from pathlib import Path
from IPython.display import HTML
from tabulate import tabulate
# -----------------------------------------------------------------------------------------------#
# --- Logging configuration📜 ---
# Folder that receives one log file per run of this notebook.
log_directory = Path(os.environ['USERPROFILE']) / r'Concentrix Corporation//CNXVN - WFM Team - Documents//DataBase//DataFrame//BKN//ScriptLogs//'
log_directory.mkdir(parents=True, exist_ok=True)

# The file name carries the start timestamp, so every run writes to a fresh file.
_run_stamp = dt.now().strftime("%Y%m%d_%H%M%S")
log_filename = log_directory / f"import_log_{_run_stamp}.log"

# File-only logging (no console handler); force=True replaces any handlers
# that an earlier run of this cell installed on the root logger.
_file_handler = logging.FileHandler(log_filename, encoding='utf-8')
logging.basicConfig(
    level=logging.INFO,  # one of DEBUG, INFO, WARNING, ERROR, CRITICAL
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    handlers=[_file_handler],
    force=True,
)

# Named logger shared by every cell below.
logger = logging.getLogger('ServerImportScript')
# -----------------------------------------------------------------------------------------------#
# Source collection📥
# Root folder that every source/log path below is built on.
# NOTE(review): despite its name this is a filesystem path under the Windows
# user profile, not a credential.
user_credential = Path(os.environ['USERPROFILE']) / r'Concentrix Corporation//CNXVN - WFM Team - Documents//'

# Each source below declares four names:
#   <NAME>_TABLE_NAME  - "schema.table" string of the target table
#   Folder_<NAME>      - folder holding the raw files for that source
#   log_<NAME>_path    - .xlsx workbook under MODIFIED_LOG for that source
#   <NAME>_schema      - column names; always starts with 'FileName', 'ModifiedDate'
# The code that consumes these names is not part of this section.

# 0️⃣1️⃣[BKN]AHT2🗃️
AHT2_TABLE_NAME = "BCOM.AHT2"
Folder_AHT2 = user_credential / r'DataBase//DataRaw//BKN//AHT2//'
log_AHT2_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//AHT2_log.xlsx'
AHT2_schema = ['FileName', 'ModifiedDate', 'Date', 'Agent Name Display', 'Answered Language Name', 'Measure Names', 'Measure Values']
# 0️⃣2️⃣[BKN]ROSTER🗃️
ROSTER_TABLE_NAME = "BCOM.ROSTER"
Folder_ROSTER = user_credential / r'DataBase//DataRaw//BKN//ROSTER//'
log_ROSTER_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//ROSTER_log.xlsx'
ROSTER_schema = ['FileName', 'ModifiedDate', 'Emp ID', 'Name', 'Attribute', 'Value', 'LOB',
                 'team_leader', 'week_shift', 'week_off', 'OM', 'DPE', 'Work Type']
# 0️⃣3️⃣[BKN]EPS🗃️
EPS_TABLE_NAME = "BCOM.EPS"
Folder_EPS = user_credential / r'DataBase//DataRaw//BKN//EPS//'
log_EPS_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//EPS_log.xlsx'
EPS_schema = ['FileName', 'ModifiedDate', 'sitecode', 'manager_username', 'Username', 'Date', 'Session Login',
              'Session Logout', 'Session Time', 'BPE Code', 'Total Time', 'SessionLogin_VN', 'SessionLogout_VN',
              'NightTime', 'DayTime', 'Night_BPE', 'Day_BPE']
# 0️⃣4️⃣[BKN]CPI🗃️
CPI_TABLE_NAME = "BCOM.CPI"
Folder_CPI = user_credential / r'DataBase//DataRaw//BKN//CPI//'
log_CPI_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CPI_log.xlsx'
# NOTE(review): the list holds both 'Item ID' and a second name whose text
# includes literal single quotes ("'Item ID'") - confirm both columns really exist.
CPI_schema = ['FileName', 'ModifiedDate', 'Date', 'Staff Name', 'Hour Interval Selected', 'Channel',
              'Item Label', 'Item ID', "'Item ID'", 'Time Alert', 'Nr. Contacts', 'Item Link', 'Time']
# 0️⃣5️⃣[GLB]RAMCO🗃️
# Sources 05-11 target the GLB schema and read from DataRaw//GLOBAL//...
# Each declares: <NAME>_TABLE_NAME, Folder_<NAME>, log_<NAME>_path, <NAME>_schema.
# NOTE(review): their log workbooks are nevertheless stored under
# DataRaw//BKN//MODIFIED_LOG - confirm this is intended.
RAMCO_TABLE_NAME = "GLB.RAMCO"
Folder_RAMCO = user_credential / r'DataBase//DataRaw//GLOBAL//RAMCO//'
log_RAMCO_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//RAMCO_log.xlsx'
RAMCO_schema = ['FileName', 'ModifiedDate', 'EID', 'Employee_Name', 'Employee_type', 'Date', 'Code']
# 0️⃣6️⃣[GLB]OT_RAMCO🗃️
OT_RAMCO_TABLE_NAME = "GLB.OT_RAMCO"
Folder_OT_RAMCO = user_credential / r'DataBase//DataRaw//GLOBAL//OT_RAMCO//'
log_OT_RAMCO_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//OT_RAMCO_log.xlsx'
OT_RAMCO_schema = ['FileName', 'ModifiedDate', 'employee_code', 'employee_name', 'Employee Type', 'OT Type', 'Date', 'Status', 'Hours']
# 0️⃣7️⃣[GLB]PremHdays🗃️
PremHdays_TABLE_NAME = "GLB.PremHdays"
Folder_PremHdays = user_credential / r'DataBase//DataRaw//GLOBAL//HOLIDAY_MAPPING//'
log_PremHdays_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//PremHdays_log.xlsx'
PremHdays_schema = ['FileName', 'ModifiedDate', 'Date', 'Holiday']
# 0️⃣8️⃣[GLB]NormHdays🗃️
NormHdays_TABLE_NAME = "GLB.NormHdays"
Folder_NormHdays = user_credential / r'DataBase//DataRaw//GLOBAL//HOLIDAY_MAPPING_NONBILLABLE//'
log_NormHdays_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//NormHdays_log.xlsx'
NormHdays_schema = ['FileName', 'ModifiedDate', 'Solar Day', 'Lunar Day', 'Holiday']
# 0️⃣9️⃣[GLB]EmpMaster🗃️
# EmpMaster, Termination and Resignation all point at the same WDD folder.
EmpMaster_TABLE_NAME = "GLB.EmpMaster"
Folder_EmpMaster = user_credential / r'DataBase//DataRaw//GLOBAL//WDD//'
log_EmpMaster_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//EmpMaster_log.xlsx'
EmpMaster_schema = ['FileName', 'ModifiedDate', 'EMPLOYEE_NUMBER', 'PREVIOUS_PAYROLL_ID', 'FIRST_NAME', 'MIDDLE_NAME', 'LAST_NAME',
                    'FULL_NAME', 'Work Related Status', 'Work Related (Extended Status)', 'Service Type', 'WAH & Hybrid Platform',
                    'ORIGINAL_DATE_OF_HIRE', 'LEGAL_EMPLOYER_HIRE_DATE', 'Continuous Service Date', 'Fixed Term Hire End Date',
                    'Contract End Date', 'PERSON_TYPE', 'WORKER_CATEGORY', 'Time Type', 'Employee Type', 'Last Promotion Date',
                    'Assignment Category', 'Email - Work', 'BUSINESS_UNIT', 'Job Code', 'Job Title', 'Business Title', 'Cost Center - ID',
                    'Cost Center - Name', 'LOCATION_CODE', 'LOCATION_NAME', 'CNX BU', 'Concentrix LOB', 'Process', 'COMPANY',
                    'MANAGEMENT_LEVEL', 'Job Level', 'Compensation Grade', 'JOB_FUNCTION_DESCRIPTION', 'JOB_FAMILY', 'MSA', 'MSA Client',
                    'MSA Program', 'ACTIVITY ID', 'SUPERVISOR_ID', 'SUPERVISOR_FULL_NAME', 'SUPERVISOR_EMAIL_ID', 'MANAGER_02_ID',
                    'MANAGER_02_FULL_NAME', 'MANAGER_02_EMAIL_ID', 'COMP_CODE', 'CITY', 'Location', 'Country', 'Employee Status', 'Work Shift']
# 1️⃣0️⃣[GLB]Termination🗃️
Termination_TABLE_NAME = "GLB.Termination"
Folder_Termination = user_credential / r'DataBase//DataRaw//GLOBAL//WDD//'
log_Termination_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//Termination_log.xlsx'
# The spelling 'COMPELETED DATE AND TIME' is kept verbatim.
# NOTE(review): presumably mirrors the source header - confirm before correcting.
Termination_schema = ['FileName', 'ModifiedDate', 'EMPLOYEE_ID', 'PREVIOUS_PAYROLL_ID', 'FIRST_NAME', 'MIDDLE_NAME', 'LAST_NAME', 'FULL_NAME',
                      'EMAIL_ADDRESS', 'HIRE_DATE', 'ORIGINAL_HIRE_DATE', 'END EMPLOYMENT DATE', 'Contract End Date', 'Termination Date',
                      'Termination Date (DD/MM/YY)', 'Eligible for Rehire', 'LWD', 'MOST RECENT TERMINATION - DATE INITIATED',
                      'MOST RECENT TERMINATION - DATE COMPLETED', 'MOST RECENT TERMINATION - EFFECTIVE DATE', 'MOST RECENT TERMINATION - REASON',
                      'Action date', 'DATE INITIATED', 'COMPELETED DATE AND TIME', 'TERMINATION DATE 2', 'Is Initiated through Resignation',
                      'Termination Reason', 'Resignation Reason', 'Secondary Termination Reasons', 'Resignation Notice served', 'PERSON_TYPE',
                      'Time Type', 'Employee Type', 'Worker Type', 'Assignment Category', 'WORKER_CATEGORY', 'BUSINESS_UNIT', 'Cost Center',
                      'Cost Center - ID', 'JOB_CODE', 'JOB_TITLE', 'BUSINESS_TITLE', 'LOCATION_NAME', 'LOCATION_CODE', 'COUNTRY', 'COMPANY',
                      'MANAGEMENT LEVEL', 'JOB LEVEL', 'JOB_FAMILY', 'JOB_FUNCTION', 'JOB_ROLE', 'MSA', 'CNX BU', 'Concentrix LOB', 'Process',
                      'Client Name ( Process )', 'Compensation Grade', 'SUPERVISOR_ID', 'SUPERVISOR_FULL_NAME', 'SUPERVISOR_EMAIL_ID', 'COMP_CODE',
                      'CITY', 'LOCATION_DESCRIPTION', 'EMPLOYEE STATUS', 'Continuous Service Date', 'Work Related Status',
                      'Work Related (Extended Status)', 'Activity', 'MSA Legacy Project ID']
# 1️⃣1️⃣[GLB]Resignation🗃️
Resignation_TABLE_NAME = "GLB.Resignation"
Folder_Resignation = user_credential / r'DataBase//DataRaw//GLOBAL//WDD//'
log_Resignation_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//Resignation_log.xlsx'
Resignation_schema = ['FileName', 'ModifiedDate', 'Employee ID', 'Full Name', 'Job Family', 'MSA Client', 'Country', 'Location', 'Action',
                      'Action Date', 'Date and Time Initiated', 'Status', 'Primary Reason', 'Secondary Reasons', 'Notification Date', 'Awaiting Persons',
                      'Resignation Primary Reason', 'Hire Date', 'Proposed Termination Date', 'Notice Served', 'Sup ID', 'Supervisor Name',
                      'Employee Status', 'Activity', 'MSA Legacy Project ID', 'Initiated By']
# 1️⃣2️⃣[BKN]CPI_PEGA🗃️
# Sources 12-19 target the BCOM schema.
# Each declares: <NAME>_TABLE_NAME, Folder_<NAME>, log_<NAME>_path, <NAME>_schema.
CPI_PEGA_TABLE_NAME = "BCOM.CPI_PEGA"
Folder_CPI_PEGA = user_credential / r'DataBase//DataRaw//BKN//CPI_PEGA//'
log_CPI_PEGA_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CPI_PEGA_log.xlsx'
CPI_PEGA_schema = ['FileName', 'ModifiedDate', 'Staff Name', 'Operator Def', 'Service Case Type New', 'Channel Def',
                   'Lang Def', 'Reason For No Service Case', 'Topic Def New', 'Subtopics', 'Case Id', 'Reservation Id Def',
                   'Day of Date', 'Blank', '# Swivels', 'Count of ServiceCase or Interaction']
# 1️⃣3️⃣[BKN]Staff🗃️
Staff_TABLE_NAME = "BCOM.Staff"
Folder_Staff = user_credential / r'DataBase//DataRaw//BKN//AGENTS//'
log_Staff_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//Staff_log.xlsx'
Staff_schema = ['FileName', 'ModifiedDate', 'Employee_ID', 'GEO', 'Site_ID', 'Employee_Last_Name', 'Employee_First_Name', 'Status', 'Wave #',
                'Role', 'Booking Login ID', 'Language Start Date', 'TED Name', 'CUIC Name', 'EnterpriseName', 'Hire_Date', 'PST_Start_Date',
                'Production_Start_Date', 'LWD', 'Termination_Date', 'Designation', 'cnx_email', 'Booking Email', 'WAH Category', 'Full name',
                'IEX', 'serial_number', 'BKN_ID', 'Extension Number']
# 1️⃣4️⃣[BKN]ConTrack🗃️
ConTrack_TABLE_NAME = "BCOM.ConTrack"
# Unlike the other sources, this folder sits under the user's own
# 'OneDrive - Concentrix Corporation' root rather than under user_credential.
Folder_ConTrack = Path(os.environ['USERPROFILE']) / r'OneDrive - Concentrix Corporation//DataBase//DataRaw//BKN//ContactTracker//'
log_ConTrack_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//ConTrack_log.xlsx'
# One column name ends with a trailing space ('... call to Guest? '); keep it verbatim.
ConTrack_schema = ['FileName', 'ModifiedDate', 'Id', 'Start time', 'Completion time', 'Email', 'Name', 'Reservation Number', 'Contact Types',
                   'Contact Parties', 'Unbabel Tool Used?', 'Backlog Case', 'How many days since guest contacted? (ex: 30)', 'Topics',
                   'Resolutions', 'Reason If Skipped', 'CRM used', 'Outbound to Senior', 'Outbound Status','Reason (Name - Site of Senior)',
                   'Note', 'Reason for cannot make OB call to Guest', 'Is it possible to make Outbound call to Guest? ', 'Language']
# 1️⃣5️⃣[BKN]Quality🗃️
Quality_TABLE_NAME = "BCOM.Quality"
Folder_Quality = user_credential / r'DataBase//DataRaw//BKN//QUALITY//'
log_Quality_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//Quality_log.xlsx'
Quality_schema = ['FileName', 'ModifiedDate', 'eps_name', 'eval_id', 'eval_date', 'agent_username', 'evaluator_username', 'result',
                  'final_question_grouping', 'template_group', 'eval_template_name', 'sections', 'sitecode', 'score_n', 'score_question_weight',
                  'eval_language', 'eval_reference', 'tix_final_topic', 'tix_final_subtopic', 'csat_language_code', 'csat_satisfied']
# 1️⃣6️⃣[BKN]RONA🗃️
RONA_TABLE_NAME = "BCOM.RONA"
Folder_RONA = user_credential / r'DataBase//DataRaw//BKN//RONA//'
log_RONA_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//RONA_log.xlsx'
RONA_schema = ['FileName', 'ModifiedDate', 'Agent', 'DateTime', 'RONA']
# 1️⃣7️⃣[BKN]CUIC🗃️
CUIC_TABLE_NAME = "BCOM.CUIC"
Folder_CUIC = user_credential / r'DataBase//DataRaw//BKN//CUIC//'
log_CUIC_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CUIC_log.xlsx'
CUIC_schema = ['FileName', 'ModifiedDate', 'FullName', 'LoginName', 'Interval', 'AgentAvailTime', 'AgentLoggedOnTime']
# 1️⃣8️⃣[BKN]KPI_Target🗃️
KPI_Target_TABLE_NAME = "BCOM.KPI_Target"
Folder_KPI_Target = user_credential / r'DataBase//DataRaw//BKN//KPI_TARGET//'
log_KPI_Target_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//KPI_Target_log.xlsx'
KPI_Target_schema = ['FileName', 'ModifiedDate', 'LOB', 'LOB Group', 'Week', 'Tenure days', 'Overall CPH tar', 'Phone CPH tar', 'Non Phone CPH tar',
                     'Quality - Customer Impact tar', 'Quality - Business Impact tar', 'Quality - Compliance Impact tar', 'Quality - Overall tar', 'AHT Phone tar',
                     'AHT Non-phone tar', 'AHT Overall tar', 'Hold (phone) tar', 'AACW (phone) tar', 'Avg Talk Time tar', 'Phone CSAT tar', 'Non phone CSAT tar',
                     'Overall CSAT tar', 'PSAT tar', 'PSAT Vietnamese tar', 'PSAT English (American) tar', 'PSAT English (Great Britain) tar', 'CSAT Reso tar',
                     'Quality - personalization tar', 'Quality - proactivity tar', 'Quality - resolution tar']
# 1️⃣9️⃣[BKN]LogoutCount🗃️
LogoutCount_TABLE_NAME = "BCOM.LogoutCount"
Folder_LogoutCount = user_credential / r'DataBase//DataRaw//BKN//LOGOUT_COUNT//'
log_LogoutCount_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//LogoutCount_log.xlsx'
LogoutCount_schema = ['FileName', 'ModifiedDate', 'Aggregation', 'TimeDimension', 'KPI Value Formatted']
# 2️⃣0️⃣[BKN]WpDetail🗃️
# Sources 20-36 target the BCOM schema.
# Each declares: <NAME>_TABLE_NAME, Folder_<NAME>, log_<NAME>_path, <NAME>_schema.
WpDetail_TABLE_NAME = "BCOM.WpDetail"
Folder_WpDetail = user_credential / r'DataBase//DataRaw//BKN//WP_DETAIL//'
log_WpDetail_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//WpDetail_log.xlsx'
WpDetail_schema = ['FileName', 'ModifiedDate', 'LOB', 'ID', 'DateTime_Start', 'DateTime_End', 'Date_Start', 'Date_end', 'Time_Start', 'Time_End',
                   'Dur', 'Action', 'DateTime_Act_Start', 'DateTime_Act_End', 'Date_Act_Start', 'Date_Act_End', 'Time_Act_Start', 'Time_Act_End', 'Act_Dur']
# 2️⃣1️⃣[BKN]WpSummary🗃️
WpSummary_TABLE_NAME = "BCOM.WpSummary"
Folder_WpSummary = user_credential / r'DataBase//DataRaw//BKN//WP_SUMMARY//'
log_WpSummary_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//WpSummary_log.xlsx'
WpSummary_schema = ['FileName', 'ModifiedDate', 'LOB', 'Date', 'Agent ID', 'Agent Name', 'Scheduled Activity', 'Length', 'Percent']
# 2️⃣2️⃣[BKN]RegisteredOT🗃️
RegisteredOT_TABLE_NAME = "BCOM.RegisteredOT"
Folder_RegisteredOT = user_credential / r'DataBase//DataRaw//BKN//OVERTIME//'
log_RegisteredOT_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//RegisteredOT_log.xlsx'
RegisteredOT_schema = ['FileName', 'ModifiedDate', 'Emp ID', 'Name', 'Date', 'Value', 'OT', 'LOB','Type']
# 2️⃣3️⃣[BKN]CSAT_TP🗃️
# CSAT_TP and CSAT_RS declare identical column lists; only table, folder and log differ.
CSAT_TP_TABLE_NAME = "BCOM.CSAT_TP"
Folder_CSAT_TP = user_credential / r'DataBase//DataRaw//BKN//CSAT//'
log_CSAT_TP_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CSAT_TP_log.xlsx'
# The '"Comment"' entry includes literal double quotes in the column name.
CSAT_TP_schema = ['FileName', 'ModifiedDate', 'Sort by Dimension', 'Survey Id', 'Reservation', 'Team', 'Channel', 'Staff', 'Type', 'Date',
                  'Topic of the first Ticket', 'Language', 'Csat 2.0 Score', 'Has Comment', '"Comment"', 'Reservation Link', 'View comment',
                  'Sort by Dimension (copy)', 'Max. Sort by Dimension']
# 2️⃣4️⃣[BKN]CSAT_RS🗃️
CSAT_RS_TABLE_NAME = "BCOM.CSAT_RS"
Folder_CSAT_RS = user_credential / r'DataBase//DataRaw//BKN//CSAT_RESO//'
log_CSAT_RS_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CSAT_RS_log.xlsx'
CSAT_RS_schema = ['FileName', 'ModifiedDate', 'Sort by Dimension', 'Survey Id', 'Reservation', 'Team', 'Channel', 'Staff', 'Type', 'Date',
                  'Topic of the first Ticket', 'Language', 'Csat 2.0 Score', 'Has Comment', '"Comment"', 'Reservation Link', 'View comment',
                  'Sort by Dimension (copy)', 'Max. Sort by Dimension']
# 2️⃣5️⃣[BKN]PSAT🗃️
PSAT_TABLE_NAME = "BCOM.PSAT"
Folder_PSAT = user_credential / r'DataBase//DataRaw//BKN//PSAT//'
log_PSAT_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//PSAT_log.xlsx'
# The question texts are column names and are kept verbatim, including "it or you".
PSAT_schema = ['FileName', 'ModifiedDate', 'Sorted By Dimension', 'Survey Id', 'Date', 'Staff Name', 'Language', 'Final Topics',
               'How satisfied were you with our service?', 'How difficult did we make it or you to solve your issue?', 'Agent understood my question',
               'Agent did everything possible to help me', 'Did we fully resolve your issue?', 'Channel', 'Hotel Id', '"Comment"',
               'Has Comment', 'Sorted BY Dimension (copy)']
# 2️⃣6️⃣[BKN]IEX_Hrs🗃️
IEX_Hrs_TABLE_NAME = "BCOM.IEX_Hrs"
Folder_IEX_Hrs = user_credential / r'DataBase//DataRaw//BKN//WP_INTERVAL//'
log_IEX_Hrs_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//IEX_Hrs_log.xlsx'
IEX_Hrs_schema = ['FileName', 'ModifiedDate', 'LOB', 'VNT', 'CET', 'HC', 'Hour']
# 2️⃣7️⃣[BKN]IntervalReq🗃️
IntervalReq_TABLE_NAME = "BCOM.IntervalReq"
Folder_IntervalReq = user_credential / r'DataBase//DataRaw//BKN//INTERVAL_REQUIREMENT//'
log_IntervalReq_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//IntervalReq_log.xlsx'
IntervalReq_schema = ['FileName', 'ModifiedDate', 'LOB', 'Datetime_CET', 'Datetime_VN', 'Value', 'Delivery_Req']
# 2️⃣8️⃣[BKN]ExceptionReq🗃️
ExceptionReq_TABLE_NAME = "BCOM.ExceptionReq"
Folder_ExceptionReq = user_credential / r'DataBase//DataRaw//BKN//EXCEPTION_REQ//'
log_ExceptionReq_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//ExceptionReq_log.xlsx'
ExceptionReq_schema = ['FileName', 'ModifiedDate', 'Emp ID', 'Date (MM/DD/YYYY)', 'Exception request (Minute)', 'Reason', 'TL', 'OM']
# 2️⃣9️⃣[BKN]LTTransfers🗃️
LTTransfers_TABLE_NAME = "BCOM.LTTransfers"
Folder_LTTransfers = user_credential / r'DataBase//DataRaw//BKN//HC_TRANSFER//'
log_LTTransfers_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//LTTransfers_log.xlsx'
LTTransfers_schema = ['FileName', 'ModifiedDate', 'EID', 'Full Name', 'Employee Status', 'LWD', 'Remarks']
# 3️⃣0️⃣[BKN]DailyReq🗃️
DailyReq_TABLE_NAME = "BCOM.DailyReq"
Folder_DailyReq = user_credential / r'DataBase//DataRaw//BKN//REQUIREMENT_HOURS//'
log_DailyReq_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//DailyReq_log.xlsx'
DailyReq_schema = ['FileName', 'ModifiedDate', 'LOB', 'Date', 'Daily Requirement', 'Prod Requirement']
# 3️⃣1️⃣[BKN]ProjectedShrink🗃️
ProjectedShrink_TABLE_NAME = "BCOM.ProjectedShrink"
Folder_ProjectedShrink = user_credential / r'DataBase//DataRaw//BKN//SHRINKAGE_TARGET//'
log_ProjectedShrink_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//ProjectedShrink_log.xlsx'
ProjectedShrink_schema = ['FileName', 'ModifiedDate', 'LOB', 'Week', 'Ratio']
# 3️⃣2️⃣[BKN]OTReq🗃️
OTReq_TABLE_NAME = "BCOM.OTReq"
Folder_OTReq = user_credential / r'DataBase//DataRaw//BKN//OT_REQ//'
log_OTReq_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//OTReq_log.xlsx'
OTReq_schema = ['FileName', 'ModifiedDate', 'Date', 'LOB', 'OT Hour', 'Type']
# 3️⃣3️⃣[BKN]CapHC🗃️
CapHC_TABLE_NAME = "BCOM.CapHC"
Folder_CapHC = user_credential / r'DataBase//DataRaw//BKN//CAPACITY_HC//'
log_CapHC_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//CapHC_log.xlsx'
CapHC_schema = ['FileName', 'ModifiedDate', 'LOB', 'Date', 'Client Requirement (Hours)']
# 3️⃣4️⃣[BKN]ProjectedHC🗃️
ProjectedHC_TABLE_NAME = "BCOM.ProjectedHC"
Folder_ProjectedHC = user_credential / r'DataBase//DataRaw//BKN//PROJECTED_HEADCOUNT//'
log_ProjectedHC_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//ProjectedHC_log.xlsx'
ProjectedHC_schema = ['FileName', 'ModifiedDate', 'Date', 'LOB', 'FTE Required', 'Projected HC', 'Plan Leave',
                      'Actual Projected HC', '%OO', '%IO', 'Projected HC with Shrink', 'OT', 'Leave allow for Shrink', '% Deli']
# 3️⃣5️⃣[BKN]RampHC🗃️
RampHC_TABLE_NAME = "BCOM.RampHC"
Folder_RampHC = user_credential / r'DataBase//DataRaw//BKN//RAMPUP_HC//'
log_RampHC_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//RampHC_log.xlsx'
RampHC_schema = ['FileName', 'ModifiedDate', 'Date', 'LOB', 'Headcount', 'Hours']

# 3️⃣6️⃣[BKN]SEAT🗃️
SEAT_TABLE_NAME = "BCOM.SEAT"
# Unlike the other sources, the files are read from DataFrame//...//History data
# rather than from DataRaw; the log still lives under DataRaw//BKN//MODIFIED_LOG.
Folder_SEAT = user_credential / r'DataBase//DataFrame//BKN//SEAT_MAP//History data//'
log_SEAT_path = user_credential / r'DataBase//DataRaw//BKN//MODIFIED_LOG//SEAT_log.xlsx'
SEAT_schema = ['FileName','ModifiedDate','Date','Emp ID','TED Name','Week_day','Seat No','Floor','Building']
# -----------------------------------------------------------------------------------------------#
# Database_Connecter🧬

# Connection settings for the SQL Server that receives the imported data.
server_name = "PHMANVMDEV01V"
server_ip = "10.5.11.60"
database = "wfm_vn_dev"
# NOTE(review): credentials are committed in source. They can be overridden
# with the WFM_DB_USER / WFM_DB_PASSWORD environment variables; the literals
# remain only as a fallback so existing runs keep working. Rotate this
# password and remove the fallback once the variables are deployed.
user = os.environ.get("WFM_DB_USER", "usr_wfmvn_dev")
password = os.environ.get("WFM_DB_PASSWORD", "12guWU2OdEj5kEspl9Rlfoglf")
# SQL Server Authentication 🔗
# URL.create() escapes each component, so a password containing '@', '/',
# ':' or similar characters cannot corrupt the connection URL.
connection_url = sa.engine.URL.create(
    "mssql+pyodbc",
    username=user,
    password=password,
    host=server_ip,
    database=database,
    query={"driver": "ODBC Driver 17 for SQL Server"},
)
# Kept as a plain string for any code that still expects `connection_string`.
connection_string = connection_url.render_as_string(hide_password=False)
# Windows Authentication 🔗
# connection_string = f"mssql+pyodbc://{server_name}/{database}?driver=ODBC+Driver+17+for+SQL+Server&Trusted_Connection=yes"
try:
    engine = create_engine(connection_url, fast_executemany=True)
    # create_engine() is lazy and opens no connection by itself; run a trivial
    # query so that a bad host or credential fails here, not in a later cell.
    with engine.connect() as _probe:
        _probe.execute(text("SELECT 1"))
    logger.info(f"✅ Successfully connected to DB: {database} server: {server_ip}")
except Exception:
    logger.exception("❌ DB Connection error")
    raise

In [None]:
# Function Definition🛠️

# Log Color view💡
def print_colored(text, color):
    """Show *text* in the notebook output wrapped in a span coloured *color*.

    *text* is inserted into the markup as-is (no HTML escaping). The parameter
    name shadows the `text` helper imported from sqlalchemy, but only inside
    this function.
    """
    markup = f'<span style="color: {color};">{text}</span>'
    display(HTML(markup))

# Check existing log file💡
def _empty_import_log():
    """Return an empty import log with the FileName/ModifiedDate/Error columns."""
    return pl.DataFrame(
        {
            "FileName": pl.Series([], dtype=pl.Utf8),
            "ModifiedDate": pl.Series([], dtype=pl.Datetime),
            "Error": pl.Series([], dtype=pl.Utf8),
        }
    )


def read_or_create_log(log_path):
    """Load the import log workbook at *log_path*, or start an empty log.

    Args:
        log_path: Path of the .xlsx log file to read.

    Returns:
        A polars DataFrame. When the workbook is read successfully its
        "ModifiedDate" column is cast to millisecond precision. When the file
        is missing, or cannot be read or cast for any other reason, an empty
        frame with columns FileName, ModifiedDate and Error is returned.
    """
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')  # silence warnings raised while reading the workbook
        try:
            logger.debug(f"Reading log file: {log_path}")
            log_df = pl.read_excel(log_path)
            # Keyword arguments to with_columns() are named expressions, so the
            # former `strict=False` added a boolean column called "strict"
            # instead of relaxing the cast. Only the cast is kept.
            log_df = log_df.with_columns(pl.col("ModifiedDate").dt.cast_time_unit("ms"))
            logger.info(f"Success read log file: {log_path}")
        except FileNotFoundError:  # first run for this source: start a new log
            logger.warning(f"Log file not found: {log_path}. Create new log.")
            log_df = _empty_import_log()
        except Exception as e:  # unreadable or malformed log: start a new log
            logger.exception(f"Error reading log file: {log_path}")
            print(f"Error reading log file: {e}")
            log_df = _empty_import_log()
        return log_df
        
# Update log_df💡
def process_and_save_log(log_df, log_entries, log_path):
    """Merge *log_entries* into *log_df* and write the result to *log_path*.

    Nothing happens when *log_entries* is empty. Otherwise the old and new
    rows are combined, only the row with the latest ModifiedDate is kept per
    FileName, and the result (sorted by FileName, columns FileName /
    ModifiedDate / Error) is written to the "ImportLog" worksheet. A write
    failure is printed and logged, not raised.
    """
    if not log_entries:
        return

    fresh_rows = pl.DataFrame(log_entries)
    previous_rows = log_df.with_columns(pl.col('ModifiedDate').dt.cast_time_unit("ms"))

    # Stack old and new rows, then keep the most recent row for each file.
    merged = pl.concat([previous_rows, fresh_rows], how="diagonal_relaxed")
    merged = merged.sort("ModifiedDate", descending=[False])
    merged = merged.unique(subset=["FileName"], keep="last")
    merged = merged.sort("FileName", descending=[False])
    merged = merged.select(["FileName", "ModifiedDate", "Error"])

    try:
        merged.write_excel(log_path, worksheet="ImportLog", autofit=True)
        saved_note = f"Import log saved to: {log_path}"
        print(saved_note)
        logger.info(saved_note)
    except Exception as e:
        print(f"Error writing log file: {e}")
        logger.error(f"Error writing log file: {log_path} - {e}")

# write_data💡
def write_data(engine, table_name, df):
    """Append the rows of *df* to *table_name* using the connection *engine*."""
    df.write_database(
        table_name=table_name,
        connection=engine,
        if_table_exists="append",
    )
    
# delete_data💡
def delete_data(engine, table_name, filename):
    """Delete every row of *table_name* whose [FileName] equals *filename*.

    The file name is passed as a bound parameter; the table name is
    interpolated into the statement. Any failure is logged, shown in the
    notebook and re-raised.
    """
    try:
        with engine.connect() as connection:
            notice = f"Prepare to delete old data for '{filename}' in '{table_name}'"
            print_colored(notice, "DarkTurquoise")
            logger.warning(notice)

            statement = text(f"DELETE FROM {table_name} WHERE [FileName] = :filename")
            connection.execute(statement, {"filename": filename})
            connection.commit()

            print_colored("Old data deleted successfully🧹", "DarkTurquoise")
            logger.info(f"'{filename}' data deleted successfully in '{table_name}' 🧹.")
    except Exception:
        failure = f"Error while delete data for '{filename}' in '{table_name}'"
        logger.exception(failure)
        print_colored(failure, "DarkTurquoise")
        raise
        
# Check Time💡
def is_time_between(begin_time, end_time, check_time=None):
    """Tell whether *check_time* lies inside the window [begin_time, end_time].

    Args:
        begin_time: Start of the window (a `datetime.time`).
        end_time: End of the window (a `datetime.time`).
        check_time: Time to test; defaults to the current UTC time of day.

    Returns:
        True when *check_time* is inside the window, bounds included. When
        *begin_time* is not earlier than *end_time* the window is treated as
        crossing midnight.
    """
    if check_time is None:
        # The file imports the `datetime` *module*, so the former
        # `datetime.utcnow()` raised AttributeError. Import the class locally
        # to get the current UTC time of day regardless of module-level names.
        from datetime import datetime as _datetime, timezone as _timezone
        check_time = _datetime.now(_timezone.utc).time()
    if begin_time < end_time:
        return begin_time <= check_time <= end_time
    # Window crosses midnight, e.g. 22:00 -> 06:00.
    return check_time >= begin_time or check_time <= end_time
def time_difference(time1, time2):
    """Return *time1* minus *time2* in whole seconds (negative if time1 is earlier).

    Only the hour, minute and second fields are used.
    """
    def _seconds_since_midnight(moment):
        return moment.hour * 3600 + moment.minute * 60 + moment.second

    return _seconds_since_midnight(time1) - _seconds_since_midnight(time2)

# Final Summary💡
def display_summary(source_name: str, error_count: int) -> None:
    """Announce the end of a run in the notebook and in the log file.

    A positive *error_count* yields a warning that states the count.
    Otherwise a success notice is shown, and the log line names *source_name*.
    """
    if error_count > 0:
        failure_note = f"Finished processing all files ({error_count} have errors🛠️)."
        print_colored(failure_note, "OrangeRed")
        logger.warning(failure_note)
        return

    print_colored("Finished processing all files (no errors🎉).", "PaleVioletRed")
    logger.info(f"Finished processing [{source_name}] (no errors🎉).")

# Default_variable💡
def Default_variable():
    """Return a fresh ``(log_entries, error_count)`` pair: an empty list and 0."""
    return [], 0

# parse_date💡
def parse_date(col: pl.Expr) -> pl.Expr:
    """Build an expression that parses a string column into `pl.Date`.

    Each format below is tried non-strictly, and the first one that yields a
    non-null date wins. Order matters: "%m/%d/%Y" is tried before "%d/%m/%y".
    """
    candidate_formats = (
        "%m/%d/%Y",
        "%Y-%m-%d",
        "%d %B %Y",
        "%B %d, %Y",
        "%d-%b-%y",
        "%Y%m%d",
        "%d/%m/%y",
        "%d-%m-%Y",
    )
    attempts = [
        col.str.strptime(pl.Date, format=fmt, strict=False)
        for fmt in candidate_formats
    ]
    return pl.coalesce(*attempts)

# validate_schema💡
def validate_schema(df: pl.DataFrame, expected_schema: list[str], filename: str) -> tuple[bool, str | None]:
    """Compare the columns of *df* with *expected_schema* and report the outcome.

    Missing columns are a critical error. Extra columns only produce a
    warning. Every step is written to the log and shown in the notebook.

    Returns:
        ``(has_critical_error, message)``. *message* describes the missing
        columns, or is None when nothing is missing.
    """
    banner = f"🔍 Starting schema validation for file: {filename}"
    logger.info(banner)
    print_colored(banner, "DodgerBlue")

    required = set(expected_schema)
    present = set(df.columns)
    absent = required - present
    surplus = present - required

    # 1. Missing columns make the file unusable.
    failure_reason = None
    if absent:
        failure_reason = f"Schema error in the file: '{filename}'. Missing columns: {sorted(absent)}"
        logger.error(failure_reason)
        print_colored(f"❗️ {failure_reason}", "OrangeRed")

    # 2. Extra columns are reported but tolerated.
    if surplus:
        surplus_note = f"warning schema for file '{filename}'. Extra columns: {sorted(surplus)}. These columns will be excluded from the import process."
        logger.warning(surplus_note)
        print_colored(f"⚠️ {surplus_note}", "Gold")

    # 3. Final verdict.
    if failure_reason is not None:
        verdict = f"❌ Schema validation failed due to missing column(s) for file: {filename}."
        logger.warning(verdict)
        print_colored(verdict, "OrangeRed")
    else:
        if surplus:
            verdict = f"⚠️ File schema check: {filename} Passed (No missing columns, extra columns warned)"
        else:
            verdict = f"✅ Completely valid schema for the file: {filename}."
        logger.info(verdict)
        print_colored(verdict, "MediumSeaGreen")  # a pass with warnings is still shown in green

    return failure_reason is not None, failure_reason
    
# DF Info💡
def info_polars(df: pl.DataFrame):
    """Print and log an overview of *df*.

    Shows the shape, then a grid table with one row per column: position,
    name, non-null count and dtype. The table also goes to the log file.
    """
    heading = "⚙️Final structure"
    print_colored(heading, "Olive")
    logger.info(heading)

    print(f"Shape: {df.shape}")
    print("Data columns:")

    rows = [
        [position, column, df.select(pl.col(column).is_not_null().sum()).item(), dtype]
        for position, (column, dtype) in enumerate(zip(df.columns, df.dtypes))
    ]
    rendered = tabulate(rows, headers=["#", "Column", "Non-Null Count", "Dtype"], tablefmt="grid")
    print(rendered)
    logger.info(rendered)

In [None]:
# MaintainDatabase🧰

# Run the BCOM.usp_MaintainDatabase stored procedure. Progress and failures
# are shown in the notebook AND written to the log file, so a failed run can
# still be diagnosed after the notebook output is gone. Failures are reported
# but not re-raised, so the following cells still run.
print_colored("===== Starting Index Rebuild Process =====", "DodgerBlue")
logger.info("===== Starting Index Rebuild Process =====")

MaintainDatabase_sql = """

EXEC BCOM.usp_MaintainDatabase

"""

try:
    with engine.connect() as connection:
        print_colored("⚙️Executing Index Rebuild script (this may take a long time)...", "DarkOrange")
        logger.info("⚙️Executing Index Rebuild script (this may take a long time)...")
        connection.execute(text(MaintainDatabase_sql))
        connection.commit()
        print_colored("✔️Index Rebuild script execution command sent and committed. Check SQL Server logs/output for details.", "MediumSeaGreen")
        logger.info("✔️Index Rebuild script execution command sent and committed.")
except sa.exc.SQLAlchemyError as e_db:
    # logger.exception records the traceback, which the notebook message omits.
    logger.exception("Database error during Index Rebuild Process")
    print_colored(f"❌ Database error during Index Rebuild Process: {e_db}", "OrangeRed")
except Exception as e_general:
    logger.exception("An unexpected error occurred during Index Rebuild Process")
    print_colored(f"❌ An unexpected error occurred during Index Rebuild Process: {e_general}", "OrangeRed")

print_colored("===== Index Rebuild Process attempt is complete (Python perspective) =====", "DodgerBlue")
logger.info("===== Index Rebuild Process attempt is complete (Python perspective) =====")

In [None]:
# ⚙️Create_EEAAO
# Refresh BCOM.EEAAO through its stored procedure, then preview a few rows.
logger.info("===== Starting EEAAO Process =====")

# Statement that runs the refresh procedure.
Exec_EEAAO = """
EXEC BCOM.Refresh_EEAAO_Data;
"""
# Small sample query used to confirm that the table holds data afterwards.
select_query = """
SELECT TOP 5 * FROM BCOM.EEAAO;
"""


def _log_and_print(message):
    """Write *message* to the log file at INFO level, then print it."""
    logger.info(message)
    print(message)


try:
    with engine.connect() as connection:
        _log_and_print("⚙️Executing procedure EEAAO ...")
        connection.execute(text(Exec_EEAAO))
        connection.commit()
        _log_and_print("✔️Successfully executed and committed Procedure EEAAO.")

        _log_and_print(f"Reading data from BCOM.EEAAO with query: {select_query.strip()}")
        df_eeao_result = pl.read_database(query=select_query, connection=connection)

        if df_eeao_result is None or df_eeao_result.is_empty():
            logger.warning("No data returned from BCOM.EEAAO after refresh or procedure did not complete in time.")
            print_colored("No data returned from BCOM.EEAAO after refresh.", "OrangeRed")
        else:
            print_colored("Sample data from BCOM.EEAAO after refresh:", "MediumSeaGreen")
            display(df_eeao_result)
            logger.info(f"Successfully read {df_eeao_result.height} rows from BCOM.EEAAO.")
except sa.exc.SQLAlchemyError as e:
    # Database-side failure: keep the traceback in the log, show a short note.
    logger.error(f"Database error during EEAAO Process: {e}", exc_info=True)
    print(f"Database error: {e}")
except Exception as e:
    logger.error(f"An unexpected error occurred during EEAAO Process: {e}", exc_info=True)
    print(f"An unexpected error: {e}")

logger.info("===== Processing of EEAAO is complete =====")

In [None]:
# Close DB📃
# Dispose of the engine created in the first cell.
engine.dispose()
print("Database connection closed.")
# IPython magic (not plain Python): clears the notebook namespace; -f skips
# the confirmation prompt. The first cell must be re-run before any other cell.
%reset -f