In [1]:
import pandas as pd
from datetime import date, datetime

<h2>Preprocessing UC_FIN284_RFL_RECONCILE worksheet</h2>

In [6]:
# Load the reconcile worksheet; header=1 takes the column names from the
# second row of the sheet.
df = pd.read_excel(
    'Input_Files/UC_FIN284_RFL_RECONCILE_1108132082.xls',
    header=1,
)



In [7]:
# Number of rows read from the worksheet.
df.shape[0]

4241

<h4>1. Create UID (Unique ID) by concatenating STRING(emplid) and STRING(Current Date), where Current Date is expressed as an Excel serial day number counted from Excel day zero (12/30/1899)</h4>

<h5>List of created dataframes:</h5>
<ul>
    <li>dfPaidLeave - dataframe where rows have Pay Status == "Paid Leave of Absence"</li>
    <li>dfinactive - dataframe where rows have Payroll Status other than 'A' (Active) and Pay Status other than 'Paid Leave of Absence'</li>
    <li>dfGandN - dataframe where rows have Elig Fld 1 equal to G or N, Payroll Status = Active, Pay Status other than 'Paid Leave of Absence'</li>
    <li>dfFutureDated - dataframe where rows have Current Date &gt; today's date, Elig Fld 1 is neither G nor N, Payroll Status = Active, Pay Status other than 'Paid Leave of Absence'</li>
    <li>dfactive - dataframe where rows have Current Date &lt;= today's date, sorted by Current Date (newest to oldest), Elig Fld 1 is neither G nor N, Payroll Status = Active, Pay Status other than 'Paid Leave of Absence'</li>
</ul>

In [3]:
def excel_serial_date(d):
    """Return the Excel serial day number for ``d``.

    The result is the number of whole days between ``d`` and Excel's day
    zero (1899-12-30). Any time-of-day component is ignored.

    Parameters
    ----------
    d : datetime.datetime or datetime.date
        The day to convert. ``datetime`` subclasses are handled through
        their ``.date()`` method; plain ``date`` objects are used as-is.

    Returns
    -------
    int
        Days elapsed since 1899-12-30.

    Example
    -------
    >>> excel_serial_date(datetime.strptime("7/8/2025", "%m/%d/%Y"))
    45846
    >>> "10051642" + str(excel_serial_date(datetime(2025, 7, 8)))
    '1005164245846'
    """
    excel_day_zero = date(1899, 12, 30)
    # A datetime carries a time part that must be dropped before the
    # subtraction; a plain date has no .date() method, so it is used directly.
    day = d.date() if isinstance(d, datetime) else d
    return (day - excel_day_zero).days

In [8]:
# UID = employee id immediately followed by the Excel serial day of the row's
# 'Current Date' (no separator between the two parts).
uid_values = []
for emp_id, current_dt in zip(df['Employee ID'], df['Current Date']):
    uid_values.append(str(emp_id) + str(excel_serial_date(current_dt)))
df['UID'] = uid_values

In [10]:
# Keep only the first row seen for each (Employee ID, Current Date) pair.
df = df.drop_duplicates(subset=['Employee ID', 'Current Date'], keep='first')
# Row count after de-duplication can be checked with: len(df['Employee ID'])

4167

<h4>2. Move all rows with Pay Status = Paid Leave of Absence to Paid Leave dataframe and remove duplicate</h4>

In [6]:
# Paid-leave frame: rows whose Pay Status is exactly 'Paid Leave of Absence',
# reduced to one row per UID.
dfPaidLeave = (
    df.loc[df['Pay Status'].eq('Paid Leave of Absence')]
    .drop_duplicates(subset=['UID'])
)

In [7]:
# dfFiltered is an independent copy of df holding every row whose Pay Status
# is anything other than 'Paid Leave of Absence'.
dfFiltered = df.loc[df['Pay Status'].ne('Paid Leave of Absence')].copy()


array(['Unpaid Leave of Absence', 'Short Work Break', 'Terminated',
       'Active'], dtype=object)

<h4>3. Select all rows with Pay Status != Paid Leave of Absence (sorting by Current Date from oldest to newest is currently commented out)</h4>

In [None]:
# Rebuild dfFiltered from df: all rows except those on paid leave.
not_paid_leave = df['Pay Status'] != 'Paid Leave of Absence'
dfFiltered = df.loc[not_paid_leave].copy()
# The oldest-to-newest sort is left disabled:
#dfFiltered.sort_values(by='Current Date',ascending=True,inplace=True)
dfFiltered.head()

<h4>4. Move Future dated RFL rows to Future-Dated dataframe , remove duplicates, sort Current Date, and filter only RFL current date </h4>

In [None]:
# Rows dated after "now", ordered oldest-first, one row per UID.
dfFutureDatedRFL = (
    dfFiltered.loc[dfFiltered['Current Date'] > datetime.now()]
    .sort_values(by='Current Date', ascending=True)
    .drop_duplicates(subset=['UID'])
)

In [None]:
# Preview the first five future-dated rows.
dfFutureDatedRFL.head(5)

<h4>5. Remove future-dated rows in (filtered) Master i.e. dfFiltered, sort Current Date newest to oldest, and remove duplicates on UID </h4>

In [None]:
# Keep rows dated now or earlier, order them newest-first, then keep the first
# (i.e. newest) row for each UID.
# sort_values returns a new frame unless inplace=True, so its result is chained
# into the pipeline rather than being computed and thrown away.
dfFiltered = (
    dfFiltered[dfFiltered['Current Date'] <= datetime.now()]
    .sort_values(by='Current Date', ascending=False)
    .drop_duplicates(subset=['UID'])
)


In [None]:
# Preview the first five remaining rows.
dfFiltered.head(5)

<h4>Create a dictionary/map for Payroll Status</h4>

In [8]:
# Map every distinct Payroll Status value to its first character
# (for example 'Active' -> 'A').
payrollStatusList = list(dfFiltered['Payroll Status'].unique())
payrollStatusDict = {status: status[0] for status in payrollStatusList}

payrollStatusDict

{'Active': 'A', 'Terminated': 'T', 'Retired': 'R', 'Deceased': 'D'}

<h4>Re-map Payroll Status with abbreviated letter</h4>

In [9]:
# Replace each Payroll Status with its one-letter code; a value that is not a
# key of payrollStatusDict raises KeyError.
dfFiltered['Payroll Status'] = dfFiltered['Payroll Status'].map(payrollStatusDict.__getitem__)

<h4>3.Create a copy dataframe where Payroll Status is Inactive</h4>

In [10]:
# Inactive frame: every row whose Payroll Status code is not 'A'.
dfinactive = dfFiltered.loc[dfFiltered['Payroll Status'].ne('A')].copy()

In [11]:
# Drop the inactive rows: from here on dfFiltered holds only rows whose
# Payroll Status code is 'A'.
dfFiltered = dfFiltered.loc[dfFiltered['Payroll Status'].eq('A')].copy()

array(['A'], dtype=object)

<h4>4.Create a copy dataframe where 'Elig Fld1' = G or N</h4>

In [19]:
# Split on 'Elig Fld 1': rows coded G or N go to dfGandN, everything else
# stays in dfFiltered.
is_g_or_n = dfFiltered['Elig Fld 1'].isin(['G', 'N'])
dfGandN = dfFiltered.loc[is_g_or_n]
dfFiltered = dfFiltered.loc[~is_g_or_n].copy()

<h4>5.Create a copy dataframe where Current Date is Future Dated i.e. Current Date > datetime.now()</h4>

In [22]:
# Take a single timestamp so both filters below use the same cutoff; calling
# datetime.now() twice would compare the two halves against different instants.
future_cutoff = datetime.now()
# NOTE(review): the earlier "Remove future-dated rows in (filtered) Master"
# cell already drops rows with Current Date > now from dfFiltered. If that cell
# has run, dfFutureDated will be empty here - confirm which cell should own
# the future-dated split.
dfFutureDated = dfFiltered[dfFiltered['Current Date'] > future_cutoff].copy()
# Remove the future-dated rows: dfFiltered keeps rows with
# Current Date <= future_cutoff.
dfFiltered = dfFiltered[dfFiltered['Current Date'] <= future_cutoff].copy()

<h4>6.Create a copy dataframe where Payroll Status is Active</h4>

In [29]:
# Order the remaining rows newest-first in place, then expose them as dfactive.
# dfactive and dfFiltered refer to the same object, so both names see the
# sorted data.
dfFiltered.sort_values(by='Current Date', ascending=False, inplace=True)
dfactive = dfFiltered

<h2>Write dataframes into Excel</h2>

In [31]:
# Timestamp the workbook name so repeated runs do not overwrite each other.
todayDate = datetime.now().strftime("%m.%d.%y_%H-%M-%S")
outputfilename = "RFL Master File_" + todayDate

# Sheet name -> frame, listed in the order the sheets are written.
sheet_frames = {
    'Active': dfactive,
    'Future Dated': dfFutureDated,
    'Inactive': dfinactive,
    'G&N Elg 1': dfGandN,
    'Paid Leave': dfPaidLeave,
    'Raw': df,
}

with pd.ExcelWriter(f'./test/{outputfilename}.xlsx', datetime_format='MM/DD/YYYY') as writer:
    for sheet_label, frame in sheet_frames.items():
        frame.to_excel(writer, sheet_name=sheet_label, index=False)
        

<h2>Garbage Collection: Delete dfs to clear memory</h2>

In [32]:
# Unbind every frame written to the workbook so its memory can be reclaimed.
# NOTE(review): dfFutureDatedRFL is not removed here - confirm whether it
# should be.
del df, dfFiltered, dfactive, dfinactive, dfFutureDated, dfPaidLeave, dfGandN