In [1]:
import pandas as pd
import os
from datetime import datetime as dt

In [2]:
#change to payroll teams documents folder, note that this needs to be synched to your directory for this to work
os.chdir(r"C:\Users\alex.jefferies\Leighs Construction Limited\People and Culture - 11 Payroll - People Dashboard Data Sources")

In [3]:
#helper functions
def time_convert(atime):
    newtime = dt.fromtimestamp(atime)
    return newtime.date()

def create_file_records(somepath) -> dict:
    #dictionary
    firstDict = {}
    
    for name in os.listdir(somepath): 

        if ".xlsx" in name or ".csv" in name.lower():
    
            filepath = os.path.join(somepath, name)
            
            #main library that holds stats
            stats = os.stat(filepath)
            
            attrs = {
                'File Name': name,
                #'Size (KB)': sizeFormat(stats.st_size),
                'Creation Date': time_convert(stats.st_ctime),
                'Modified Date': time_convert(stats.st_mtime),
                'Last Access Date': time_convert(stats.st_atime),
                
            }
           
            firstDict[name] = attrs 

    return firstDict 

def view_records(file_records) -> None:
    
    for key, value in file_records.items():
            
        for k,v in value.items():
            print(f"{k}: {v}")
        print()

def get_info(df:pd.DataFrame):
    print("="*50)
    print(df.shape)
    print(df.head())
    print()
    print(df.info())
    print()
    print("="*50)


In [4]:
current_path = os.getcwd()
subdirs = [os.path.join(current_path, d) for d in os.listdir(current_path) if os.path.isdir(os.path.join(current_path, d))]

# Sort the subdirectories by their creation time in descending order
sorted_subdirs = sorted(subdirs, key=os.path.getctime, reverse=True)

# Get the latest subdirectory
latest_subdir = sorted_subdirs[0]
os.chdir(latest_subdir)

In [5]:
latest_subdir

'C:\\Users\\alex.jefferies\\Leighs Construction Limited\\People and Culture - 11 Payroll - People Dashboard Data Sources\\240401'

In [6]:
!dir

 Volume in drive C is Windows 
 Volume Serial Number is 9841-01DE

 Directory of C:\Users\alex.jefferies\Leighs Construction Limited\People and Culture - 11 Payroll - People Dashboard Data Sources\240401

04/08/2024  08:12 AM    <DIR>          .
04/08/2024  08:12 AM    <DIR>          ..
04/01/2024  06:46 AM          (19,495) 240401 NorthSalaries EmployeeList .xlsx
04/01/2024  06:47 AM           (2,272) 240401 NORTHSALARIES LEAVEBALANCES.CSV
04/01/2024  06:47 AM          (29,429) 240401 NorthSalaries TerminatedEmployeeList .xlsx
04/01/2024  06:57 AM          (19,016) 240401 NorthWages EmployeeList .xlsx
04/01/2024  06:58 AM           (2,486) 240401 NORTHWAGES LEAVEBALANCES.CSV
04/01/2024  06:57 AM          (22,428) 240401 NorthWages TerminatedEmployeeList .xlsx
04/01/2024  06:49 AM          (21,603) 240401 SouthSalaries EmployeeList .xlsx
04/01/2024  06:49 AM           (3,308) 240401 SOUTHSALARIES LEAVEBALANCES.CSV
04/01/2024  06:49 AM          (30,292) 240401 SouthSalaries TerminatedEm

### Directory sorting and filtering

In [7]:
file_records = create_file_records(latest_subdir)

In [8]:
view_records(file_records)

File Name: 240401 NorthSalaries EmployeeList .xlsx
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 NORTHSALARIES LEAVEBALANCES.CSV
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 NorthSalaries TerminatedEmployeeList .xlsx
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 NorthWages EmployeeList .xlsx
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 NORTHWAGES LEAVEBALANCES.CSV
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 NorthWages TerminatedEmployeeList .xlsx
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 SouthSalaries EmployeeList .xlsx
Creation Date: 2024-04-08
Modified Date: 2024-04-01
Last Access Date: 2024-04-08

File Name: 240401 SOUTHSALARIES LEAVEBALANCES.CSV
Creation D

In [9]:
#checking the files in the folder, there should be 12 excluding "employee_data_tidy"
most_recent_records = file_records.values()
for item in most_recent_records:
    print(item['File Name'])
    print()

240401 NorthSalaries EmployeeList .xlsx

240401 NORTHSALARIES LEAVEBALANCES.CSV

240401 NorthSalaries TerminatedEmployeeList .xlsx

240401 NorthWages EmployeeList .xlsx

240401 NORTHWAGES LEAVEBALANCES.CSV

240401 NorthWages TerminatedEmployeeList .xlsx

240401 SouthSalaries EmployeeList .xlsx

240401 SOUTHSALARIES LEAVEBALANCES.CSV

240401 SouthSalaries TerminatedEmployeeList .xlsx

240401 SouthWages EmployeeList .xlsx

240401 SOUTHWAGES LEAVEBALANCES.CSV

240401 SouthWages TerminatedEmployeeList .xlsx



In [12]:
#determine correct file types
salaried = [1 if "salaries" in record["File Name"].lower() else 0 for record in most_recent_records]
waged = [1 if "wages" in record["File Name"].lower() else 0 for record in most_recent_records]
north = [1 if "north" in record["File Name"].lower() else 0 for record in most_recent_records]
south = [1 if "south" in record["File Name"].lower() else 0 for record in most_recent_records]

#loop through and add boolean columns
for record,sal,wag,nor,sou in zip(most_recent_records,salaried,waged,north,south):
    record["IsSalaries"] = sal
    record["IsWages"] = wag
    record["IsNorth"] = nor
    record["IsSouth"] = sou

most_recent_records

dict_values([{'File Name': '240401 NorthSalaries EmployeeList .xlsx', 'Creation Date': datetime.date(2024, 4, 8), 'Modified Date': datetime.date(2024, 4, 1), 'Last Access Date': datetime.date(2024, 4, 8), 'IsSalaries': 1, 'IsWages': 0, 'IsNorth': 1, 'IsSouth': 0}, {'File Name': '240401 NORTHSALARIES LEAVEBALANCES.CSV', 'Creation Date': datetime.date(2024, 4, 8), 'Modified Date': datetime.date(2024, 4, 1), 'Last Access Date': datetime.date(2024, 4, 8), 'IsSalaries': 1, 'IsWages': 0, 'IsNorth': 1, 'IsSouth': 0}, {'File Name': '240401 NorthSalaries TerminatedEmployeeList .xlsx', 'Creation Date': datetime.date(2024, 4, 8), 'Modified Date': datetime.date(2024, 4, 1), 'Last Access Date': datetime.date(2024, 4, 8), 'IsSalaries': 1, 'IsWages': 0, 'IsNorth': 1, 'IsSouth': 0}, {'File Name': '240401 NorthWages EmployeeList .xlsx', 'Creation Date': datetime.date(2024, 4, 8), 'Modified Date': datetime.date(2024, 4, 1), 'Last Access Date': datetime.date(2024, 4, 8), 'IsSalaries': 0, 'IsWages': 1, 'I

### Combining and Processing files

In [18]:

from collections import Counter

def separate_files(records,south_or_north:str="IsSouth",salaried_or_waged:str = "IsWages",leave_filter:str='leavebalances',terminated_filter='terminated') -> tuple[list,list,list]:
    """separates list of files into south/noth,waged/salaried, leave balanced and terminated"""
    for record in records:
        file = record["File Name"]

        if not (record[south_or_north] and record[salaried_or_waged]):
            continue

        elif leave_filter not in file.lower() and terminated_filter not in file.lower():
            waged_or_salaried_file =  file

        elif leave_filter in file.lower():
            leave_balances_file = file

        elif terminated_filter in file.lower():
            terminated_file = file

        else:
            raise FileNotFoundError("Please check the name of the file")

    return waged_or_salaried_file,leave_balances_file,terminated_file

def add_name_key(df:pd.DataFrame,name_column:str="Name") -> pd.DataFrame:
    """add name key to dataframe"""
    df["Name_Key"] = df[name_column].str.replace(",","").str.replace(" ","").str.lower().str.strip()
    return df

def table_to_html(df):
    html = df.to_html()
    # Write html to file
    text_file = open("index.html", "w")
    text_file.write(html)
    text_file.close()

def files_to_dataframe(file,leave_balances_file,terminated_file) -> pd.DataFrame:
    """convert separated files into dataframe format"""
    file_df = pd.read_excel(file,skiprows=1,engine="openpyxl")
    ##add status information
    file_df["Status"] = "Active"
    #standardize name
    add_name_key(file_df, name_column="Name")
    
    leave_df = pd.read_csv(leave_balances_file)
    add_name_key(leave_df, name_column="Employee Full Name")

    terminated_df = pd.read_excel(terminated_file,skiprows=1,engine="openpyxl")
    terminated_df["Status"] = "Inactive"
    add_name_key(terminated_df, name_column="Name")

    #merge on Name_Key
    merged_df = file_df.merge(leave_df,on="Name_Key",how="left")
    #concatendate active and inative staff
    concat_df = pd.concat([merged_df,terminated_df],axis=0)

    return concat_df

def add_region(df,modify_to:str='North Island'): 
    df["Region"] = modify_to
    return df

In [19]:

print("="*50)
print("South Waged")
waged_file,leave_balances_file,terminated_file = separate_files(most_recent_records)
south_waged = files_to_dataframe(waged_file,leave_balances_file,terminated_file)
#write an html table
add_region(south_waged, modify_to="South Island")

get_info(south_waged)


South Waged
(479, 19)
                                         Name Start Date  \
0  Abad, Arnold Sanguyo                       2012-09-27   
1  Acantilado, Richel                         2015-01-08   
2  Adlaon, Arthur Prieto                      2022-10-13   
3  Algar, Roy Anthony                         2011-09-12   
4  Anderson, Liam James                       2022-08-29   

                 Department Employment  Status Salary/ Wage  Hours Worked  \
0  22 - Projects Operations          Permanent     Wage              40.0   
1  22 - Projects Operations          Permanent     Wage              40.0   
2  22 - Projects Operations          Permanent     Wage              47.5   
3  25 - Projects Management          Permanent     Wage              50.0   
4  25 - Projects Management          Permanent     Wage              50.0   

  Birth Date                           Occupation Finish Date  \
0 1977-10-08  Leading Hand                                NaT   
1 1977-01-24  Carpenter 

  concat_df = pd.concat([merged_df,terminated_df],axis=0)


In [20]:
#north wages, filenames contain both north and wages

print("="*50)
print("North Waged")
north_waged_file,leave_balances_file,terminated_file = separate_files(most_recent_records,south_or_north="IsNorth",salaried_or_waged="IsWages")
north_waged = files_to_dataframe(north_waged_file,leave_balances_file,terminated_file)

add_region(north_waged,"North Island")
#adjust_departments(north_waged)

##final preprocessing
#for exited staff MYOB defaults the Cost Center to 1 - Leighs Christchurch.Modify the North Island wages to Auckland (by Default) or New Plymouth
#north_waged.loc[north_waged["Finish"] and [north_waged["Cost Centre"]] = "2 - Leighs Auckland"
get_info(north_waged)


North Waged
(175, 19)
                                         Name Start Date  \
0  Albio, Joseph Garcia                       2023-02-24   
1  Alexander, Michael Graeme                  2023-10-16   
2  Ampongan, Moises Jr Oraa                   2023-03-21   
3  Anonuevo, Lito Burgos                      2013-05-08   
4  Anonuevo, Joel                             2014-06-06   

                 Department Employment  Status Salary/ Wage  Hours Worked  \
0  22 - Projects Operations          Permanent     Wage              47.5   
1  22 - Projects Operations          Permanent     Wage              45.0   
2  22 - Projects Operations          Permanent     Wage              47.5   
3  22 - Projects Operations          Permanent     Wage              40.0   
4  22 - Projects Operations          Permanent     Wage              40.0   

  Birth Date                           Occupation Finish Date  \
0 1987-11-16  Carpenter                                   NaT   
1 1964-11-23  Gateman   

  concat_df = pd.concat([merged_df,terminated_df],axis=0)


In [21]:
#Counter(north_waged["Cost Centre"])

KeyError: 'Cost Centre'

In [22]:
#concatenate north and south for waged
waged = pd.concat([south_waged,north_waged])
waged.head()

Unnamed: 0,Name,Start Date,Department,Employment Status,Salary/ Wage,Hours Worked,Birth Date,Occupation,Finish Date,Ethnicity,Visa Expiry,Visa Type,Gender,Status,Name_Key,Employee Full Name,Sick/Special Leave Balance,Holidays Balance,Region
0,"Abad, Arnold Sanguyo",2012-09-27,22 - Projects Operations,Permanent,Wage,40.0,1977-10-08,Leading Hand,NaT,...,...,Resident Visa ...,Male,Active,abadarnoldsanguyo,Abad Arnold Sanguyo,10.0,14.67,South Island
1,"Acantilado, Richel",2015-01-08,22 - Projects Operations,Permanent,Wage,40.0,1977-01-24,Carpenter,NaT,...,...,Resident Visa ...,Male,Active,acantiladorichel,Acantilado Richel,19.0,11.81,South Island
2,"Adlaon, Arthur Prieto",2022-10-13,22 - Projects Operations,Permanent,Wage,47.5,1970-03-24,Carpenter,NaT,...,...,Resident Visa ...,Male,Active,adlaonarthurprieto,Adlaon Arthur Prieto,0.0,8.45,South Island
3,"Algar, Roy Anthony",2011-09-12,25 - Projects Management,Permanent,Wage,50.0,1973-12-02,Site Supervisor,NaT,...,...,...,Male,Active,algarroyanthony,Algar Roy Anthony,0.0,2.4,South Island
4,"Anderson, Liam James",2022-08-29,25 - Projects Management,Permanent,Wage,50.0,1989-07-31,Site Supervisor,NaT,NZ European ...,...,...,Male,Active,andersonliamjames,Anderson Liam James,10.0,11.55,South Island


In [23]:
waged.info()

<class 'pandas.core.frame.DataFrame'>
Index: 654 entries, 0 to 107
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   Name                         654 non-null    object        
 1   Start Date                   654 non-null    datetime64[ns]
 2   Department                   654 non-null    object        
 3   Employment  Status           654 non-null    object        
 4   Salary/ Wage                 654 non-null    object        
 5   Hours Worked                 654 non-null    float64       
 6   Birth Date                   648 non-null    datetime64[ns]
 7   Occupation                   654 non-null    object        
 8   Finish Date                  512 non-null    datetime64[ns]
 9   Ethnicity                    654 non-null    object        
 10  Visa Expiry                  654 non-null    object        
 11  Visa Type                    654 non-null    objec

In [24]:

print("="*50)
print("South Salaried")
south_salaried_file,leave_balances_file,terminated_file = separate_files(most_recent_records,south_or_north="IsSouth",salaried_or_waged="IsSalaries")
south_salaried = files_to_dataframe(south_salaried_file,leave_balances_file,terminated_file)
add_region(south_salaried,"South Island")

get_info(south_salaried)

South Salaried
(285, 19)
                                         Name Start Date  \
0  Aitcheson, Shane Andrew                    2015-01-12   
1  Antrobus, Dean                             2021-01-11   
2  Aston, Stephen James                       2023-10-30   
3  Baggstrom, Kimberley Dawn                  2023-09-07   
4  Baker, Anthony Ian                         2024-01-22   

                 Department Employment  Status Salary/ Wage  Hours Worked  \
0  25 - Projects Management          Permanent     Salary            45.0   
1  23 - Projects Commercial          Permanent     Salary            45.0   
2  25 - Projects Management          Permanent     Salary            45.0   
3       13 - Administration          Permanent     Salary            40.0   
4  30 - External Consultant          Permanent     Salary            40.0   

  Birth Date                           Occupation Finish Date  \
0 1985-09-11  Project Manager                             NaT   
1 1969-01-29  Senior 

  concat_df = pd.concat([merged_df,terminated_df],axis=0)


In [25]:
print("="*50)
print("North Salaried")
north_salaried_file,leave_balances_file,terminated_file = separate_files(most_recent_records,south_or_north="IsNorth",salaried_or_waged="IsSalaries")
north_salaried = files_to_dataframe(north_salaried_file,leave_balances_file,terminated_file)

add_region(north_salaried,"North Island")
get_info(north_salaried)

North Salaried
(248, 19)
                                         Name Start Date  \
0  Aguado, Crisiya Quismundo                  2022-11-22   
1  Alombro, Allan Jesus Benitez               2023-01-23   
2  Astashkin, Ivan                            2024-03-25   
3  Baranyai, Damian Miklos                    2021-11-29   
4  Barnes, Stuart Ross                        2023-09-18   

                  Department Employment  Status Salary/ Wage  Hours Worked  \
0    24 - Projects Technical          Permanent     Salary              45   
1   25 - Projects Management          Permanent     Salary              45   
2   25 - Projects Management          Permanent     Salary              45   
3  26 - Future Leaders Progr          Permanent     Salary              45   
4   25 - Projects Management          Permanent     Salary              45   

  Birth Date                           Occupation Finish Date  \
0 1990-12-03  Project Engineer                            NaT   
1 1971-02-04  S

  concat_df = pd.concat([merged_df,terminated_df],axis=0)


In [26]:
north_salaried

Unnamed: 0,Name,Start Date,Department,Employment Status,Salary/ Wage,Hours Worked,Birth Date,Occupation,Finish Date,Ethnicity,Visa Expiry,Visa Type,Gender,Status,Name_Key,Employee Full Name,Sick/Special Leave Balance,Holidays Balance,Region
0,"Aguado, Crisiya Quismundo",2022-11-22,24 - Projects Technical,Permanent,Salary,45,1990-12-03,Project Engineer,NaT,Filipino ...,...,Resident Visa ...,Female,Active,aguadocrisiyaquismundo,Aguado Crisiya Quismundo,4.29,5.12,North Island
1,"Alombro, Allan Jesus Benitez",2023-01-23,25 - Projects Management,Permanent,Salary,45,1971-02-04,Site Manager,NaT,Filipino ...,...,Indefinite Resident Visa ...,Male,Active,alombroallanjesusbenitez,Alombro Allan Jesus Benitez,9.00,5.37,North Island
2,"Astashkin, Ivan",2024-03-25,25 - Projects Management,Permanent,Salary,45,1990-08-24,Site Supervisor,NaT,European ...,21/02/2025 ...,Work Visa ...,Male,Active,astashkinivan,Astashkin Ivan,0.00,0.11,North Island
3,"Baranyai, Damian Miklos",2021-11-29,26 - Future Leaders Progr,Permanent,Salary,45,1998-01-18,Site Supervisor - FLP,NaT,...,...,...,Male,Active,baranyaidamianmiklos,Baranyai Damian Miklos,3.33,9.32,North Island
4,"Barnes, Stuart Ross",2023-09-18,25 - Projects Management,Permanent,Salary,45,1985-12-20,Site Manager,NaT,NZ European ...,...,...,Male,Active,barnesstuartross,Barnes Stuart Ross,10.00,5.37,North Island
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,"Wellock, Eric Neal",2017-10-24,1 - Leighs Christchurch,Permanent,Salary,50,1960-10-11,Site Manager,2020-06-17,...,...,...,Male,Inactive,wellockericneal,,,,North Island
180,"White, Shannon Tracy",2017-11-13,1 - Leighs Christchurch,Permanent,Salary,40,1988-07-10,Health & Safety Advisor,2019-03-07,...,...,...,Female,Inactive,whiteshannontracy,,,,North Island
181,"Wilkinson, Cain Jordan",2021-09-13,1 - Leighs Christchurch,Permanent,Salary,50,1994-05-02,Site Manager,2023-01-27,...,...,...,Male,Inactive,wilkinsoncainjordan,,,,North Island
182,"Worth, Pamela Helen",2023-05-25,1 - Leighs Christchurch,Permanent,Salary,40,1961-10-11,Human Resources Advisor,2023-09-05,NZ Pakeha ...,...,...,Female,Inactive,worthpamelahelen,,,,North Island


In [27]:
#concatenate north and south for salaried
salaried = pd.concat([south_salaried,north_salaried])
salaried.head()

Unnamed: 0,Name,Start Date,Department,Employment Status,Salary/ Wage,Hours Worked,Birth Date,Occupation,Finish Date,Ethnicity,Visa Expiry,Visa Type,Gender,Status,Name_Key,Employee Full Name,Sick/Special Leave Balance,Holidays Balance,Region
0,"Aitcheson, Shane Andrew",2015-01-12,25 - Projects Management,Permanent,Salary,45.0,1985-09-11,Project Manager,NaT,...,...,...,Male,Active,aitchesonshaneandrew,Aitcheson Shane Andrew,18.53,4.25,South Island
1,"Antrobus, Dean",2021-01-11,23 - Projects Commercial,Permanent,Salary,45.0,1969-01-29,Senior Quantity Surveyor,NaT,...,...,...,Male,Active,antrobusdean,Antrobus Dean,11.2,7.38,South Island
2,"Aston, Stephen James",2023-10-30,25 - Projects Management,Permanent,Salary,45.0,1980-07-16,Site Manager,NaT,British ...,2026-05-09 00:00:00,Accredited Employer Work Visa ...,Male,Active,astonstephenjames,Aston Stephen James,0.0,8.38,South Island
3,"Baggstrom, Kimberley Dawn",2023-09-07,13 - Administration,Permanent,Salary,40.0,1990-05-19,Regional Management Team Administra,NaT,"NZ Maori, European ...",...,...,Female,Active,baggstromkimberleydawn,Baggstrom Kimberley Dawn,10.0,4.37,South Island
4,"Baker, Anthony Ian",2024-01-22,30 - External Consultant,Permanent,Salary,40.0,1983-11-18,Job Pac Consultant,NaT,NZ European ...,...,Permanent Resident ...,Male,Active,bakeranthonyian,Baker Anthony Ian,0.0,3.78,South Island


In [28]:
salaried.info()

<class 'pandas.core.frame.DataFrame'>
Index: 533 entries, 0 to 183
Data columns (total 19 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   Name                         533 non-null    object        
 1   Start Date                   533 non-null    datetime64[ns]
 2   Department                   533 non-null    object        
 3   Employment  Status           533 non-null    object        
 4   Salary/ Wage                 533 non-null    object        
 5   Hours Worked                 533 non-null    float64       
 6   Birth Date                   532 non-null    datetime64[ns]
 7   Occupation                   533 non-null    object        
 8   Finish Date                  376 non-null    datetime64[ns]
 9   Ethnicity                    533 non-null    object        
 10  Visa Expiry                  533 non-null    object        
 11  Visa Type                    533 non-null    objec

### Combining salaried and waged data

In [29]:

mismatch=False
for s,w in zip(salaried.columns,waged.columns):
    if s != w:
        mismatch=True
        
if mismatch: 
    print("There is a mismatch of columns, please reorder appropriately")
    raise ValueError()
else:
    print("No Column Mismatch, can combine waged and salaried!")
    

No Column Mismatch, can combine waged and salaried!


In [30]:
employee_data = pd.concat([salaried,waged])

#drop name key and employee full name
employee_data.drop(columns=["Employee Full Name","Name_Key"],inplace=True)

#add a last updated column

employee_data["Data Last Updated At"] = dt.utcnow()

get_info(employee_data)

(1187, 18)
                                         Name Start Date  \
0  Aitcheson, Shane Andrew                    2015-01-12   
1  Antrobus, Dean                             2021-01-11   
2  Aston, Stephen James                       2023-10-30   
3  Baggstrom, Kimberley Dawn                  2023-09-07   
4  Baker, Anthony Ian                         2024-01-22   

                 Department Employment  Status Salary/ Wage  Hours Worked  \
0  25 - Projects Management          Permanent     Salary            45.0   
1  23 - Projects Commercial          Permanent     Salary            45.0   
2  25 - Projects Management          Permanent     Salary            45.0   
3       13 - Administration          Permanent     Salary            40.0   
4  30 - External Consultant          Permanent     Salary            40.0   

  Birth Date                           Occupation Finish Date  \
0 1985-09-11  Project Manager                             NaT   
1 1969-01-29  Senior Quantity Surve

  employee_data["Data Last Updated At"] = dt.utcnow()


### Exporting tidied data to file for use in powerBI

In [31]:
#reset index on table
employee_data.reset_index(drop=True,inplace=True)

In [32]:
employee_data.to_csv('employee_data_tidy.csv')
print("Data Cleaned successfully!")

Data Cleaned successfully!


In [33]:
#change back to data analytics directory. Add a copy of data to make the active version 
os.chdir(r"C:\Users\alex.jefferies\Leighs Construction Limited\Data Analytics - Documents\General\Projects\HR\People Dashboard")
employee_data.to_csv('employee_data_tidy_active.csv')
print("Data Copied successfully!")

Data Copied successfully!


In [34]:
#render table as html
table_to_html(employee_data)