In [122]:
# Import dependencies
import pandas as pd
import numpy as np
pd.set_option('max_colwidth', 400)

### Extract the Salary and Indebtedness data

In [123]:
# Read the data into a Pandas DataFrame
salary_df = pd.read_csv('Current_Employee_Names__Salaries__and_Position_Titles.csv')
salary_df.head()


Unnamed: 0,Name,Job Titles,Department,Full or Part-Time,Salary or Hourly,Typical Hours,Annual Salary,Hourly Rate
0,"MITCHELL, MARY",TRAFFIC CONTROL AIDE,OFFICE OF EMERGENCY MANAGEMENT,F,SALARY,,42312.0,
1,"ALDERDEN, JACOB M",CAPTAIN,DEPARTMENT OF POLICE,F,SALARY,,159552.0,
2,"JOHNSON, ANTHONY L",MOTOR TRUCK DRIVER,DEPT OF WATER MANAGEMENT,F,HOURLY,40.0,,39.25
3,"SHALABI, MAJDI A",SERGEANT,DEPARTMENT OF POLICE,F,SALARY,,133860.0,
4,"KELLY, LUKE L",LIEUTENANT,DEPARTMENT OF POLICE,F,SALARY,,150318.0,


In [124]:
# Read the data into a Pandas DataFrame
indebtedness_df = pd.read_csv('Employee_Indebtedness_to_the_City_of_Chicago.csv')
indebtedness_df.head()

Unnamed: 0,Date,Department or Agency Name,ARMS Department ID,Total # of Employees,# of Employees with Debt,% Employees with Debt,Total Amount Due
0,06/04/2022,ADMINISTRATIVE HEARING,AHMS,35.0,0.0,0.0,0.0
1,06/04/2022,COMM ANIMAL CARE AND CONTROL,ANIMAL,61.0,1.0,1.6,140.0
2,06/04/2022,AVIATION,AVIATION,1850.0,39.0,2.1,29413.1
3,06/04/2022,BUS AFFAIRS AND CONSUMER PROT,BACP,168.0,2.0,1.2,6021.2
4,06/04/2022,BUILDINGS,BUILDINGS,233.0,2.0,0.9,140.0


In [125]:
# Filtering for only the most recent week of the dataset
indebtedness_df = indebtedness_df.loc[(indebtedness_df["Date"]) == "01/21/2023"]

indebtedness_df.head()

Unnamed: 0,Date,Department or Agency Name,ARMS Department ID,Total # of Employees,# of Employees with Debt,% Employees with Debt,Total Amount Due
18920,01/21/2023,ADMINISTRATIVE HEARING,AHMS,36.0,0.0,0.0,0.0
18921,01/21/2023,COMM ANIMAL CARE AND CONTROL,ANIMAL,65.0,1.0,1.5,140.0
18922,01/21/2023,AVIATION,AVIATION,1942.0,30.0,1.5,25730.0
18923,01/21/2023,BUS AFFAIRS AND CONSUMER PROT,BACP,172.0,2.0,1.2,2872.7
18924,01/21/2023,BUILDINGS,BUILDINGS,223.0,3.0,1.3,3805.9


In [126]:
#number of unique debts in indebtedness
print((indebtedness_df['Department or Agency Name'].nunique()))

39


In [127]:
#number of unique debts in salary
print((salary_df['Department'].nunique()))

37


In [128]:
# checking data types
indebtedness_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39 entries, 18920 to 18958
Data columns (total 7 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Date                       39 non-null     object 
 1   Department or Agency Name  39 non-null     object 
 2   ARMS Department ID         39 non-null     object 
 3   Total # of Employees       39 non-null     float64
 4   # of Employees with Debt   39 non-null     float64
 5   % Employees with Debt      39 non-null     float64
 6   Total Amount Due           39 non-null     float64
dtypes: float64(4), object(3)
memory usage: 2.4+ KB


In [129]:
# checking data types
salary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31253 entries, 0 to 31252
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               31253 non-null  object 
 1   Job Titles         31253 non-null  object 
 2   Department         31253 non-null  object 
 3   Full or Part-Time  31250 non-null  object 
 4   Salary or Hourly   31253 non-null  object 
 5   Typical Hours      6994 non-null   float64
 6   Annual Salary      24259 non-null  float64
 7   Hourly Rate        6994 non-null   float64
dtypes: float64(3), object(5)
memory usage: 1.9+ MB


In [130]:
# Stripping department names of unnecessary words in indebtedness_df to match with department names in salaries_df
# unique values reviewed in excel

salary_df['Department'] = salary_df['Department'].str.replace('DEPARTMENT OF ','')
salary_df['Department'] = salary_df['Department'].str.replace('DEPT OF ','')
salary_df['Department'] = salary_df['Department'].str.replace('CHICAGO DEPT OF ','')
salary_df['Department'] = salary_df['Department'].str.replace('COPA','CIVILIAN OFFICE OF POLICE ACCOUNTABILITY')
salary_df['Department'] = salary_df['Department'].str.replace('OFFICE OF EMERGENCY MANAGEMENT','OFFICE OF EMERGENCY MGMT & COM')

salary_df.head()

Unnamed: 0,Name,Job Titles,Department,Full or Part-Time,Salary or Hourly,Typical Hours,Annual Salary,Hourly Rate
0,"MITCHELL, MARY",TRAFFIC CONTROL AIDE,OFFICE OF EMERGENCY MGMT & COM,F,SALARY,,42312.0,
1,"ALDERDEN, JACOB M",CAPTAIN,POLICE,F,SALARY,,159552.0,
2,"JOHNSON, ANTHONY L",MOTOR TRUCK DRIVER,WATER MANAGEMENT,F,HOURLY,40.0,,39.25
3,"SHALABI, MAJDI A",SERGEANT,POLICE,F,SALARY,,133860.0,
4,"KELLY, LUKE L",LIEUTENANT,POLICE,F,SALARY,,150318.0,


In [131]:
# Update hourly wages with extrapolated annual pay assuming full 52 weeks of working
#pd.notnull checks for null values, if not null, will not change, if null will change with formula for Annual Salary

salary_df['Annual Salary'] = salary_df['Annual Salary'].where(pd.notnull(salary_df['Annual Salary']), salary_df['Typical Hours'] * salary_df['Hourly Rate'] * 52)

salary_df

Unnamed: 0,Name,Job Titles,Department,Full or Part-Time,Salary or Hourly,Typical Hours,Annual Salary,Hourly Rate
0,"MITCHELL, MARY",TRAFFIC CONTROL AIDE,OFFICE OF EMERGENCY MGMT & COM,F,SALARY,,42312.0,
1,"ALDERDEN, JACOB M",CAPTAIN,POLICE,F,SALARY,,159552.0,
2,"JOHNSON, ANTHONY L",MOTOR TRUCK DRIVER,WATER MANAGEMENT,F,HOURLY,40.0,81640.0,39.25
3,"SHALABI, MAJDI A",SERGEANT,POLICE,F,SALARY,,133860.0,
4,"KELLY, LUKE L",LIEUTENANT,POLICE,F,SALARY,,150318.0,
...,...,...,...,...,...,...,...,...
31248,"MARCIANO, DAVID",POLICE OFFICER,POLICE,F,SALARY,,97974.0,
31249,"IBRAHIM, NOOR M",POLICE OFFICER,POLICE,F,SALARY,,93264.0,
31250,"CHORZEPA, PETER J",POLICE OFFICER,POLICE,F,SALARY,,97974.0,
31251,"SEPCOT, SEAN W",POLICE OFFICER,POLICE,F,SALARY,,104934.0,


In [132]:
# Filtering by hourly to ensure replacement of NaN worked
hourly_only = salary_df.loc[(salary_df["Salary or Hourly"]) == "HOURLY"]

hourly_only

Unnamed: 0,Name,Job Titles,Department,Full or Part-Time,Salary or Hourly,Typical Hours,Annual Salary,Hourly Rate
2,"JOHNSON, ANTHONY L",MOTOR TRUCK DRIVER,WATER MANAGEMENT,F,HOURLY,40.0,81640.0,39.25
20,"BERTONI, MICHAEL J",MOTOR TRUCK DRIVER,AVIATION,F,HOURLY,40.0,81640.0,39.25
22,"CARR, ZERRICK B",MOTOR TRUCK DRIVER,CHICAGO PUBLIC LIBRARY,F,HOURLY,40.0,82867.2,39.84
24,"METOYER, FARLEY J",MOTOR TRUCK DRIVER,DEPT STREETS AND SANITATION,F,HOURLY,40.0,82867.2,39.84
31,"FEENY, JOSEPH",FOREMAN OF MOTOR TRUCK DRIVERS,DEPT STREETS AND SANITATION,F,HOURLY,40.0,86132.8,41.41
...,...,...,...,...,...,...,...,...
29327,"DUNLEVY, LAWRENCE D",CONCRETE LABORER,CHICAGO TRANSPORTATION,F,HOURLY,40.0,95472.0,45.90
29809,"WHITE, JOSHUA P",CONSTRUCTION LABORER,WATER MANAGEMENT,F,HOURLY,40.0,95472.0,45.90
30053,"DENNIS, QIANA A",GENERAL LABORER - DSS,DEPT STREETS AND SANITATION,F,HOURLY,40.0,45198.4,21.73
30456,"NUNN, ANNETTE J",SANITATION LABORER,DEPT STREETS AND SANITATION,F,HOURLY,40.0,83574.4,40.18


In [133]:
# Aggregation of salary by department into new dataframe

In [134]:
# merge new salary by dataframe into indebtedness dataframe
# note departments that are not merged (37 vs 39 unique values)

In [135]:
# upload to database