This program was built to clean, organize, aggregate and summarize Goodwill Central Coast Department of Workfoce program outcomes within a choosen date range.  

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("Program_Enrollment.csv")

Cleaning and converting data to proper data types

In [3]:
df["wage"] = df["wage"].str.replace("$",'').str.strip()
df["wage"] = pd.to_numeric(df["wage"])

  df["wage"] = df["wage"].str.replace("$",'').str.strip()


In [4]:
df["start_date"] = pd.to_datetime(df["start_date"])
df["date_of_hire"] = df["date_of_hire"].str.strip()
df["date_of_hire"] = pd.to_datetime(df["date_of_hire"])
df["actual_close_date"] = pd.to_datetime(df["actual_close_date"])

Date Range Filters

In [5]:
start = input("Enter start date ('YYYY-MM-DD'): ") 
end = input("Enter end date ('YYYY-MM-DD'): ")  

Enter start date ('YYYY-MM-DD'): 2021-07-01
Enter end date ('YYYY-MM-DD'): 2021-12-31


Program Enrollments

In [6]:
#Filters out clients in database who did not start program and ensures individuals with start date but not enrollment status are counted
enrollments = df[(df["start_date"].between(start,end)) & ((df["enrollment_status"].str.contains("No Show")==False) 
                 | (df["enrollment_status"].isnull()))] #All start dates in specified date range 
final_enrollments = enrollments["start_date"].groupby(by=enrollments['department_number']).count() #groups by dept. and counts
final_enrollments

department_number
Cabrillo - 5711-000             2
Community Serv                  6
DOR_Monterey_Employment         1
DOR_SC_Employment               2
JSW -5672-000                   1
Kickstart                       8
LO                              1
MCOE                            2
Monterey DOR SIT - 5692-100     2
Monterey OWP - 5657-100        56
Prop 47 Behav Health           16
SC OWP - 5657-000               3
SC TEMP - 5657-000              3
SLO DOR WEX - 5692-500         12
Workability (MC)                3
Workability (SC)               28
Name: start_date, dtype: int64

Duplication Filter For Enrollments and Job Placements

In [7]:
#Enrollment Duplication. This segment provides information on duplicated data in the system 
#as some clients may be participating in multiple programs at once. 
e_duplicated = df[(df["start_date"].between(start,end)) & ((df["enrollment_status"].str.contains("No Show")==False))] #Enrollment Filter
e_duplicated = e_duplicated[e_duplicated.duplicated(subset = ['wds_id'], keep=False)] #finds duplicates
print("Enrollment Duplication")
print(e_duplicated[["wds_id", "start_date", "department_number"]])
print("####################################")

#Job Placement Duplication 
job_duplicated = df[df["date_of_hire"].between(start,end)] #finds duplicates in the Job Placement Information
job_duplicated = job_duplicated[job_duplicated.duplicated(subset = ['wds_id'], keep=False)]
print("Job Placement Duplication")
print(job_duplicated[["wds_id", "date_of_hire", "department_number"]])


Enrollment Duplication
      wds_id start_date        department_number
2387  3941.0 2021-10-19   SLO DOR WEX - 5692-500
2388  3941.0 2021-12-23                       LO
2757  4149.0 2021-07-07   SLO DOR WEX - 5692-500
2758  4149.0 2021-08-13   SLO DOR WEX - 5692-500
2759  4149.0 2021-10-31   SLO DOR WEX - 5692-500
2777  4167.0 2021-07-06  Monterey OWP - 5657-100
2778  4167.0 2021-10-05  Monterey OWP - 5657-100
2781  4170.0 2021-08-13   SLO DOR WEX - 5692-500
2782  4170.0 2021-12-03   SLO DOR WEX - 5692-500
2802  4187.0 2021-08-12        SC OWP - 5657-000
2803  4187.0 2021-08-26        SC OWP - 5657-000
####################################
Job Placement Duplication
Empty DataFrame
Columns: [wds_id, date_of_hire, department_number]
Index: []


Job Placement Data, External, Internal Placements 

In [8]:
#Internal Job Placement Outcomes
internal = df[df["date_of_hire"].between(start,end)]
internal = internal[internal["employer"].str.contains("Goodwill", na = False, case = False)] #Searches for values that contain "Goodwill" in values 
internal_final = internal["employer"].groupby(by=internal["department_number"]).count() #Counts number of internal job placemetns by Department number
internal_final.rename("Internal_Placements", inplace = True) #Rename title

#External Job Placement Outcomes
external = df[df["date_of_hire"].between(start,end)] #filters all date_of_hire values for those in specified date range
external = external[external["employer"].str.contains("Goodwill", case=False)==False] #Searches for any value without Goodwill in it
external_final = external["employer"].groupby(by=external["department_number"], dropna=True).count() #Counts number of external job placemetns by Department number
external_final.rename("External_Placements", inplace = True) #Rename
external_final

department_number
Monterey OWP - 5657-100    11
SC TEMP - 5657-000          1
Name: External_Placements, dtype: int64

Average Wage Data & Hours 

In [9]:
Avg_Hours_Final = df[df["date_of_hire"].between(start,end)] #filters all date_of_hire values for those in specified date range
Avg_Hours_Final = Avg_Hours_Final[["wage", "department_number", "hours_per_week"]]
Avg_Hours_Final = Avg_Hours_Final.groupby(by=Avg_Hours_Final["department_number"]).mean() #Takes average hours and groups by dept
#Avg_Hours_Final.rename("Average Wage and Hours by Department", inplace = True)

Final Results 

In [10]:
#summarizes all results above in one place by department code (Program). 
print(start, "-", end)
print("ENROLLMENTS")
print(final_enrollments)
print("*************************")
print("INTERNAL PLACEMENTS")
print(internal_final)
print("*************************")
print("EXTERNAL PLACEMENTS")
print(external_final)
print("*************************")
print("AVG HOURS & WAGES")
print(Avg_Hours_Final)


2021-07-01 - 2021-12-31
ENROLLMENTS
department_number
Cabrillo - 5711-000             2
Community Serv                  6
DOR_Monterey_Employment         1
DOR_SC_Employment               2
JSW -5672-000                   1
Kickstart                       8
LO                              1
MCOE                            2
Monterey DOR SIT - 5692-100     2
Monterey OWP - 5657-100        56
Prop 47 Behav Health           16
SC OWP - 5657-000               3
SC TEMP - 5657-000              3
SLO DOR WEX - 5692-500         12
Workability (MC)                3
Workability (SC)               28
Name: start_date, dtype: int64
*************************
INTERNAL PLACEMENTS
department_number
Kickstart                  1
Monterey OWP - 5657-100    2
Name: Internal_Placements, dtype: int64
*************************
EXTERNAL PLACEMENTS
department_number
Monterey OWP - 5657-100    11
SC TEMP - 5657-000          1
Name: External_Placements, dtype: int64
*************************
AVG HOURS & WAGES
 