In [29]:
import numpy as np
import pandas as pd

pd.set_option('display.max_colwidth', None) # show complete content of a cells in table

# load data frame
df = pd.read_excel(r"C:\\Users\\g21728\\Desktop\\Test.xlsx", sheet_name="Sheet1")

# column list; show object types
# list(df.columns)

# convert float columns to int
float_columns = [ 'rating_overall','rating_balance','rating_culture','rating_career','rating_comp','rating_mgmt','helpful',]
df[float_columns] = df[float_columns].astype("Int64") #Int64 can store missing values as NaN

# convert time to year
df.insert(df.columns.get_loc("date")+1, "year", "") # insert new column after date column
df["year"] = df["date"].str[11:15]

# employee_status -> categories (0: former; 1: current)
df["employee_status"].unique()
employee_status_categories = {'Current Employee': 1, 'Former Employee': 0} # dict with categories
df["employee_status"] = df["employee_status"].replace(employee_status_categories).astype("Int64") # replace strings with ints
df.rename(columns={"employee_status": "current_employee"}, inplace=True) #inplace works with the current dataset. otherwise df["employee_status"] = ... is needed in front

# years at company -> tenure categories & full (1) vs part time (0)
# print(df["years_at_company"].unique()) # show unique values
df.insert(6, "full_time", "")
employee_status_categories = {True: 1, False: 0}
df["full_time"] = df["years_at_company"].str.contains("full-time").map(employee_status_categories).astype("Int64") # if string contains "full-time"

# tenure: cut "years at company" after "for"
df.insert(7, "tenure", "")
df["tenure"] = df["years_at_company"].str.split('for').str[1]
df["tenure"].unique()
# assign tenure categories
tenure_categories = {' more than 3 years':3, ' more than a year':2, ' more than 5 years':4,' more than 8 years':5, ' less than a year':1,' more than 10 years':6}
df["tenure"] = df["tenure"].replace(tenure_categories).astype("Int64")

# categorize recommends
print(df["recommends"].unique()) # ['Recommends' nan 'Approves of CEO' 'Positive Outlook' "Doesn't Recommend" 'Neutral Outlook' 'Negative Outlook' 'No opinion of CEO', 'Disapproves of CEO']
df['recommends'].str.contains('Outlook|CEO', case=False, regex=True).value_counts() # regex OR command
df.insert(df.columns.get_loc("recommends")+1, "outlook_mydata", "") # insert new column after date column
df["outlook_mydata"] = np.where(df['recommends'].str.contains('Outlook', case=False, regex=True) == True, df["recommends"], df["positive_outlook"]) 

df.insert(df.columns.get_loc("approves_of_CEO")+1, "approves_of_CEO_mydata", "") # insert new column after date column
df["approves_of_CEO_mydata"] = np.where(df['recommends'].str.contains('CEO', case=False, regex=True) == True, df["recommends"], df["positive_outlook"]) 

# df.info()
# df.to_csv('data-output/my-data.csv')
# CHECK IF OUTLOOK IN RECOMMENDS WAS TRANSFERRED TO OUTLOOK MY DATA
#  TO THE SAME WITH CEO APPROVAL

df.head(7)

['Recommends' nan 'Approves of CEO' 'Positive Outlook' "Doesn't Recommend"
 'Neutral Outlook' 'Negative Outlook' 'No opinion of CEO'
 'Disapproves of CEO']


Unnamed: 0,company_name,date,year,employee_title,location,current_employee,full_time,tenure,review_title,years_at_company,...,rating_comp,rating_mgmt,recommends,outlook_mydata,positive_outlook,approves_of_CEO,approves_of_CEO_mydata,helpful,advice_to_mgmt,response
0,AlerusFinancial,Wed Jun 17 2020 09:14:03 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2020,Retirement Specialist,"Minneapolis, MN",1,1,3,Great company,I have been working at Alerus full-time for more than 3 years,...,3,5,Recommends,Positive Outlook,Positive Outlook,Approves of CEO,Positive Outlook,0,,
1,AlerusFinancial,Thu Jun 11 2020 09:15:00 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2020,Relationship Manager,"East Lansing, MI",1,1,3,Tremendous Company and Culture,I have been working at Alerus full-time for more than 3 years,...,4,5,Recommends,Positive Outlook,Positive Outlook,Approves of CEO,Positive Outlook,0,Keep up the good work in transparency and thoughtful leadership,
2,AlerusFinancial,Tue Jun 09 2020 06:54:06 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2020,Service Associate,,0,0,2,Great job for students,I worked at Alerus part-time for more than a year,...,4,5,Recommends,Positive Outlook,Positive Outlook,,Positive Outlook,0,,
3,AlerusFinancial,Tue Nov 05 2019 11:14:44 GMT+0100 (MitteleuropÃ¤ische Normalzeit),2019,Retirement Account Analyst,"Albert Lea, MN",1,1,3,Iâ€™m not sure,I have been working at Alerus full-time for more than 3 years,...,2,3,,,,,,0,Donâ€™t pick favorites,
4,AlerusFinancial,Thu Oct 24 2019 06:13:57 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2019,Plan Administrator,,0,1,4,Great place to work,I worked at Alerus full-time for more than 5 years,...,5,3,Recommends,,,Approves of CEO,,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1081,AARCorp,Sun Apr 14 2013 15:33:37 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2013,Engineer,"Goldsboro, NC",1,1,1,Horrible to be an engineer!,I have been working at AAR full-time for less than a year,...,1,1,Doesn't Recommend,Negative Outlook,Negative Outlook,Disapproves of CEO,Negative Outlook,2,,
1082,AARCorp,Thu Apr 11 2013 12:39:18 GMT+0200 (MitteleuropÃ¤ische Sommerzeit),2013,,"Wood Dale, IL",1,1,2,Always looking outside for expertise while ignoring inside experience,I have been working at AAR full-time for more than a year,...,2,2,Doesn't Recommend,Neutral Outlook,Neutral Outlook,No opinion of CEO,Neutral Outlook,5,Quit going outside for people and give internal employees a chance to advance. Stop hiring future idiots,
1083,AARCorp,Wed Feb 27 2013 08:44:14 GMT+0100 (MitteleuropÃ¤ische Normalzeit),2013,Logistics,AfghÄnÄ«,0,1,1,AAR IS one Of The Worst companys,I worked at AAR full-time for less than a year,...,5,1,Doesn't Recommend,Negative Outlook,Negative Outlook,,Negative Outlook,1,"I understand things change. AAR hired me and managment didnt even try and find another position for me, I just found out that AAR has done a lot of people like this. When you hire someone and the company get in a tight dont just fire them, AAR management is really bad about that.","Not sure what to do/say, yet contact information is gdellinger@aarcorp.com.. . V/R,. . G-Man"
1084,AARCorp,Mon Feb 18 2013 08:36:58 GMT+0100 (MitteleuropÃ¤ische Normalzeit),2013,,"Wood Dale, IL",1,1,,AAR has great wellness initiatives,I have been working at AAR full-time,...,4,3,Recommends,Positive Outlook,Positive Outlook,Approves of CEO,Positive Outlook,0,encourage their employees to take advantage of these wellness initiatives,


In [21]:
# drop cleaned columns
df.drop(columns=["date", "years_at_company"], inplace=True)

# show table
df.head(10)


Unnamed: 0,company_name,year,employee_title,location,current_employee,full_time,tenure,review_title,pros,cons,...,rating_career,rating_comp,rating_mgmt,recommends,outlook_mydata,positive_outlook,approves_of_CEO,helpful,advice_to_mgmt,response
0,AlerusFinancial,2020,Retirement Specialist,"Minneapolis, MN",1,1,3.0,Great company,The care about you and your family,Lacks training new employees. Many different databases to search for information,...,3,3,5,Recommends,,Positive Outlook,Approves of CEO,0,,
1,AlerusFinancial,2020,Relationship Manager,"East Lansing, MI",1,1,3.0,Tremendous Company and Culture,Diversified financial services company with various opportunities within the organization. Transparent C Suite leadership. Inclusive company culture.,"Multiple systems that store data and information can make it challenging to find specific info, resources, etc.",...,5,4,5,Recommends,,Positive Outlook,Approves of CEO,0,Keep up the good work in transparency and thoughtful leadership,
2,AlerusFinancial,2020,Service Associate,,0,0,2.0,Great job for students,"Good pay for college students, flexible","Could use some newer, updates technology",...,4,4,5,Recommends,,Positive Outlook,,0,,
3,AlerusFinancial,2019,Retirement Account Analyst,"Albert Lea, MN",1,1,3.0,Iâ€™m not sure,"Flexible schedule, working with clients",Unable to move up,...,3,2,3,,,,,0,Donâ€™t pick favorites,
4,AlerusFinancial,2019,Plan Administrator,,0,1,4.0,Great place to work,"Flexible, friendly enviroment, great benefits and pay",Some of the managers clearly have favorites and it shows,...,3,5,3,Recommends,,,Approves of CEO,0,,
5,AlerusFinancial,2019,Service Associate,"Grand Forks, ND",1,0,3.0,"Overall, great company to work for!",Alerus is a great place to work as they are flexible and typically willing to work with many different situations,Human Resources lacks at Alerus,...,3,5,4,Recommends,,Positive Outlook,Approves of CEO,0,,
6,AlerusFinancial,2019,Administrative Assistant,,0,1,,Good first job,"18 days PTO, coworkers and most customers were great to work with.",No room for advancement/low pay,...,3,4,4,Approves of CEO,,,,0,,
7,AlerusFinancial,2019,,,1,1,,Good place to work,"supportive of work/life balance, tools to help with growth, and the company has great values.",none that I can think of,...,4,4,4,Recommends,,Positive Outlook,Approves of CEO,0,,
8,AlerusFinancial,2018,IT Technical Specialist,"Minneapolis, MN",1,1,3.0,IT Technical Specialist,flexible schedule for employees work week,not enough training when starting,...,3,4,3,Recommends,,Positive Outlook,Approves of CEO,0,yes there is room to advance in certain departments,
9,AlerusFinancial,2018,,,0,1,,Account Administrator,"Great benefits, growing company. Room for growth.","Watch out who you report to, it can make or break your experience there. Lots a poor managers.",...,2,3,3,Positive Outlook,,,Approves of CEO,0,,
