# Data Processing

In [11]:
import pandas as pd

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [14]:
# import survey results into a df
path = r"./North American Engineering Mentoring Program.csv"

survey_raw = pd.read_csv(path)

survey_raw

Unnamed: 0,ID,Start time,Completion time,Do you currently have a mentor?,Have you ever had a formal mentor?,"If you do not have a mentor, would you like a mentor?","If you do have a mentor, would you like an additional mentor?",What are the benefits you've gained from having a mentor or what benefits would you like to get from having a mentor?,What topics would be most important for you to discuss with your mentor?,Would you be interested in having a mentor inside of the Engineering organization or outside of Engineering?,"If outside, what function would you want the mentor to be in? (e.g. Sales, Marketing, Supply Chain, Finance, HR, RD&E, Enterprise Excellence, other?)"
0,1,3/8/21 8:36:41,3/8/21 8:37:18,No,No,No,No,,,,
1,2,3/8/21 8:38:12,3/8/21 8:42:08,No,No,Yes,No,I would like to gain a better understanding of...,Communicating Effectively\n;Influencing Others...,Outside,Executive Management in RD&E or a Business Unit
2,3,3/8/21 8:35:57,3/8/21 8:42:42,Yes,No,No,Yes,I have benefited greatly from coworkers and ma...,Career Development\n;Building Your Brand and C...,Inside,
3,4,3/8/21 8:43:25,3/8/21 8:45:32,No,No,Yes,No,"I would like to have help seeing the ""bigger p...",Career Development\n;Building Your Brand and C...,Either,"Marketing, Finance, Sales"
4,5,3/8/21 8:45:50,3/8/21 8:46:35,No,No,No,No,,Technical Skill Building\n;,Either,
5,6,3/8/21 8:59:33,3/8/21 9:00:55,No,Yes,No,No,Career navigation and exposure,Career Development\n;,Outside,other
6,7,3/8/21 9:17:46,3/8/21 9:21:14,No,No,Yes,No,Understanding the business more broadly and ha...,Career Development\n;Leading Teams\n;,Outside,Eagan R&D
7,8,3/8/21 9:22:18,3/8/21 9:28:04,No,No,Yes,No,I feel that it is crucial to one's success to ...,Career Development\n;Building Your Brand and C...,Either,sales or finance
8,9,3/8/21 9:39:28,3/8/21 9:43:04,No,No,Yes,No,Cross functionality across the business segmen...,Career Development\n;Influencing Others (even ...,Either,Someone that has held numerous leadership role...
9,10,3/8/21 9:27:15,3/8/21 10:10:41,Yes,Yes,,No,Having a mentor has helped immensely with thin...,Leading Teams\n;Negotiation Skills\n;Building ...,Either,


In [20]:
survey_raw.drop([0], inplace=True)

# survey_raw.columns

In [21]:
# rename columns so that are easier to deal with
col = {
    'ID':'id', 
    'Start time':'start_time', 
    'Completion time':'end_time',
    'Do you currently have a mentor?':'mentor_current', 
    'Have you ever had a formal mentor?':'mentor_ever',
    'If you do not have a mentor, would you like a mentor?':'mentor_desired',
    'If you do have a mentor, would you like an additional mentor?':'mentor_additional',
    "What are the benefits you've gained from having a mentor or what benefits would you like to get from having a mentor?":'mentor_benefits_str',
    'What topics would be most important for you to discuss with your mentor?':'mentor_topics',
    'Would you be interested in having a mentor inside of the Engineering organization or outside of Engineering?':'mentor_in_out',
    'If outside, what function would you want the mentor to be in? (e.g. Sales, Marketing, Supply Chain, Finance, HR, RD&E, Enterprise Excellence, other?)':'mentor_function'
}

survey_raw = survey_raw.rename(columns=col)
survey_raw.head()

Unnamed: 0,id,start_time,end_time,mentor_current,mentor_ever,mentor_desired,mentor_additional,mentor_benefits_str,mentor_topics,mentor_in_out,mentor_function
1,2,3/8/21 8:38:12,3/8/21 8:42:08,No,No,Yes,No,I would like to gain a better understanding of...,Communicating Effectively\n;Influencing Others...,Outside,Executive Management in RD&E or a Business Unit
2,3,3/8/21 8:35:57,3/8/21 8:42:42,Yes,No,No,Yes,I have benefited greatly from coworkers and ma...,Career Development\n;Building Your Brand and C...,Inside,
3,4,3/8/21 8:43:25,3/8/21 8:45:32,No,No,Yes,No,"I would like to have help seeing the ""bigger p...",Career Development\n;Building Your Brand and C...,Either,"Marketing, Finance, Sales"
4,5,3/8/21 8:45:50,3/8/21 8:46:35,No,No,No,No,,Technical Skill Building\n;,Either,
5,6,3/8/21 8:59:33,3/8/21 9:00:55,No,Yes,No,No,Career navigation and exposure,Career Development\n;,Outside,other


In [22]:
# pull out mentor topics and tidy the data
mentor_topics = survey_raw[['id','mentor_topics']].copy().dropna()
mentor_topics

Unnamed: 0,id,mentor_topics
1,2,Communicating Effectively\n;Influencing Others...
2,3,Career Development\n;Building Your Brand and C...
3,4,Career Development\n;Building Your Brand and C...
4,5,Technical Skill Building\n;
5,6,Career Development\n;
6,7,Career Development\n;Leading Teams\n;
7,8,Career Development\n;Building Your Brand and C...
8,9,Career Development\n;Influencing Others (even ...
9,10,Leading Teams\n;Negotiation Skills\n;Building ...
10,11,Project Execution\n;Technical Skill Building\n...


In [23]:
# create a list of the unique responses to the mentor topic question
topics_all = mentor_topics['mentor_topics']
topic_options = []

# create def here for the cleaning in the loop so that it can be used in the cell below as well
def update(text):
    text = text.replace("Business acumen;","Business acumen\n")
    text = text.replace(";","\n")
    text = text.replace("\n;","\n")
    text = text.title()
    text = text.splitlines()
    return text

for response in topics_all:
    response = update(response)
    topic_options.extend(response)

topics_ = list(set(topic_options))
topics_ = [i for i in topics_ if i]
topics_
    

['Communicating Effectively',
 'Strategic Resource Allocation',
 'Career Development',
 'Project Execution',
 'Further Formal Education Or Corporate Training',
 'Sounding Board, Thoughts And Ideas',
 'Business Acumen',
 'Leading Teams',
 'Building Your Brand And Credibility In The Organization',
 'Negotiation Skills',
 "Influencing Others (Even If You Don'T Have Direct Reports)  ",
 'Technical Skill Building']

In [24]:
mentor_topics = mentor_topics.reindex(columns=[*mentor_topics.columns.tolist(), *topics_], fill_value=0)

In [25]:
for ix, row in mentor_topics.iterrows():
    topics = update(row['mentor_topics'])
    topics = [i for i in topics if i]
    for topic in topics:
        mentor_topics.at[ix, topic] = 1

# mentor_topics.loc[:,'Total'] = mentor_topics.sum(numeric_only=True, axis=1) # this adds a column that is the total of that row
mentor_topics.loc['Total'] = mentor_topics.sum(numeric_only=True, axis=0) # this adds a row, that is the total for that column
mentor_topics.drop(['mentor_topics'], axis=1, inplace=True)

save_path_topics = r"C:\Users\coffmlv\Documents\1_GEI\Mentor_program\NA Mentor Topics.csv"
mentor_topics.to_csv(save_path_topics)

mentor_topics.drop('id', axis=1, inplace=True)


In [26]:
survey = pd.concat([survey_raw, mentor_topics], axis=1)
survey

Unnamed: 0,id,start_time,end_time,mentor_current,mentor_ever,mentor_desired,mentor_additional,mentor_benefits_str,mentor_topics,mentor_in_out,mentor_function,Communicating Effectively,Strategic Resource Allocation,Career Development,Project Execution,Further Formal Education Or Corporate Training,"Sounding Board, Thoughts And Ideas",Business Acumen,Leading Teams,Building Your Brand And Credibility In The Organization,Negotiation Skills,Influencing Others (Even If You Don'T Have Direct Reports),Technical Skill Building
1,2.0,3/8/21 8:38:12,3/8/21 8:42:08,No,No,Yes,No,I would like to gain a better understanding of...,Communicating Effectively\n;Influencing Others...,Outside,Executive Management in RD&E or a Business Unit,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,3.0,3/8/21 8:35:57,3/8/21 8:42:42,Yes,No,No,Yes,I have benefited greatly from coworkers and ma...,Career Development\n;Building Your Brand and C...,Inside,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,4.0,3/8/21 8:43:25,3/8/21 8:45:32,No,No,Yes,No,"I would like to have help seeing the ""bigger p...",Career Development\n;Building Your Brand and C...,Either,"Marketing, Finance, Sales",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,5.0,3/8/21 8:45:50,3/8/21 8:46:35,No,No,No,No,,Technical Skill Building\n;,Either,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,6.0,3/8/21 8:59:33,3/8/21 9:00:55,No,Yes,No,No,Career navigation and exposure,Career Development\n;,Outside,other,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7.0,3/8/21 9:17:46,3/8/21 9:21:14,No,No,Yes,No,Understanding the business more broadly and ha...,Career Development\n;Leading Teams\n;,Outside,Eagan R&D,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,8.0,3/8/21 9:22:18,3/8/21 9:28:04,No,No,Yes,No,I feel that it is crucial to one's success to ...,Career Development\n;Building Your Brand and C...,Either,sales or finance,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
8,9.0,3/8/21 9:39:28,3/8/21 9:43:04,No,No,Yes,No,Cross functionality across the business segmen...,Career Development\n;Influencing Others (even ...,Either,Someone that has held numerous leadership role...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
9,10.0,3/8/21 9:27:15,3/8/21 10:10:41,Yes,Yes,,No,Having a mentor has helped immensely with thin...,Leading Teams\n;Negotiation Skills\n;Building ...,Either,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
10,11.0,3/8/21 10:15:57,3/8/21 10:19:23,Yes,No,,No,Benefits include more years experience in the ...,Project Execution\n;Technical Skill Building\n...,Inside,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0


In [27]:
survey_bin = survey.copy()
survey_bin = survey_bin.replace('No', 0)
survey_bin = survey_bin.replace('Yes', 1)
survey_bin.drop(index='Total', inplace=True)
survey_bin.loc['Total'] = survey_bin.sum(numeric_only=True, axis=0)
survey_bin

Unnamed: 0,id,start_time,end_time,mentor_current,mentor_ever,mentor_desired,mentor_additional,mentor_benefits_str,mentor_topics,mentor_in_out,mentor_function,Communicating Effectively,Strategic Resource Allocation,Career Development,Project Execution,Further Formal Education Or Corporate Training,"Sounding Board, Thoughts And Ideas",Business Acumen,Leading Teams,Building Your Brand And Credibility In The Organization,Negotiation Skills,Influencing Others (Even If You Don'T Have Direct Reports),Technical Skill Building
1,2.0,3/8/21 8:38:12,3/8/21 8:42:08,0.0,0.0,1.0,0.0,I would like to gain a better understanding of...,Communicating Effectively\n;Influencing Others...,Outside,Executive Management in RD&E or a Business Unit,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0
2,3.0,3/8/21 8:35:57,3/8/21 8:42:42,1.0,0.0,0.0,1.0,I have benefited greatly from coworkers and ma...,Career Development\n;Building Your Brand and C...,Inside,,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,4.0,3/8/21 8:43:25,3/8/21 8:45:32,0.0,0.0,1.0,0.0,"I would like to have help seeing the ""bigger p...",Career Development\n;Building Your Brand and C...,Either,"Marketing, Finance, Sales",0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
4,5.0,3/8/21 8:45:50,3/8/21 8:46:35,0.0,0.0,0.0,0.0,,Technical Skill Building\n;,Either,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5,6.0,3/8/21 8:59:33,3/8/21 9:00:55,0.0,1.0,0.0,0.0,Career navigation and exposure,Career Development\n;,Outside,other,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7.0,3/8/21 9:17:46,3/8/21 9:21:14,0.0,0.0,1.0,0.0,Understanding the business more broadly and ha...,Career Development\n;Leading Teams\n;,Outside,Eagan R&D,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
7,8.0,3/8/21 9:22:18,3/8/21 9:28:04,0.0,0.0,1.0,0.0,I feel that it is crucial to one's success to ...,Career Development\n;Building Your Brand and C...,Either,sales or finance,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
8,9.0,3/8/21 9:39:28,3/8/21 9:43:04,0.0,0.0,1.0,0.0,Cross functionality across the business segmen...,Career Development\n;Influencing Others (even ...,Either,Someone that has held numerous leadership role...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0
9,10.0,3/8/21 9:27:15,3/8/21 10:10:41,1.0,1.0,,0.0,Having a mentor has helped immensely with thin...,Leading Teams\n;Negotiation Skills\n;Building ...,Either,,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0
10,11.0,3/8/21 10:15:57,3/8/21 10:19:23,1.0,0.0,,0.0,Benefits include more years experience in the ...,Project Execution\n;Technical Skill Building\n...,Inside,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0


In [29]:
save_path = r"./NA Mentor Cleaned.csv"
save_path_bin = r"./NA Mentor Cleaned binary.csv"

survey.to_csv(save_path)
survey_bin.to_csv(save_path_bin)