# Bootcamp Data Cleaning

In [None]:
import pandas as pd
import numpy as np 

# Load the raw survey export into a DataFrame.
# low_memory=False makes pandas parse the file in a single pass instead of
# in chunks, which avoids mixed-dtype inference on wide CSV files.
survey_csv_path = "../resources/2016-FCC-New-Coders-Survey-Data.CSV"
camp_data = pd.read_csv(survey_csv_path, low_memory=False)

# Preview the first rows to confirm the load.
camp_data.head()

## Part 1

In [2]:
# Keep a positional subset of the survey columns:
# 0-4, 6-11, then 29, 30, 32, 36, 37, 45, 48, 56, 110 and 111.
# NOTE(review): selection is by position, so it depends on the CSV column order.
selected_positions = [*range(0, 5), *range(6, 12), 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]
camp1_data = camp_data.iloc[:, selected_positions]

In [3]:
# Recode 0/1 indicator columns to 'No'/'Yes'.
#
# A frame-wide replace(0, 'No') / replace(1, 'Yes') also rewrites genuine
# numeric values in every other column (e.g. a ChildrenNumber or
# HoursLearning of 1 would become 'Yes'). To avoid corrupting those, only
# columns whose non-missing values are all 0 or 1 are recoded.
yes_no_labels = {0: 'No', 1: 'Yes'}

binary_cols = [
    col for col in camp1_data.columns
    if camp1_data[col].dropna().isin([0, 1]).all()
]

# assign() returns a new frame, so the cell stays idempotent: on a re-run the
# recoded columns hold strings, no longer match 0/1, and are left untouched.
camp1_data = camp1_data.assign(
    **{col: camp1_data[col].replace(yes_no_labels) for col in binary_cols}
)


In [4]:
# Keep only the respondents whose AttendedBootcamp answer is 'Yes'.
is_attendee = camp1_data['AttendedBootcamp'] == 'Yes'
attended_camp = camp1_data[is_attendee]

attended_camp.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,Codify Academy,,No,,"between 100,000 and 1 million",...,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,DaVinci Coders,45000.0,No,,more than 1 million,...,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,Coder Foundry,75000.0,Yes,3.0,"less than 100,000",...,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,General Assembly,,No,,"between 100,000 and 1 million",...,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,,No,,"between 100,000 and 1 million",...,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [5]:
# Tally attendees per bootcamp (rows with a missing BootcampName are not
# counted by groupby) and show the ten bootcamps with the most attendees.
grouped_camp_att = attended_camp.groupby(['BootcampName'])
att_count = grouped_camp_att['BootcampName'].count()

# One-column frame indexed by bootcamp name.
summary_table1 = att_count.to_frame(name='Number of Attendees')

summary_table1_sorted = (
    summary_table1
    .sort_values(by='Number of Attendees', ascending=False)
    .head(10)
)

summary_table1_sorted

Unnamed: 0_level_0,Number of Attendees
BootcampName,Unnamed: 1_level_1
General Assembly,90
Flatiron School,54
Dev Bootcamp,48
The Iron Yard,40
Prime Digital Academy,30
Hack Reactor,29
Turing,27
App Academy,22
Hackbright Academy,22
Bloc.io,21


In [6]:
# Number of distinct BootcampName values among attendees.
# dropna=False keeps a missing name as one distinct value, matching unique().
attended_camp['BootcampName'].nunique(dropna=False)

129

In [7]:
# Count, per bootcamp, the attendees who answered 'Yes' to BootcampRecommend,
# then show the ten bootcamps with the most recommenders.
would_recommend = attended_camp['BootcampRecommend'] == 'Yes'
reco_camp = attended_camp[would_recommend]

grouped_camp_reco = reco_camp.groupby(['BootcampName'])
reco_count = grouped_camp_reco['BootcampName'].count()

# One-column frame indexed by bootcamp name.
summary_table2 = reco_count.to_frame(name='Number Recommended')

summary_table2_sorted = (
    summary_table2
    .sort_values(by='Number Recommended', ascending=False)
    .head(10)
)

summary_table2_sorted

Unnamed: 0_level_0,Number Recommended
BootcampName,Unnamed: 1_level_1
General Assembly,70
Flatiron School,50
Dev Bootcamp,41
The Iron Yard,31
Hack Reactor,27
Turing,26
Prime Digital Academy,25
App Academy,20
Hackbright Academy,19
MakerSquare,18


In [8]:
# Merge the two summary frames on the name of the bootcamp.
add_summary_table1 = summary_table1.reset_index()
add_summary_table2 = summary_table2.reset_index()

# Left join: keep every bootcamp that had attendees, even when none of them
# recommended it. The join key only needs to be named once.
merged_summary_table = pd.merge(
    add_summary_table1,
    add_summary_table2,
    how="left",
    on="BootcampName",
)

# Bootcamps missing from summary_table2 had zero recommenders. The left join
# leaves NaN there (and turns the column into floats), so record an explicit 0
# and restore the integer dtype; otherwise the later percentage is NaN too.
merged_summary_table['Number Recommended'] = (
    merged_summary_table['Number Recommended'].fillna(0).astype(int)
)

merged_summary_table = merged_summary_table.set_index('BootcampName')

In [9]:
# Calculate the percentage of each bootcamp's attendees who recommend it.
merged_summary_table['Percentage Recommended'] = (
    merged_summary_table['Number Recommended']
    / merged_summary_table['Number of Attendees']
    * 100
)

merged_summary_table_sorted = merged_summary_table.sort_values(
    'Percentage Recommended', ascending=False
)

# Format a *copy* for display. Plain assignment would only alias the sorted
# frame, so the string formatting below would overwrite its numeric columns
# and make merged_summary_table_sorted unusable for further calculations.
whole_number = '{:,.0f}'.format
merged_clean_sorted = merged_summary_table_sorted.copy()
merged_clean_sorted['Number Recommended'] = merged_clean_sorted['Number Recommended'].map(whole_number)
merged_clean_sorted['Percentage Recommended'] = merged_clean_sorted['Percentage Recommended'].map(whole_number)

merged_clean_sorted

Unnamed: 0_level_0,Number of Attendees,Number Recommended,Percentage Recommended
BootcampName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CodeCraft School,1,1,100
LEARN Academy,3,3,100
Origin Code Academy,1,1,100
CodeaCamp,8,8,100
Codecademy Labs,2,2,100
Viking Code School,10,10,100
Coder Factory,2,2,100
Coder Foundry,1,1,100
Coder's Lab,1,1,100
Stackademy,1,1,100
