In [93]:
# Dependencies
import pandas as pd

In [94]:
# load CSV
file = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [95]:
# Read with pandas--low_memory required to suppress errors about mixed data types
df = pd.read_csv(file, low_memory = False)
df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [96]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
reduced_df = df.iloc[:,[0,1,2,3,4,6,  7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]
reduced_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [97]:
# Change reading 0 and 1 to No and Yes, respectively
reduced_df = reduced_df.replace({0: 'No', 1: 'Yes'})
# Verify clean-up.
reduced_df['AttendedBootcamp'].value_counts()

No     14427
Yes      953
Name: AttendedBootcamp, dtype: int64

In [98]:
# Extract rows for only those who attended a bootcamp
attended_bc = reduced_df.loc[reduced_df["AttendedBootcamp"] == "Yes"]
attended_bc.count()

Age                     818
AttendedBootcamp        953
BootcampFinish          933
BootcampFullJobAfter    635
BootcampLoanYesNo       934
BootcampName            895
BootcampPostSalary      330
BootcampRecommend       937
ChildrenNumber          148
CityPopulation          823
CodeEventBootcamp        27
CountryLive             825
EmploymentField         476
EmploymentStatus        792
Gender                  833
HasChildren             211
HoursLearning           925
Income                  430
JobRoleInterest         402
SchoolDegree            832
SchoolMajor             622
dtype: int64

In [99]:
# Create DataFrame of the different boot camps that had a significant number of attendees
attended_bc["BootcampName"].value_counts()

General Assembly                                                                 90
Flatiron School                                                                  54
Dev Bootcamp                                                                     48
The Iron Yard                                                                    40
Prime Digital Academy                                                            30
Hack Reactor                                                                     29
Turing                                                                           27
App Academy                                                                      22
Hackbright Academy                                                               22
Code Fellows                                                                     21
Bloc.io                                                                          21
New York Code + Design Academy                                              

In [121]:
#filter schools with fewer than 10 attendents and save as df
schoolList = attended_bc.groupby("BootcampName").filter(lambda x :len(x) > 9)
#schoolList["BootcampName"].value_counts()
schoolList_df = pd.DataFrame(schoolList["BootcampName"].value_counts())
#schoolList_df.rese
schoolList_df.reset_index()
schoolList_df["Bootcamp Name"] = schoolList_df.index
schoolList_df

Unnamed: 0,BootcampName,Bootcamp Name
General Assembly,90,General Assembly
Flatiron School,54,Flatiron School
Dev Bootcamp,48,Dev Bootcamp
The Iron Yard,40,The Iron Yard
Prime Digital Academy,30,Prime Digital Academy
Hack Reactor,29,Hack Reactor
Turing,27,Turing
Hackbright Academy,22,Hackbright Academy
App Academy,22,App Academy
Code Fellows,21,Code Fellows


In [122]:
# Count how many attendees of each bootcamp would recommend the bootcamp
schoolRec = schoolList.loc[schoolList["BootcampRecommend"] == "Yes"]
schoolRec_df = pd.DataFrame(schoolRec["BootcampName"].value_counts())
schoolRec_df.reset_index().rename(columns = {'index': 'name', "BootcampName": "n_Recommend"})
schoolRec_df["Bootcamp Name"] = schoolRec_df.index
schoolRec_df

Unnamed: 0,BootcampName,Bootcamp Name
General Assembly,70,General Assembly
Flatiron School,50,Flatiron School
Dev Bootcamp,41,Dev Bootcamp
The Iron Yard,31,The Iron Yard
Hack Reactor,27,Hack Reactor
Turing,26,Turing
Prime Digital Academy,25,Prime Digital Academy
App Academy,20,App Academy
Hackbright Academy,19,Hackbright Academy
MakerSquare,18,MakerSquare


In [123]:
# Merge the two created data frames on the name of tbe bootcamp
combined_df = pd.merge(schoolList_df, schoolRec_df, how = 'left', on = "Bootcamp Name" )
combined_df

Unnamed: 0,BootcampName_x,Bootcamp Name,BootcampName_y
0,90,General Assembly,70
1,54,Flatiron School,50
2,48,Dev Bootcamp,41
3,40,The Iron Yard,31
4,30,Prime Digital Academy,25
5,29,Hack Reactor,27
6,27,Turing,26
7,22,Hackbright Academy,19
8,22,App Academy,20
9,21,Code Fellows,15


In [159]:
# Calculate percentage of each bootcamp's students who are recommenders
combined_df["% Recommend"] = combined_df["BootcampName_y"]/combined_df["BootcampName_x"]*100
final_df = combined_df[["Bootcamp Name", "% Recommend"]].reset_index(drop = True)
final_df = final_df.sort_values("% Recommend", ascending = False).round(2)

final_df['% Recommend'] = final_df['% Recommend'].map("{}%".format)
final_df


Unnamed: 0,Bootcamp Name,% Recommend
19,Makers Academy,100.0%
18,Dev Academy,100.0%
24,Viking Code School,100.0%
6,Turing,96.3%
16,Free Code Camp is not a bootcamp - please scro...,93.33%
5,Hack Reactor,93.1%
1,Flatiron School,92.59%
8,App Academy,90.91%
12,MakerSquare,90.0%
7,Hackbright Academy,86.36%


In [162]:
# Export to excel and remove index
writer = pd.ExcelWriter("bootcamps.xlsx", engine='xlsxwriter')
final_df.to_excel(writer,sheet_name = "dataframe", index=False)
writer.save() 