In [1]:
# Dependencies
import pandas as pd

In [2]:
# load CSV
csv_file= "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types
df = pd.read_csv(csv_file, low_memory=False)

In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
reduced_df = df.iloc[:,[0,1,2,3,4,6,7,8,9,10,11,29,30,32,36,37,45,48,56,110,111]]

In [5]:
# Change reading 0 and 1 to No and Yes, respectively
reduced_df = reduced_df.replace({0.0: "No", 1.0: "Yes"})

In [6]:
# Extract rows for only those who attended a bootcamp
attendees = reduced_df.loc[reduced_df["AttendedBootcamp"]=="Yes"]
attendees.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,Codify Academy,,No,,"between 100,000 and 1 million",...,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,DaVinci Coders,45000.0,No,,more than 1 million,...,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,Coder Foundry,75000.0,Yes,3.0,"less than 100,000",...,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,General Assembly,,No,,"between 100,000 and 1 million",...,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,,No,,"between 100,000 and 1 million",...,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [14]:
# Create DataFrame of the different boot camps that had a significant number of attendees
significant = pd.DataFrame(attendees["BootcampName"].value_counts())

significant = significant.reset_index()

significant.columns = ["BootcampName","Count"]

significant.head()

Unnamed: 0,BootcampName,Count
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [15]:
# Count how many attendees of each bootcamp would recommend the bootcamp
recommend_bootcamp = attendees.replace({"Yes":1, "No":2})
recommend_bootcamp = pd.DataFrame(recommend_bootcamp.groupby("BootcampName")["BootcampRecommend"].sum())
recommend_bootcamp = recommend_bootcamp.sort_values("BootcampRecommend", ascending=False)
recommend_bootcamp.reset_index(inplace=True)
recommend_bootcamp.head()

Unnamed: 0,BootcampName,BootcampRecommend
0,General Assembly,110.0
1,Flatiron School,58.0
2,Dev Bootcamp,55.0
3,The Iron Yard,49.0
4,Prime Digital Academy,35.0


In [17]:
# Merge the two created data frames on the name of tbe bootcamp
merge_table = pd.merge(significant,recommend_bootcamp, on = "BootcampName")
merge_table.head()

Unnamed: 0,BootcampName,Count,BootcampRecommend
0,General Assembly,90,110.0
1,Flatiron School,54,58.0
2,Dev Bootcamp,48,55.0
3,The Iron Yard,40,49.0
4,Prime Digital Academy,30,35.0


In [28]:
# Calculate percentage of eac bootcamp's students who are recommenders
merge_table["% Recommend"] = (merge_table ["BootcampRecommend"]/merge_table["Count"]*100)

merge_table["% Recommend"] = merge_table["% Recommend"].map("{:.2f}%".format)


merge_table.head()

Unnamed: 0,BootcampName,Count,BootcampRecommend,% Recommend
0,General Assembly,90,110.0,122.22%
1,Flatiron School,54,58.0,107.41%
2,Dev Bootcamp,48,55.0,114.58%
3,The Iron Yard,40,49.0,122.50%
4,Prime Digital Academy,30,35.0,116.67%


In [34]:
# Export to excel and remove index

merge_table.to_excel("output/Panda_Part_B.xlsx", index=False)