**BIG BIG HINT! Look in the instructions to guide you on this task.**

In [1]:
# Dependencies
from pathlib import Path

import pandas as pd

In [2]:
# Path to the survey CSV, relative to the notebook's working directory.
# Nothing is loaded here; the file is read in a later cell.
csv_path = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read the survey CSV into a DataFrame.
# The file is decoded as ISO-8859-1; low_memory=False asks pandas not to
# parse the file in chunks.
df = pd.read_csv(
    csv_path,
    encoding="iso-8859-1",
    low_memory=False,
)

In [4]:
# Inspect all column names (pandas abbreviates the listing for wide frames)
df.columns

Index(['Age', 'AttendedBootcamp', 'BootcampFinish', 'BootcampFullJobAfter',
       'BootcampLoanYesNo', 'BootcampMonthsAgo', 'BootcampName',
       'BootcampPostSalary', 'BootcampRecommend', 'ChildrenNumber',
       ...
       'ResourceSoloLearn', 'ResourceStackOverflow', 'ResourceTreehouse',
       'ResourceUdacity', 'ResourceUdemy', 'ResourceW3Schools',
       'ResourceYouTube', 'SchoolDegree', 'SchoolMajor', 'StudentDebtOwe'],
      dtype='object', length=113)

In [5]:
# Keep only the survey columns used in this analysis.
# Columns are selected by name rather than by position (previously positions
# 0-4, 7-11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111) so the selection does
# not silently pick up different data if the CSV's column order changes.
columns_to_keep = [
    "Age",
    "AttendedBootcamp",
    "BootcampFinish",
    "BootcampFullJobAfter",
    "BootcampLoanYesNo",
    "BootcampPostSalary",
    "BootcampRecommend",
    "ChildrenNumber",
    "CityPopulation",
    "CodeEventBootcamp",
    "CountryLive",
    "EmploymentField",
    "EmploymentStatus",
    "Gender",
    "HasChildren",
    "HoursLearning",
    "Income",
    "JobRoleInterest",
    "SchoolDegree",
    "SchoolMajor",
]
# .copy() makes the reduced frame independent of the full survey frame.
reduced_df = df.loc[:, columns_to_keep].copy()
reduced_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,more than 1 million,,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [6]:
# Change 0 to "No" and 1 to "Yes", but only in the yes/no response columns.
# A frame-wide replace would also rewrite genuine numeric answers (for example
# a ChildrenNumber or HoursLearning of 1 would become "Yes") and turn those
# numeric columns into mixed text/number columns.
# NOTE(review): the column list is inferred from the column names; confirm
# that each of these holds 0/1 flags.
yes_no_columns = [
    "AttendedBootcamp",
    "BootcampFinish",
    "BootcampFullJobAfter",
    "BootcampLoanYesNo",
    "BootcampRecommend",
    "CodeEventBootcamp",
    "HasChildren",
]
yes_no_labels = {0.0: "No", 1.0: "Yes"}
# Nested mapping {column: {old: new}} restricts each replacement to its column.
reduced_df = reduced_df.replace({column: yes_no_labels for column in yes_no_columns})
reduced_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,more than 1 million,,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,"between 100,000 and 1 million",,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [7]:
# Total number of survey respondents = number of rows in the reduced frame
total_surveyed = reduced_df.shape[0]
total_surveyed

15620

In [8]:
# Keep only the respondents whose AttendedBootcamp answer is "Yes"
is_attendee = reduced_df["AttendedBootcamp"] == "Yes"
attended_bootcamp = reduced_df.loc[is_attendee]
# Show the number of non-null answers per column among those attendees
attended_bootcamp.count()

Age                     818
AttendedBootcamp        953
BootcampFinish          933
BootcampFullJobAfter    635
BootcampLoanYesNo       934
BootcampPostSalary      330
BootcampRecommend       937
ChildrenNumber          148
CityPopulation          823
CodeEventBootcamp        27
CountryLive             825
EmploymentField         476
EmploymentStatus        792
Gender                  833
HasChildren             211
HoursLearning           925
Income                  430
JobRoleInterest         402
SchoolDegree            832
SchoolMajor             622
dtype: int64

In [9]:
# Mean age of bootcamp attendees
attendee_ages = attended_bootcamp["Age"]
average_age = attendee_ages.mean()
average_age

31.066014669926652

In [10]:
# Number of bootcamp attendees: non-null AttendedBootcamp answers in the
# attendee-only frame
attendance_answers = attended_bootcamp["AttendedBootcamp"]
amount_attended = attendance_answers.count()
amount_attended

953

In [11]:
# Number of attendees with a non-null SchoolDegree answer.
# NOTE(review): this counts every attendee who answered SchoolDegree,
# including answers such as "some college credit, no degree" that appear in
# the data preview. Confirm that is the intended meaning of "hold degrees".
degree_holders = attended_bootcamp["SchoolDegree"].notna().sum()
degree_holders

832

In [12]:
# Count attendees who self-identify as male or female; every other answered
# gender response is grouped together as non-binary.
# The per-answer tally is computed once and read with .get(..., 0) so a
# missing category yields 0 instead of raising KeyError.
gender_counts = attended_bootcamp["Gender"].value_counts()
total_gender_count = attended_bootcamp["Gender"].count()
male = gender_counts.get("male", 0)
female = gender_counts.get("female", 0)
non_binary = total_gender_count - male - female

In [13]:
# Share of all survey respondents who attended a bootcamp, as a percentage
attended_share = amount_attended / total_surveyed
percent_attended = attended_share * 100
percent_attended

6.101152368758003

In [14]:
# Percentage of attendees in each gender group, relative to the number of
# attendees who answered the gender question
percent_male, percent_female, percent_non_binary = (
    (group_count / total_gender_count) * 100
    for group_count in (male, female, non_binary)
)

In [15]:
# Percentage of attendees with a school degree (degree_holders relative to
# the total number of attendees)
degree_share = degree_holders / amount_attended
percent_degree = degree_share * 100

In [16]:
# Mean post-bootcamp salary among bootcamp attendees
post_bootcamp_salaries = attended_bootcamp["BootcampPostSalary"]
average_salary = post_bootcamp_salaries.mean()
average_salary

63740.50606060606

In [17]:
# Create a new table consolidating the above calculations.
# Each (label, value) pair becomes one column of a single-row summary table;
# the pair order is also the column order.
summary_items = [
    ("Total Surveyed", total_surveyed),
    ("Total Attendees", amount_attended),
    ("% attended Bootcamp", percent_attended),
    ("Average Age", average_age),
    ("% Male", percent_male),
    ("% Female", percent_female),
    ("% Non-Binary", percent_non_binary),
    ("% who Earned Degree", percent_degree),
    ("Average Post Bootcamp Salary", average_salary),
]
column_order = [label for label, _ in summary_items]

# Passing `columns` pins the column order; values are rounded to 2 decimals.
bootcamp_table = pd.DataFrame(
    {label: [value] for label, value in summary_items},
    columns=column_order,
).round(2)

bootcamp_table

Unnamed: 0,Total Surveyed,Total Attendees,% attended Bootcamp,Average Age,% Male,% Female,% Non-Binary,% who Earned Degree,Average Post Bootcamp Salary
0,15620,953,6.1,31.07,59.54,39.14,1.32,87.3,63740.51


In [18]:
# Improve formatting before outputting spreadsheet.
# Percentage columns get a trailing "%", the salary column a leading "$";
# all use thousands separators and two decimals.
column_formats = {
    "% attended Bootcamp": "{0:,.2f}%",
    "% Male": "{0:,.2f}%",
    "% Female": "{0:,.2f}%",
    "% Non-Binary": "{0:,.2f}%",
    "% who Earned Degree": "{0:,.2f}%",
    "Average Post Bootcamp Salary": "${0:,.2f}",
}
for column, template in column_formats.items():
    # Format only while the column is still numeric, so re-running this cell
    # does not fail by applying a float format to already-formatted text.
    if pd.api.types.is_numeric_dtype(bootcamp_table[column]):
        bootcamp_table[column] = bootcamp_table[column].map(template.format)

bootcamp_table

Unnamed: 0,Total Surveyed,Total Attendees,% attended Bootcamp,Average Age,% Male,% Female,% Non-Binary,% who Earned Degree,Average Post Bootcamp Salary
0,15620,953,6.10%,31.07,59.54%,39.14%,1.32%,87.30%,"$63,740.51"


In [19]:
# Export to Excel.
# The destination folder is created first (no error if it already exists) so
# the write does not fail when the folder is missing.
output_file = "output/Bootcamppart1.xlsx"
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
bootcamp_table.to_excel(output_file, index=False)