In [1]:
import os

import pandas as pd

### 1. Loading Dataframe

In [2]:
# Positional (0-based) indices of the CSV columns kept for this analysis;
# every other column of the survey file is skipped at read time.
filter_columns = [
    0, 1, 2, 3, 4,
    7, 8, 9, 10, 11,
    29, 30, 32, 36, 37,
    45, 48, 56, 110, 111,
]

survey_csv = 'resources/2016-FCC-New-Coders-Survey-Data.csv'
bootcamp = pd.read_csv(survey_csv, usecols=filter_columns)

# Preview the first three respondents to sanity-check the column selection.
bootcamp.head(3)

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),


### 2. Analyzing Dataframe

In [3]:
# Dimensions of the loaded subtable as a (rows, columns) tuple.
bootcamp.shape

(15620, 20)

In [4]:
# Per-column non-null counts and dtypes; shows how sparsely each field was answered.
bootcamp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15620 entries, 0 to 15619
Data columns (total 20 columns):
Age                     13613 non-null float64
AttendedBootcamp        15380 non-null float64
BootcampFinish          933 non-null float64
BootcampFullJobAfter    635 non-null float64
BootcampLoanYesNo       934 non-null float64
BootcampPostSalary      330 non-null float64
BootcampRecommend       937 non-null float64
ChildrenNumber          2554 non-null float64
CityPopulation          13630 non-null object
CodeEventBootcamp       42 non-null float64
CountryLive             13610 non-null object
EmploymentField         7608 non-null object
EmploymentStatus        12904 non-null object
Gender                  13746 non-null object
HasChildren             4012 non-null float64
HoursLearning           14942 non-null float64
Income                  7329 non-null float64
JobRoleInterest         6503 non-null object
SchoolDegree            13679 non-null object
SchoolMajor            

### 3. Replace 0.0 / 1.0 codes with 'No' / 'Yes'

In [5]:
# Translate the 0.0 / 1.0 codes into 'No' / 'Yes', but only in the flag-style
# columns. A frame-wide replace would also rewrite genuine numeric values
# (e.g. ChildrenNumber == 1.0 or HoursLearning == 0.0) and turn those columns
# into mixed-type object columns.
# NOTE(review): CodeEventBootcamp is presumably a 1.0/NaN attendance flag and is
# kept in the list so it is mapped as before -- confirm against the survey codebook.
yes_no_columns = [
    'AttendedBootcamp',
    'BootcampFinish',
    'BootcampFullJobAfter',
    'BootcampLoanYesNo',
    'BootcampRecommend',
    'CodeEventBootcamp',
    'HasChildren',
]

# Assign back instead of using inplace=True so the step is explicit; re-running
# the cell is harmless because 'No'/'Yes' values no longer match 0.0/1.0.
bootcamp[yes_no_columns] = bootcamp[yes_no_columns].replace({0.0: 'No', 1.0: 'Yes'})

### 4. Calculate the total number of respondents in the subtable you built.

In [6]:
# The respondent total is the number of rows in the subtable. The previous
# expression, isnull().all(1).count(), built a full boolean frame only to count
# one entry per row, and read as if it were counting all-null rows.
total_survey = len(bootcamp)

total_survey

15620

### 5. Create a table out of the rows corresponding only to people who did attend a bootcamp.

In [7]:
def percent_of(part, whole):
    """Return `part` as a percentage of `whole`; NaN when `whole` is zero."""
    return (part / whole) * 100 if whole else float('nan')


# Keep only the respondents who answered 'Yes' to having attended a bootcamp.
# The deep copy detaches the subset from `bootcamp`.
cond = bootcamp['AttendedBootcamp'] == 'Yes'

attended = bootcamp[cond].copy(deep=True)

total_attended = attended['AttendedBootcamp'].count()
average_age    = attended['Age'].mean()

# Gender shares are relative to ALL attendees, so respondents with a missing
# gender are in the denominator and the three shares need not sum to 100%.
gender = attended['Gender']
gender_male   = percent_of((gender == 'male').sum(), total_attended)
gender_female = percent_of((gender == 'female').sum(), total_attended)
# Any reported gender other than male/female; missing answers are not counted.
gender_others = percent_of((gender.notna() & ~gender.isin(['male', 'female'])).sum(), total_attended)

# Reuse the respondent total computed earlier so the percentage and the
# "Total Surveyed" figure shown in the table come from the same number.
perc_attended = percent_of(len(attended), total_survey)

# NOTE(review): this counts every non-null SchoolDegree answer, which includes
# values such as 'some college credit, no degree' and
# 'high school diploma or equivalent (GED)'. It therefore measures "answered the
# degree question", not "holds a degree" -- confirm the intended definition.
perc_college_degree = percent_of(attended['SchoolDegree'].count(), total_attended)
average_salary      = attended['BootcampPostSalary'].mean()

attended_data = [{
    "Total Surveyed" : total_survey,
    "Total Bootcamp attendees" : total_attended,
    "% attended bootcamp" : perc_attended,
    "Avg. Age" : average_age,
    "Has a degree" : perc_college_degree,
    "% Male" : gender_male,
    "% Female" : gender_female,
    "% Non Gender Specific" : gender_others,
    "Average Post Bootcamp Salary" : average_salary
}]

bootcamp_summary = pd.DataFrame(data=attended_data, columns=list(attended_data[0].keys()))

# Display template for each numeric summary column (values become strings).
summary_formats = {
    '% attended bootcamp': "{:,.2f}%",
    'Avg. Age': "{:,.2f}",
    'Has a degree': "{:,.2f}%",
    '% Male': "{:,.2f}%",
    '% Female': "{:,.2f}%",
    '% Non Gender Specific': "{:,.2f}%",
    'Average Post Bootcamp Salary': "${:,.2f}",
}
for column, template in summary_formats.items():
    bootcamp_summary[column] = bootcamp_summary[column].map(template.format)

# Make sure the target folder exists so the export also works on a fresh checkout.
os.makedirs('output', exist_ok=True)
bootcamp_summary.to_excel('output/bootcamp_summary.xlsx',index=False)
bootcamp_summary

Unnamed: 0,Total Surveyed,Total Bootcamp attendees,% attended bootcamp,Avg. Age,Has a degree,% Male,% Female,% Non Gender Specific,Average Post Bootcamp Salary
0,15620,953,6.10%,31.07,87.30%,52.05%,34.21%,1.15%,"$63,740.51"
