## Python Mini-Project: Data Clean-Up, Pt. 2

Pay close attention to the prompts to help guide you through this task.

In [1]:
# Dependencies
import numpy as np
import pandas as pd

In [2]:
# load CSV
fcc_new_df = pd.read_csv("./Resources/2016-FCC-New-Coders-Survey-Data.csv", encoding='iso-8859-1', low_memory=False)
#fcc_new_df.head(2)

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types
# You may need to encode this file

In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
# Consider using iloc
lim_fcc_df = fcc_new_df.iloc[:, [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]#.dropna(how='any')
#lim_fcc_df.head(3)

In [5]:
# Change reading 0 and 1 to No and Yes, respectively
fixed_fcc_df = lim_fcc_df.replace({0.0 :"No", 1.0:"Yes"})


In [6]:
# Extract rows for only those who attended a bootcamp
bc_only_df = fixed_fcc_df.loc[fixed_fcc_df['AttendedBootcamp'] == "Yes"]
#bc_only_df.head(2)

In [7]:
# Create DataFrame of the various boot camps along with the number of participants (enrollees)
# Create a DataFrame from using value_counts
camp_rolls = pd.DataFrame()
camp_rolls['Enrollees'] = bc_only_df['BootcampName'].value_counts()
camp_rolls

Unnamed: 0,Enrollees
General Assembly,90
Flatiron School,54
Dev Bootcamp,48
The Iron Yard,40
Prime Digital Academy,30
...,...
World Tech Makers,1
Code 42,1
Launch School (formerly Tealeaf Academy),1
Alphappl,1


In [8]:
# Count how many attendees of each bootcamp would recommend the bootcamp
#camp_rolls['Recommended'] = bc_only_df["BootcampName"].loc[bc_only_df['BootcampRecommend'] == "Yes"].value_counts().map("{:}".format)
camp_report = pd.DataFrame()
camp_report["Recommenders"] = bc_only_df["BootcampName"].loc[bc_only_df['BootcampRecommend'] == "Yes"].value_counts()
camp_report.head(3)

Unnamed: 0,Recommenders
General Assembly,70
Flatiron School,50
Dev Bootcamp,41


In [9]:
# Merge the two created data frames on the name of tbe bootcamp

camp_report['Enrollees'] = camp_rolls["Enrollees"].map("{:}".format)
camp_report.head()

Unnamed: 0,Recommenders,Enrollees
General Assembly,70,90
Flatiron School,50,54
Dev Bootcamp,41,48
The Iron Yard,31,40
Hack Reactor,27,29


In [13]:
# Calculate percentage of each bootcamp's students who are recommenders
# Think of what should be the numerator and the demoninator
#camp_rolls_calc = camp_rolls[].mean()
#grouped_speed_vehicles_df = vehicles_df.groupby("Speed")
camp_report['% Recommend'] = (camp_report["Recommenders"] / camp_rolls["Enrollees"] * 100).map("{:.2f}%".format)
# Sort results in descending order
camp_report
# Format for percentages

Unnamed: 0,Recommenders,Enrollees,% Recommend
General Assembly,70,90,77.78%
Flatiron School,50,54,92.59%
Dev Bootcamp,41,48,85.42%
The Iron Yard,31,40,77.50%
Hack Reactor,27,29,93.10%
...,...,...,...
We Can Code IT,1,3,33.33%
CodeCraft School,1,1,100.00%
Starter League,1,3,33.33%
CodeMasters Academy,1,1,100.00%


In [11]:
# Export to excel and remove index

camp_report.to_excel("./output/bootcamp_report.xls")
