In [101]:
# Import Dependencies for Setup
import pandas as pd

# Load Files
school_data_to_load = "/Users/brianroberts1/Documents/GitHub/pandas_challenge/PyCitySchools/schools_complete_data.csv"
student_data_to_load = "/Users/brianroberts1/Documents/GitHub/pandas_challenge/PyCitySchools/students_complete_data.csv"

# Read the School and Student Data Files and store them into Pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)
    
# Combine data into single dataset
school_data_whole = pd.merge(student_data, school_data, how="left", on = ["school_name", "school_name"])

In [102]:
### DISTRICT SUMMARY ###

# Calculate the total number of schools in the district
total_number_of_schools = school_data_whole['school_name'].nunique()


# Calculate the total number of students in the district
total_number_of_students = school_data_whole['student_name'].count()


# Calculate the total budget for the district
total_budget = school_data['budget'].sum()


# Calculate the average math score across the district
avg_math_score = school_data_whole['math_score'].mean(skipna=True)

# Calculate the average reading score across the district
avg_reading_score = school_data_whole['reading_score'].mean(skipna=True)

# Calculate the percentage of students with a passing math score, 70 or greater
percent_passing_math = len(school_data_whole[school_data_whole['math_score']>=70])/(total_number_of_students)


# Calculate the percentage of students with a passing reading score, 70 or greater
percent_passing_reading = len(school_data_whole[school_data_whole['reading_score']>=70])/(total_number_of_students)


# Calculate the percentage of students with passing math and reading scores
percent_passing_both = len(school_data_whole[(school_data_whole['math_score']>=70)&(school_data_whole['reading_score']>=70)])/(total_number_of_students)


# Create a dataframe to hold the results above
district_summary = {'Total Schools':[total_number_of_schools],'Total Students':[total_number_of_students],'Total Budget':[total_budget],'Average Math Score':[avg_math_score],'Average Reading Score':[avg_reading_score], 
                    '% Passing Math':[percent_passing_math],'% Passing Reading':[percent_passing_reading],'% Overall Passing':[percent_passing_both]}
district_summary_df = pd.DataFrame(district_summary, columns = ['Total Schools','Total Students','Total Budget','Average Math Score','Average Reading Score','% Passing Math','% Passing Reading','% Overall Passing'])

# Format cells
district_summary_df.style.format({"Total Students":"{:,}","Total Budget":"${:,.2f}","Average Math Score":"{:.2f}","Average Reading Score":"{:.2f}",
                                  "% Passing Math":"{:.2%}","% Passing Reading":"{:.2%}","% Overall Passing":"{:.2%}"})





Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


In [115]:
### SCHOOL SUMMARY ###

# Group the merged data frames by school name  
group_by_school = school_data_whole.groupby(['school_name'])

# Determine the school type for each school
school_type = group_by_school['type'].first()

# Determine the total number of students at each school
school_students = group_by_school['size'].first()

# Determine the total budget for each school
school_budget = group_by_school['budget'].first()

# Calculate the per student budget for each school
school_budget_per_student = (school_budget/school_students)

# Calculate the average math score for each school
school_avg_math = group_by_school['math_score'].mean()

# Calculate the average reading score for each school
school_avg_reading = group_by_school['reading_score'].mean()

# Calculate the percent passing math for each school
school_passing_math = school_data_whole[school_data_whole['math_score']>=70].groupby(['school_name']).size()
school_percent_pass_math = (school_passing_math/school_students)*100

# Calculate the percent passing reading for each school
school_passing_reading = school_data_whole[school_data_whole['reading_score']>=70].groupby(['school_name']).size()
school_percent_pass_read = (school_passing_reading/school_students)*100

# Calculate the percent passing both for each school
school_passing_overall = len((school_passing_reading)&(school_passing_math))
school_percent_pass_ovr = (school_passing_overall/school_students)

# Create a dataframe to hold the results above
school_summary = {'School Type':[school_type],'Total Students':[school_students],'Total School Budget':[school_budget],'Per Student Budget':[school_budget_per_student]
                 ,'Average Math Score':[school_avg_math],'Average Reading Score':[school_avg_reading],'% Passing Math':[school_percent_pass_math],
                 '% Passing Reading':[school_percent_pass_read],'% Overall Passing':[school_percent_pass_ovr]}

school_summary_df = pd.DataFrame(school_summary)

# Format Cells
school_summary_df.style.format({"Total Students":"{:,}",
                                "Total School Budget":"${:,.2f}",
                                "Per Student Budget":"{:,.2f}",
                                "Average Math Score":"{:.2f}",
                                "Average Reading Score":"{:.2f}",
                                "% Passing Math":"{:.2%}",
                                "% Passing Reading":"{:.2%}",
                                "% Overall Passing":"{:.2%}"})




TypeError: unsupported format string passed to Series.__format__

<pandas.io.formats.style.Styler at 0x7fc07f0f9940>

In [104]:
### Top Performing Schools (By % Overall Passing) ###

# Sort and display the top five performing schools by % overall passing
top_schools = school_summary_df.sort_values(by='% Overall Passing',ascending=False)
top_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,0 District 1 District 2 Charter 3...,0 2917 1 2949 2 1761 3 4635 4 ...,0 1910635 1 1884411 2 1056600 3 ...,0 655.0 1 639.0 2 600.0 3 652....,school_name Bailey High School 77.048432...,school_name Bailey High School 81.033963...,Bailey High School NaN Cabrera High Schoo...,Bailey High School NaN Cabrera High Schoo...,0 0.005142 1 0.005086 2 0.008518 3...


In [105]:
### Bottom Performing Schools (By % Overall Passing) ###

# Sort and display the bottom five performing schools by % overall passing
bottom_schools = school_summary_df.sort_values(by='% Overall Passing')
bottom_schools.head(5)

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,0 District 1 District 2 Charter 3...,0 2917 1 2949 2 1761 3 4635 4 ...,0 1910635 1 1884411 2 1056600 3 ...,0 655.0 1 639.0 2 600.0 3 652....,school_name Bailey High School 77.048432...,school_name Bailey High School 81.033963...,Bailey High School NaN Cabrera High Schoo...,Bailey High School NaN Cabrera High Schoo...,0 0.005142 1 0.005086 2 0.008518 3...


In [None]:
### Math Scores by Grade ###

# Define a variable to calculate each grades' average math score for each school
nine_avg_math = student_data.loc[student_data['grade']=='9th'].groupby('school_name')['math_score'].mean()
ten_avg_math = student_data.loc[student_data['grade']=='10th'].groupby('school_name')['math_score'].mean()
eleven_avg_math = student_data.loc[student_data['grade']=='11th'].groupby('school_name')['math_score'].mean()
twelve_avg_math = student_data.loc[student_data['grade']=='12th'].groupby('school_name')['math_score'].mean()

# Create pandas dataframe containing variables above
all_avg_math = pd.DataFrame({"9th":nine_avg_math,"10th":ten_avg_math,"11th":eleven_avg_math,"12th":twelve_avg_math})
all_avg_math = all_avg_math[['9th','10th','11th','12th']]
all_avg_math.index.name = "School Name"

# Format Cells
all_avg_math.style.format({'9th':'{:.2f}','10th':'{:.2f}','11th':'{:.2f}','12th':'{:.2f}'})

In [None]:
### Reading Scores by Grade ###
# Define a variable to calculate each grades' average math score for each school
nine_avg_read = student_data.loc[student_data['grade']=='9th'].groupby('school_name')['reading_score'].mean()
ten_avg_read = student_data.loc[student_data['grade']=='10th'].groupby('school_name')['reading_score'].mean()
eleven_avg_read = student_data.loc[student_data['grade']=='11th'].groupby('school_name')['reading_score'].mean()
twelve_avg_read = student_data.loc[student_data['grade']=='12th'].groupby('school_name')['reading_score'].mean()

# Create pandas dataframe containing variables above
all_avg_read = pd.DataFrame({"9th":nine_avg_read,"10th":ten_avg_read,"11th":eleven_avg_read,"12th":twelve_avg_read})
all_avg_read = all_avg_math[['9th','10th','11th','12th']]
all_avg_read.index.name = "School Name"

# Format Cells
all_avg_math.style.format({'9th':'{:.2f}','10th':'{:.2f}','11th':'{:.2f}','12th':'{:.2f}'})

In [None]:
### Scores by School Spending ###

# Create bins to hold per student spending ranges. Be sure to have one more than ranges.
bins = [0, 584.99, 614.99, 644.99, 999.99]
bin_names = ["<$585","$585-614","$615-644",">$644"]

# Create a new dataframe containing the five metrics of student success from above
scores_by_spending = school_summary_df.loc[:,['Average Math Score','Average Reading Score','% Passing Math'
                                             ,'% Passing Reading','% Overall Passing']]

# Add a column titled 'Spending Ranges (Per Student)' that uses the created bins above
scores_by_spending['Spending Ranges (Per Student)'] = pd.cut(school_summary_df['Per Student Budget'],bins,label=bin_names)
scores_by_spending = scores_by_spending.groupby('Spending Ranges (Per Student)')

# Foramt Cells
 scores_by_spending.style.format({"Average Math Score":"{:.2f}",
                                "Average Reading Score":"{:.2f}",
                                "% Passing Math":"{:.2%}",
                                "% Passing Reading":"{:.2%}",
                                "% Overall Passing":"{:.2%}"})

In [None]:
### Scores by School Size ###

In [None]:
### Scores by School Type ###