# PyCity Schools Analysis

- Analysis here

--- 

In [470]:
# Dependencies and Setup
import pandas as pd
from pathlib import Path

# Locations of the input CSV files (adjust these paths for your environment)
school_data_to_load = Path("Resources/schools_complete.csv")
student_data_to_load = Path("Resources/students_complete.csv")

# Load the school and student records into pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Attach each student's school attributes to the student row.
# A left join keeps every student record; the join key only needs to be
# named once, so it is passed as a single column name.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")


## District Summary

In [471]:
# Number of distinct school names found in the merged dataset
school_count = school_data_complete.loc[:, "school_name"].nunique()

In [472]:
# Number of students in the district (rows with a non-null student name)
student_count = school_data_complete.loc[:, "student_name"].count()

In [473]:
# Total budget, summed from the per-school table (not the merged student rows)
total_budget = school_data.loc[:, "budget"].sum()


In [474]:
# District-wide average math score.
# Series.mean() divides by the number of non-null math scores, so the result
# does not depend on student_count (which counts non-null student names and
# could differ from the number of recorded scores).
average_math_score = school_data_complete["math_score"].mean()

In [475]:
# District-wide average reading score.
# Series.mean() divides by the number of non-null reading scores, so the
# result does not depend on student_count (which counts non-null student
# names and could differ from the number of recorded scores).
average_reading_score = school_data_complete["reading_score"].mean()

In [476]:
# Percentage of students with a passing math score (70 or above)
pass_math_count = school_data_complete.loc[
    school_data_complete["math_score"].ge(70), "student_name"
].count()
pass_math_percent = (pass_math_count / float(student_count)) * 100

In [477]:
# Percentage of students with a passing reading score (70 or above)
pass_reading_count = school_data_complete.loc[
    school_data_complete["reading_score"].ge(70), "student_name"
].count()
pass_reading_percent = (pass_reading_count / float(student_count)) * 100

In [478]:
# Percentage of students who passed both math and reading (70 or above in each)
pass_overall_count = school_data_complete.loc[
    school_data_complete["math_score"].ge(70)
    & school_data_complete["reading_score"].ge(70),
    "student_name",
].count()
pass_overall_percent = (pass_overall_count / float(student_count)) * 100

In [479]:
# Collect the district-level metrics computed above into a one-row DataFrame
district_metrics = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math": pass_math_percent,
    "% Passing Reading": pass_reading_percent,
    "% Passing Overall": pass_overall_percent,
}
district_summary = pd.DataFrame([district_metrics])

# Presentation: thousands separators for counts, a dollar sign for the budget
district_summary["Total Students"] = district_summary["Total Students"].map(
    lambda students: f"{students:,}"
)
district_summary["Total Budget"] = district_summary["Total Budget"].map(
    lambda budget: f"${budget:,}"
)

# Round the score and percentage columns to two decimal places
district_rounded_columns = [
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Passing Overall",
]
district_summary[district_rounded_columns] = district_summary[
    district_rounded_columns
].round(2)

# Display the summary
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
0,15,39170,"$24,649,428",78.99,81.88,74.98,85.81,65.17


## School Summary

In [480]:
# Start the school summary DataFrame from school_data, removing "School ID".
# errors="ignore" keeps this cell safe to re-run: school_data is overwritten
# here, so a second execution would otherwise raise KeyError because the
# column has already been dropped.
school_data = school_data.drop(columns="School ID", errors="ignore")

In [481]:
# Per-student budget: each school's budget divided by its "size" column
school_data["Per Student Budget"] = school_data["budget"].div(school_data["size"])

In [482]:
# Mean math and reading score per school; the result is indexed by school_name
average_scores_school = (
    school_data_complete
    .groupby("school_name")[["math_score", "reading_score"]]
    .mean()
)

In [483]:
# Begin the school summary: school attributes joined with the per-school averages
school_summary = school_data.merge(
    average_scores_school,
    how="outer",
    on="school_name",
)

In [484]:
# Percentage of students passing math (score of 70 or above) for each school

# Number of passing students per school (Series indexed by school_name)
count_passing_math_school = (
    school_data_complete.loc[school_data_complete["math_score"] >= 70]
    .groupby("school_name")["student_name"]
    .count()
)

# Pair every school's size with its passing count. The school list is the
# left side of the join so that a school with no passing students is kept
# (it is absent from the filtered groupby) and reported as 0 rather than
# dropped.
percent_passing_math_school = pd.merge(
    school_data[["school_name", "size"]],
    count_passing_math_school.reset_index(),
    on="school_name",
    how="left",
)
percent_passing_math_school["student_name"] = (
    percent_passing_math_school["student_name"].fillna(0)
)

# Percent of students passing math
percent_passing_math_school["% Passing Math"] = (
    percent_passing_math_school["student_name"]
    / percent_passing_math_school["size"] * 100
)


In [485]:
# Attach the "% Passing Math" column to the school summary
school_summary = school_summary.merge(
    percent_passing_math_school[["school_name", "% Passing Math"]],
    how="left",
    on="school_name",
)

In [486]:
# Percentage of students passing reading (score of 70 or above) for each school

# Number of passing students per school (Series indexed by school_name)
count_passing_reading_school = (
    school_data_complete.loc[school_data_complete["reading_score"] >= 70]
    .groupby("school_name")["student_name"]
    .count()
)

# Pair every school's size with its passing count. The school list is the
# left side of the join so that a school with no passing students is kept
# (it is absent from the filtered groupby) and reported as 0 rather than
# dropped.
percent_passing_reading_school = pd.merge(
    school_data[["school_name", "size"]],
    count_passing_reading_school.reset_index(),
    on="school_name",
    how="left",
)
percent_passing_reading_school["student_name"] = (
    percent_passing_reading_school["student_name"].fillna(0)
)

# Percent of students passing reading
percent_passing_reading_school["% Passing Reading"] = (
    percent_passing_reading_school["student_name"]
    / percent_passing_reading_school["size"] * 100
)

In [487]:
# Attach the "% Passing Reading" column to the school summary
school_summary = school_summary.merge(
    percent_passing_reading_school[["school_name", "% Passing Reading"]],
    how="left",
    on="school_name",
)

In [488]:
# Percentage of students passing both math and reading (70 or above in each)
# for each school

# Number of students per school who passed both subjects
# (Series indexed by school_name)
count_passing_overall_school = (
    school_data_complete.loc[
        (school_data_complete["math_score"] >= 70)
        & (school_data_complete["reading_score"] >= 70)
    ]
    .groupby("school_name")["student_name"]
    .count()
)

# Pair every school's size with its passing count. The school list is the
# left side of the join so that a school with no passing students is kept
# (it is absent from the filtered groupby) and reported as 0 rather than
# dropped.
percent_passing_overall_school = pd.merge(
    school_data[["school_name", "size"]],
    count_passing_overall_school.reset_index(),
    on="school_name",
    how="left",
)
percent_passing_overall_school["student_name"] = (
    percent_passing_overall_school["student_name"].fillna(0)
)

# Percent of students passing both math and reading
percent_passing_overall_school["% Passing Overall"] = (
    percent_passing_overall_school["student_name"]
    / percent_passing_overall_school["size"] * 100
)

In [489]:
# Attach the "% Passing Overall" column to the school summary
school_summary = school_summary.merge(
    percent_passing_overall_school[["school_name", "% Passing Overall"]],
    how="left",
    on="school_name",
)

In [490]:
# Presentation: friendly column names, alphabetical order, school name as index
school_summary_column_names = {
    "school_name": "School Name",
    "type": "School Type",
    "size": "Total Students",
    "budget": "Total School Budget",
    "math_score": "Average Math Score",
    "reading_score": "Average Reading Score",
}
school_summary = (
    school_summary
    .rename(columns=school_summary_column_names)
    .sort_values(by="School Name", ascending=True)
    .set_index("School Name")
)

# Thousands separators for counts; whole-dollar amounts for the budget columns
school_summary["Total Students"] = school_summary["Total Students"].map(
    lambda students: f"{students:,}"
)
for budget_column in ("Total School Budget", "Per Student Budget"):
    school_summary[budget_column] = (
        school_summary[budget_column].astype(int).map(lambda dollars: f"${dollars:,}")
    )

# Round the score and percentage columns to two decimal places
school_rounded_columns = [
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Passing Overall",
]
school_summary[school_rounded_columns] = school_summary[school_rounded_columns].round(2)

# Display the summary
school_summary


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928",$628,77.05,81.03,66.68,81.93,54.64
Cabrera High School,Charter,1858,"$1,081,356",$582,83.06,83.98,94.13,97.04,91.33
Figueroa High School,District,2949,"$1,884,411",$639,76.71,81.16,65.99,80.74,53.2
Ford High School,District,2739,"$1,763,916",$644,77.1,80.75,68.31,79.3,54.29
Griffin High School,Charter,1468,"$917,500",$625,83.35,83.82,93.39,97.14,90.6
Hernandez High School,District,4635,"$3,022,020",$652,77.29,80.93,66.75,80.86,53.53
Holden High School,Charter,427,"$248,087",$581,83.8,83.81,92.51,96.25,89.23
Huang High School,District,2917,"$1,910,635",$655,76.63,81.18,65.68,81.32,53.51
Johnson High School,District,4761,"$3,094,650",$650,77.07,80.97,66.06,81.22,53.54
Pena High School,Charter,962,"$585,858",$609,83.84,84.04,94.59,95.95,90.54
