In [1]:
#Import Dependencies
import pandas as pd
import os

In [2]:
# Relative locations of the two input CSV files (student-level and school-level data)
student_data_file = "Resources/students_complete.csv"
school_data_file = "Resources/schools_complete.csv"

In [3]:
# Load the school CSV and the student CSV, each into its own DataFrame
school_df, student_df = (
    pd.read_csv(csv_path) for csv_path in (school_data_file, student_data_file)
)


In [4]:
# Left-join the school columns onto every student row, matching on "school_name",
# so that each student record also carries its school's attributes
students_schools_complete = student_df.merge(school_df, how="left", on="school_name")
students_schools_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [5]:
# Number of distinct school names appearing in the merged data
total_schools = len(pd.unique(students_schools_complete["school_name"]))
print(total_schools)

15


In [6]:
# Number of students, counted as the non-null entries of the "Student ID" column
total_students = students_schools_complete.loc[:, "Student ID"].count()
print(total_students)

39170


In [15]:
# District budget: sum of the "budget" column of the school-level table
total_budget = school_df.loc[:, "budget"].sum()
print(total_budget)


24649428


In [16]:
# Mean math score over every student row in the merged data
avg_math_score = students_schools_complete.loc[:, "math_score"].mean()
print(avg_math_score)


78.98537145774827


In [17]:
# Mean reading score over every student row in the merged data
avg_reading_score = students_schools_complete.loc[:, "reading_score"].mean()
print(avg_reading_score)

81.87784018381414


In [18]:
# Percentage of students passing math, where passing means a math score of 70 or more.
# The numerator counts non-null student names among the passing rows.
passing_math_count = students_schools_complete.loc[
    students_schools_complete["math_score"] >= 70, "student_name"
].count()
# True division already yields a float; divide first, then scale to a percentage
percent_passing_math = passing_math_count / total_students * 100
print(percent_passing_math)


74.9808526933878


In [20]:
# Percentage of students passing reading, where passing means a reading score of 70 or more.
# The numerator counts non-null student names among the passing rows.
passing_reading_count = students_schools_complete.loc[
    students_schools_complete["reading_score"] >= 70, "student_name"
].count()
# True division already yields a float; divide first, then scale to a percentage
percent_passing_reading = passing_reading_count / total_students * 100
print(percent_passing_reading)


85.80546336482001


In [22]:
# Percentage of students passing overall: both math and reading scores must be 70 or more.
# The numerator counts non-null student names among the rows meeting both conditions.
passing_math_reading_count = students_schools_complete.loc[
    (students_schools_complete["math_score"] >= 70)
    & (students_schools_complete["reading_score"] >= 70),
    "student_name",
].count()
# True division already yields a float; divide first, then scale to a percentage
percent_passing_overall = passing_math_reading_count / total_students * 100
print(percent_passing_overall)

65.17232575950983


In [26]:
# Assemble the district-wide metrics computed above into a one-row summary table.
# The values are kept numeric (no string formatting is applied) so the frame can
# still be used in calculations.
district_summary = pd.DataFrame(
    {
        "Total Schools": [total_schools],
        "Total Students": [total_students],
        "Total Budget": [total_budget],
        "Average Math Score": [avg_math_score],
        "Average Reading Score": [avg_reading_score],
        "% Passing Math": [percent_passing_math],
        "% Passing Reading": [percent_passing_reading],
        "% Overall Passing": [percent_passing_overall],
    }
)

# Bare last expression so the notebook actually renders the table on a fresh run
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [65]:
# School type ("type" column of the school table), indexed by school name
school_types = school_df.set_index("school_name")["type"]

In [66]:
# Students per school: non-null "Student ID" entries within each school_name group
per_school_students = students_schools_complete.groupby("school_name")["Student ID"].count()


In [67]:
# Per-school budget, taken as the mean of the "budget" column within each school group.
# The column is selected *before* aggregating: calling .mean() on the whole grouped
# frame also tries to average text columns (student_name, gender, ...), which raises
# a TypeError on pandas >= 2.0 where numeric_only defaults to False.
per_school_budget = students_schools_complete.groupby("school_name")["budget"].mean()

# Budget per student: school budget divided by the school's student count
# (both Series are indexed by school_name, so the division aligns by school)
per_school_capita = per_school_budget / per_school_students

In [68]:
# Average math and reading score per school.
# Each score column is selected *before* aggregating: calling .mean() on the whole
# grouped frame also tries to average text columns, which raises a TypeError on
# pandas >= 2.0 where numeric_only defaults to False.
per_school_math_score = students_schools_complete.groupby("school_name")["math_score"].mean()
per_school_reading_score = students_schools_complete.groupby("school_name")["reading_score"].mean()


In [69]:
# Percentage of each school's students who pass (score of 70 or more) in math,
# in reading, and in both subjects.
#
# The pass/fail flags are summed per school instead of filtering rows first:
# filtering drops any school with no passing students from the grouped result,
# which would leave NaN rather than 0% for that school after the division.
# Summing booleans counts the True values and yields 0 for such a school.
# Each name is also bound exactly once, directly to its final percentage Series.
per_school_passing_math = (
    (students_schools_complete["math_score"] >= 70)
    .groupby(students_schools_complete["school_name"])
    .sum()
    / per_school_students * 100
)

per_school_passing_reading = (
    (students_schools_complete["reading_score"] >= 70)
    .groupby(students_schools_complete["school_name"])
    .sum()
    / per_school_students * 100
)

per_school_passing_overall = (
    (
        (students_schools_complete["math_score"] >= 70)
        & (students_schools_complete["reading_score"] >= 70)
    )
    .groupby(students_schools_complete["school_name"])
    .sum()
    / per_school_students * 100
)



In [71]:
# Gather the per-school metrics into a single table, one row per school.
# Rows follow the index of school_types; every other Series is aligned to it
# by school name.
school_summary = pd.DataFrame(
    {
        "School Type": school_types,
        "Total Students": per_school_students,
        "Total School Budget": per_school_budget,
        "Per Student Budget": per_school_capita,
        "Average Math Score": per_school_math_score,
        "Average Reading Score": per_school_reading_score,
        "% Passing Math": per_school_passing_math,
        "% Passing Reading": per_school_passing_reading,
        "% Passing Overall": per_school_passing_overall,
    },
    index=school_types.index,
)



Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [72]:
# Order the schools alphabetically by name. The result is reassigned rather than
# sorted in place, so re-running this cell is idempotent and the frame is never
# mutated behind an earlier cell's displayed output.
# The values are kept numeric (no string formatting is applied) so the frame can
# still be used in calculations.
school_summary = school_summary.sort_index()

school_summary
