In [51]:
# Dependencies and Setup
import pandas as pd

school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete_rename = school_data_complete.rename(columns={"student_name":"Student Name", "gender":"Gender", "grade":"Grade", "school_name":"School", "reading_score":"Reading Score", "math_score":"Math Score", "budget":"Budget", "type":"Type"})

school_data_complete_rename.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School,Reading Score,Math Score,School ID,Type,size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [52]:
#District Summary

#Total Schools
schools_unique = school_data_complete_rename["School"].unique()
total_schools = len(schools_unique)

#Total Students
total_students = len(school_data_complete_rename["Student ID"])

#Total Budget
total_budget = school_data_complete_rename["Budget"].sum()

#Average Math Score
average_math_score = school_data_complete_rename["Math Score"].mean()

#Average Reading Score
average_reading_score = school_data_complete_rename["Reading Score"].mean()

#Overall Pass Rate (Overall Average Score)
overall_pass_rate = float((average_reading_score + average_math_score)) / 2

#Percent students passing math
pass_total_m = 0
for score in school_data_complete_rename["Math Score"]:
    if score >= 70:
        pass_total_m += 1
percent_pass_m = float((pass_total_m) / len(school_data_complete_rename["Math Score"])) * 100

#Percent students passing reading
pass_total_r = 0
for score in school_data_complete_rename["Reading Score"]:
    if score >= 70:
        pass_total_r += 1
percent_pass_r = float((pass_total_r) / len(school_data_complete_rename["Reading Score"])) * 100

#Put data in a dataframe
data_summary_df = pd.DataFrame({"Total Number Schools":[total_schools], "Total Number Students":[total_students], "Average Reading Score":[average_reading_score], "Reading Pass Rate":[percent_pass_r], "Average Math Score":[average_math_score], "Math Pass Rate":[percent_pass_m], "Overall Pass Rate":[overall_pass_rate], "Total Budget":[total_budget]})

#Formatting Data
data_summary_df["Total Number Students"] = data_summary_df["Total Number Students"].map("{:,}".format)
data_summary_df["Average Reading Score"] = data_summary_df["Average Reading Score"].map("{:.2f}".format)
data_summary_df["Reading Pass Rate"] = data_summary_df["Reading Pass Rate"].map("{:.2f}%".format)
data_summary_df["Average Math Score"] = data_summary_df["Average Math Score"].map("{:.2f}".format)
data_summary_df["Math Pass Rate"] = data_summary_df["Math Pass Rate"].map("{:.2f}%".format)
data_summary_df["Overall Pass Rate"] = data_summary_df["Overall Pass Rate"].map("{:.2f}%".format)
data_summary_df["Total Budget"] = data_summary_df["Total Budget"].map("${:,.2f}".format)
data_summary_df.head()

Unnamed: 0,Total Number Schools,Total Number Students,Average Reading Score,Reading Pass Rate,Average Math Score,Math Pass Rate,Overall Pass Rate,Total Budget
0,15,39170,81.88,85.81%,78.99,74.98%,80.43%,"$82,932,329,558.00"


In [80]:
#School Summary

#Group data by school
data_by_school = school_data_complete_rename.groupby("School")
data_by_school.count().head()

#School type
school_type = data_by_school["Type"].unique()
data_by_school_df = pd.DataFrame({"Type":school_type})
data_by_school_df
#Number of students
data_by_school_df["Number of Students"] = data_by_school["Student ID"].count().map("{:,}".format)
data_by_school_df

Unnamed: 0_level_0,Type,Number of Students
School,Unnamed: 1_level_1,Unnamed: 2_level_1
Bailey High School,[District],4976
Cabrera High School,[Charter],1858
Figueroa High School,[District],2949
Ford High School,[District],2739
Griffin High School,[Charter],1468
Hernandez High School,[District],4635
Holden High School,[Charter],427
Huang High School,[District],2917
Johnson High School,[District],4761
Pena High School,[Charter],962
