# PyCity Schools Analysis

- Your analysis here

## Original Data

In [179]:
# Import libraries needed
import pandas as pd
from pathlib import Path

# Locate the two input CSVs (paths are relative to the working directory)
students_csv = Path("Resources", "students_complete.csv")
schools_csv = Path("Resources", "schools_complete.csv")

# Load each CSV into its own pandas DataFrame
students_df = pd.read_csv(filepath_or_buffer=students_csv)
schools_df = pd.read_csv(filepath_or_buffer=schools_csv)

In [180]:
# Attach each student's school attributes; the left join keeps every student row
complete_df = students_df.merge(schools_df, how="left", on="school_name")
complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [181]:
# Number of distinct school names in the merged data (missing names are not counted)
district_schools = complete_df.loc[:, "school_name"].nunique(dropna=True)

In [182]:
# District total students
# Use the row count of the merged frame rather than counting "student_name":
# Series.count() skips missing values, so a blank name would silently shrink
# the headcount and distort every percentage that divides by it.
# NOTE(review): assumes one row per student in complete_df — confirm that
# school_name is unique in schools_df so the left merge cannot duplicate rows.
district_students = len(complete_df)

In [183]:
# Sum of all school budgets. Taken from schools_df, not complete_df, because
# the merged frame repeats a school's budget on every one of its student rows.
district_budget = schools_df.loc[:, "budget"].sum()

In [184]:
# Mean math score across every student row in the district
district_math_score = complete_df.loc[:, "math_score"].mean()

In [185]:
# Mean reading score across every student row in the district
district_read_score = complete_df.loc[:, "reading_score"].mean()

In [186]:
# District % of students passing math (passing score is >= 70)
# Sum the boolean pass flag directly: this avoids filtering the whole frame and
# counting every column, and the result no longer depends on "student_name"
# being non-null for each passing row.
district_pass_math = (complete_df["math_score"] >= 70).sum()
# Divide by the number of student rows so numerator and denominator share a basis
district_pass_math_perc = district_pass_math / len(complete_df) * 100

In [187]:
# District % of students passing reading (passing score is >= 70)
# Sum the boolean pass flag directly: this avoids filtering the whole frame and
# counting every column, and the result no longer depends on "student_name"
# being non-null for each passing row.
district_pass_read = (complete_df["reading_score"] >= 70).sum()
# Divide by the number of student rows so numerator and denominator share a basis
district_pass_read_perc = district_pass_read / len(complete_df) * 100

In [188]:
# District % of students passing overall (>= 70 in BOTH reading and math)
# Sum the combined boolean flag directly: this avoids filtering the whole frame
# and counting every column, and the result no longer depends on
# "student_name" being non-null for each passing row.
district_pass_overall = (
    (complete_df["reading_score"] >= 70) & (complete_df["math_score"] >= 70)
).sum()
# Divide by the number of student rows so numerator and denominator share a basis
district_pass_overall_perc = district_pass_overall / len(complete_df) * 100

In [189]:
# District's key metrics summary (a single-row table)
district_summary = pd.DataFrame(
    {
        "Total Schools": district_schools,
        "Total Students": district_students,
        "Total Budget": district_budget,
        "Average Math Score": district_math_score,
        "Average Reading Score": district_read_score,
        "% Passing Math": district_pass_math_perc,
        "% Passing Reading": district_pass_read_perc,
        "% Overall Passing": district_pass_overall_perc,
    },
    index=[0],  # all values are scalars, so an explicit index is needed to form one row
)

# Formatting: thousands separators for the headcount, currency for the budget.
# Both columns hold strings (not numbers) after this step.
district_summary["Total Students"] = district_summary["Total Students"].map(lambda n: f"{n:,}")
district_summary["Total Budget"] = district_summary["Total Budget"].map(lambda amt: f"${amt:,.2f}")

# Display summary
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [190]:
# School type for each school, as a Series indexed by school name
school_types = schools_df.set_index("school_name").loc[:, "type"]

In [191]:
# Enrollment ("size" column) for each school, as a Series indexed by school name
school_students = schools_df.set_index("school_name").loc[:, "size"]

In [192]:
# Total budget for each school, as a Series indexed by school name
school_budget = schools_df.set_index("school_name").loc[:, "budget"]

In [193]:
# Per-student budget for each school: total budget divided by enrollment.
# Note: this also stores the result on schools_df as a new
# "budget_per_student" column before pulling it out by school name.
schools_df["budget_per_student"] = schools_df["budget"].div(schools_df["size"])
school_budget_per_student = schools_df.set_index("school_name").loc[:, "budget_per_student"]

In [194]:
# Mean math score of each school's students, indexed by school name
school_math_score = complete_df.groupby("school_name")["math_score"].mean()

In [195]:
# Mean reading score of each school's students, indexed by school name
school_read_score = complete_df.groupby("school_name")["reading_score"].mean()

In [196]:
# Students passing the math test per school (passing score is >= 70)
# Sum the boolean pass flag within each school instead of filtering first:
# a school with no passing students then gets a count of 0 rather than
# disappearing from the result (which would surface as NaN in the percentages).
school_pass_math = (complete_df["math_score"] >= 70).groupby(complete_df["school_name"]).sum()


In [197]:
# Students passing the reading test per school (passing score is >= 70)
# Sum the boolean pass flag within each school instead of filtering first:
# a school with no passing students then gets a count of 0 rather than
# disappearing from the result (which would surface as NaN in the percentages).
school_pass_read = (complete_df["reading_score"] >= 70).groupby(complete_df["school_name"]).sum()

In [198]:
# Students passing overall per school (>= 70 in BOTH math and reading)
# Sum the combined boolean flag within each school instead of filtering first:
# a school with no passing students then gets a count of 0 rather than
# disappearing from the result (which would surface as NaN in the percentages).
school_pass_overall = (
    (complete_df["math_score"] >= 70) & (complete_df["reading_score"] >= 70)
).groupby(complete_df["school_name"]).sum()

In [199]:
# Passing % per school: passing count over enrollment, scaled to 0-100.
# The division aligns the two Series on their school-name index.
school_pass_math_perc = school_pass_math.div(school_students).mul(100)
school_pass_read_perc = school_pass_read.div(school_students).mul(100)
school_pass_overall_perc = school_pass_overall.div(school_students).mul(100)

In [200]:
# School's key metrics summary: one row per school, built from the per-school
# Series (the DataFrame constructor aligns them on their school-name index)
per_school_summary = pd.DataFrame(
    {
        "School Type": school_types,
        "Total Students": school_students,
        "Total School Budget": school_budget,
        "Per Student Budget": school_budget_per_student,
        "Average Math Score": school_math_score,
        "Average Reading Score": school_read_score,
        "% Passing Math": school_pass_math_perc,
        "% Passing Reading": school_pass_read_perc,
        "% Overall Passing": school_pass_overall_perc,
    }
)

# Formatting: show both budget columns as currency.
# These columns hold strings (not numbers) after this step.
for budget_col in ("Total School Budget", "Per Student Budget"):
    per_school_summary[budget_col] = per_school_summary[budget_col].map(lambda amt: f"${amt:,.2f}")

# Display summary
per_school_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,90.540541
