In [1]:
#Import Dependencies
import pandas as pd
import os
import numpy as np

In [2]:
#Create filepaths for csv files with school data
school_data_file = "Resources/schools_complete.csv"
#school_data_file = os.path.join("Resources","school_complete.csv")
student_data_file = "Resources/students_complete.csv"
#student_data_file = os.path.join("Resources","students_complete.csv")

In [3]:
#Read school and student data into DataFrames
school_df = pd.read_csv(school_data_file)
student_df = pd.read_csv(student_data_file)


In [4]:
#Merge DataFrames to complete analysis
students_schools_complete = pd.merge(student_df,school_df,how="left",on="school_name")
students_schools_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [5]:
#Calculate # of schools
total_schools =len(students_schools_complete["school_name"].unique())
print(total_schools)

15


In [6]:
#Calculate # of students
total_students= students_schools_complete["Student ID"].count()
print(total_students)

39170


In [7]:
#Calculate district budget
total_budget = school_df['budget'].sum()
print(total_budget)


24649428


In [8]:
#Calculate average math score of all students
avg_math_score = students_schools_complete['math_score'].mean()
print(avg_math_score)


78.98537145774827


In [9]:
#Calculate average reading score of all students
avg_reading_score = students_schools_complete['reading_score'].mean()
print(avg_reading_score)

81.87784018381414


In [10]:
#Calculate % of students with a passing math score
passing_math_count = students_schools_complete[(students_schools_complete["math_score"] >= 70)].count()["student_name"]
percent_passing_math = passing_math_count / float(total_students) * 100
print(percent_passing_math)


74.9808526933878


In [11]:
#Calculate % of students with a passing reading score
passing_reading_count = students_schools_complete[(students_schools_complete["reading_score"] >= 70)].count()["student_name"]
percent_passing_reading = passing_reading_count / float(total_students) * 100
print(percent_passing_reading)


85.80546336482001


In [12]:
#Calculate % of students with passing scores for both math and reading
passing_math_reading_count = students_schools_complete[
    (students_schools_complete["math_score"] >= 70) & (students_schools_complete["reading_score"] >= 70)
].count()["student_name"]
percent_passing_overall = passing_math_reading_count / float(total_students) *100
print(percent_passing_overall) 

65.17232575950983


In [13]:
#Create a DataFrame to display the district's key metrics
district_summary = pd.DataFrame({"Total Schools":[total_schools],"Total Students":[total_students]
, "Total Budget":[total_budget], "Average Math Score":[avg_math_score],"Average Reading Score":[avg_reading_score]
, "% Passing Math":[percent_passing_math],"% Passing Reading":[percent_passing_reading],"% Overall Passing":[percent_passing_overall]})

# Formatting
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary["Average Math Score"] = district_summary["Average Math Score"].map("{:.2f}".format)
district_summary["Average Reading Score"] = district_summary["Average Reading Score"].map("{:.2f}".format) 
district_summary["% Passing Math"] = district_summary["% Passing Math"].map("{:.2f}%".format)
district_summary["% Passing Reading"] = district_summary["% Passing Reading"].map("{:.2f}%".format) 
district_summary["% Overall Passing"] = district_summary["% Overall Passing"].map("{:.2f}%".format)   

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

In [14]:
school_types = school_df.set_index(["school_name"])["type"]

In [15]:
per_school_students = students_schools_complete.groupby(["school_name"]).count()["Student ID"]


In [16]:
per_school_budget = students_schools_complete.groupby(["school_name"]).mean()["budget"]
per_school_capita = per_school_budget / per_school_students

In [17]:
per_school_math_score = students_schools_complete.groupby(["school_name"]).mean()["math_score"]
per_school_reading_score = students_schools_complete.groupby(["school_name"]).mean()["reading_score"]


In [18]:
per_school_passing_math = students_schools_complete[(students_schools_complete["math_score"] >= 70)]
per_school_passing_math = per_school_passing_math.groupby(["school_name"]).count()["Student ID"] / per_school_students * 100

per_school_passing_reading = students_schools_complete[(students_schools_complete["reading_score"] >= 70)]
per_school_passing_reading = per_school_passing_reading.groupby(["school_name"]).count()["Student ID"] / per_school_students * 100

per_school_passing_overall = students_schools_complete[(students_schools_complete["math_score"] >= 70) & 
                                (students_schools_complete["reading_score"] >= 70)]
per_school_passing_overall = per_school_passing_overall.groupby(["school_name"]).count()["Student ID"] / per_school_students * 100



In [19]:
school_summary = pd.DataFrame(columns= ["School Type", "Total Students", "Total School Budget"
        , "Per Student Budget", "Average Math Score", "Average Reading Score"
        , "% Passing Math", "% Passing Reading", "% Overall Passing"])

school_summary["School Type"] = school_types
school_summary["Total Students"] = pd.to_numeric(per_school_students)
school_summary["Total School Budget"] = per_school_budget
school_summary["Per Student Budget"] = per_school_capita
school_summary["Average Math Score"] = per_school_math_score
school_summary["Average Reading Score"] = per_school_reading_score
school_summary["% Passing Math"] = per_school_passing_math
school_summary["% Passing Reading"] = per_school_passing_reading
school_summary["% Overall Passing"] = per_school_passing_overall

school_summary.sort_index(inplace=True)


In [20]:
#Create a copy of the school summary to format and display
school_summary_format = school_summary.copy()
# Formatting
school_summary_format["Total Students"] = school_summary_format["Total Students"].map("{:,}".format)
school_summary_format["Total School Budget"] = school_summary_format["Total School Budget"].map("${:,.2f}".format)
school_summary_format["Average Math Score"] = school_summary_format["Average Math Score"].map("{:.2f}".format)
school_summary_format["Average Reading Score"] = school_summary_format["Average Reading Score"].map("{:.2f}".format) 
school_summary_format["% Passing Math"] = school_summary_format["% Passing Math"].map("{:.2f}%".format)
school_summary_format["% Passing Reading"] = school_summary_format["% Passing Reading"].map("{:.2f}%".format) 
school_summary_format["% Overall Passing"] = school_summary_format["% Overall Passing"].map("{:.2f}%".format)
#Display the School Summary
school_summary_format


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",628.0,77.05,81.03,66.68%,81.93%,54.64%
Cabrera High School,Charter,1858,"$1,081,356.00",582.0,83.06,83.98,94.13%,97.04%,91.33%
Figueroa High School,District,2949,"$1,884,411.00",639.0,76.71,81.16,65.99%,80.74%,53.20%
Ford High School,District,2739,"$1,763,916.00",644.0,77.1,80.75,68.31%,79.30%,54.29%
Griffin High School,Charter,1468,"$917,500.00",625.0,83.35,83.82,93.39%,97.14%,90.60%
Hernandez High School,District,4635,"$3,022,020.00",652.0,77.29,80.93,66.75%,80.86%,53.53%
Holden High School,Charter,427,"$248,087.00",581.0,83.8,83.81,92.51%,96.25%,89.23%
Huang High School,District,2917,"$1,910,635.00",655.0,76.63,81.18,65.68%,81.32%,53.51%
Johnson High School,District,4761,"$3,094,650.00",650.0,77.07,80.97,66.06%,81.22%,53.54%
Pena High School,Charter,962,"$585,858.00",609.0,83.84,84.04,94.59%,95.95%,90.54%


## Highest-Performing Schools (by % Overall Passing)

In [21]:
top_num = 5


top_schools_overall_passing = school_summary.sort_values(by="% Overall Passing", ascending=False)
top_schools_overall_passing.head(top_num)


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


## Lowest-Performing Schools (by % Overall Passing)

In [22]:
low_num = 5

low_schools_overall_passing = school_summary.sort_values(by="% Overall Passing", ascending=True)
low_schools_overall_passing.head(low_num)


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363.0,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


## Math Scores by Grade

In [23]:
math_scores_ninth_grade = students_schools_complete[(students_schools_complete["grade"] == "9th")].groupby(["school_name"]).mean()["math_score"]
math_scores_tenth_grade = students_schools_complete[(students_schools_complete["grade"] == "10th")].groupby(["school_name"]).mean()["math_score"]
math_scores_eleventh_grade = students_schools_complete[(students_schools_complete["grade"] == "11th")].groupby(["school_name"]).mean()["math_score"]
math_scores_twelfth_grade = students_schools_complete[(students_schools_complete["grade"] == "12th")].groupby(["school_name"]).mean()["math_score"]

math_scores_by_grade = pd.DataFrame({"9th":math_scores_ninth_grade,"10th":math_scores_tenth_grade,"11th":math_scores_eleventh_grade,"12th":math_scores_twelfth_grade})
math_scores_by_grade.index.name = None

#Formatting
math_scores_by_grade["9th"] = math_scores_by_grade["9th"].map("{:.2f}%".format)
math_scores_by_grade["10th"] = math_scores_by_grade["10th"].map("{:.2f}%".format)
math_scores_by_grade["11th"] = math_scores_by_grade["11th"].map("{:.2f}%".format)
math_scores_by_grade["12th"] = math_scores_by_grade["12th"].map("{:.2f}%".format)

math_scores_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.08%,77.00%,77.52%,76.49%
Cabrera High School,83.09%,83.15%,82.77%,83.28%
Figueroa High School,76.40%,76.54%,76.88%,77.15%
Ford High School,77.36%,77.67%,76.92%,76.18%
Griffin High School,82.04%,84.23%,83.84%,83.36%
Hernandez High School,77.44%,77.34%,77.14%,77.19%
Holden High School,83.79%,83.43%,85.00%,82.86%
Huang High School,77.03%,75.91%,76.45%,77.23%
Johnson High School,77.19%,76.69%,77.49%,76.86%
Pena High School,83.63%,83.37%,84.33%,84.12%


## Reading Scores by Grade

In [24]:
reading_scores_ninth_grade = students_schools_complete[(students_schools_complete["grade"] == "9th")].groupby(["school_name"]).mean()["reading_score"]
reading_scores_tenth_grade = students_schools_complete[(students_schools_complete["grade"] == "10th")].groupby(["school_name"]).mean()["reading_score"]
reading_scores_eleventh_grade = students_schools_complete[(students_schools_complete["grade"] == "11th")].groupby(["school_name"]).mean()["reading_score"]
reading_scores_twelfth_grade = students_schools_complete[(students_schools_complete["grade"] == "12th")].groupby(["school_name"]).mean()["reading_score"]

reading_scores_by_grade = pd.DataFrame({"9th":reading_scores_ninth_grade,"10th":reading_scores_tenth_grade,"11th":reading_scores_eleventh_grade,"12th":reading_scores_twelfth_grade})
reading_scores_by_grade.index.name = None

#Formatting
reading_scores_by_grade["9th"] = reading_scores_by_grade["9th"].map("{:.2f}%".format)
reading_scores_by_grade["10th"] = reading_scores_by_grade["10th"].map("{:.2f}%".format)
reading_scores_by_grade["11th"] = reading_scores_by_grade["11th"].map("{:.2f}%".format)
reading_scores_by_grade["12th"] = reading_scores_by_grade["12th"].map("{:.2f}%".format)

reading_scores_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,81.30%,80.91%,80.95%,80.91%
Cabrera High School,83.68%,84.25%,83.79%,84.29%
Figueroa High School,81.20%,81.41%,80.64%,81.38%
Ford High School,80.63%,81.26%,80.40%,80.66%
Griffin High School,83.37%,83.71%,84.29%,84.01%
Hernandez High School,80.87%,80.66%,81.40%,80.86%
Holden High School,83.68%,83.32%,83.82%,84.70%
Huang High School,81.29%,81.51%,81.42%,80.31%
Johnson High School,81.26%,80.77%,80.62%,81.23%
Pena High School,83.81%,83.61%,84.34%,84.59%


## Scores by School Spending

In [25]:
spending_bins = [0,600,630,650,670]
labels = ["<$600","$600-630","$630-650","$650-670"]

In [26]:
school_spending_df = school_summary.copy()

In [27]:
school_spending_df["Spending Ranges (Per Student)"] = pd.cut(school_spending_df["Per Student Budget"],bins=spending_bins, labels=labels, right=False)


In [28]:
spending_math_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"]).mean()["Average Math Score"]
spending_reading_scores = school_spending_df.groupby(["Spending Ranges (Per Student)"]).mean()["Average Reading Score"]
spending_passing_math = school_spending_df.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Math"]
spending_passing_reading = school_spending_df.groupby(["Spending Ranges (Per Student)"]).mean()["% Passing Reading"]
spending_overall_passing = school_spending_df.groupby(["Spending Ranges (Per Student)"]).mean()["% Overall Passing"]

In [29]:
spending_summary = pd.DataFrame({"Average Math Score":spending_math_scores,"Average Reading Score":spending_reading_scores
    ,"% Passing Math":spending_passing_math,"% Passing Reading":spending_passing_reading,"% Overall Passing":spending_overall_passing})

#Formatting

spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$600,83.455399,83.933814,93.460096,96.610877,90.369459
$600-630,81.899826,83.155286,87.133538,92.718205,81.418596
$630-650,78.518855,81.624473,73.484209,84.391793,62.857656
$650-670,76.99721,81.027843,66.164813,81.133951,53.526855


## Scores by School Size

In [30]:
size_bins = [0,1000,2000,5000]
size_labels = ["Small (<1000)","Medium (1000-2000)","Large(2000-5000)"]

In [31]:
school_size_df = school_summary.copy()


school_size_df["School Size"]=pd.cut(school_size_df["Total Students"], bins=size_bins, labels=size_labels)
school_size_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,School Size
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,District,4976,3124928.0,628.0,77.048432,81.033963,66.680064,81.93328,54.642283,Large(2000-5000)
Cabrera High School,Charter,1858,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,91.334769,Medium (1000-2000)
Figueroa High School,District,2949,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,53.204476,Large(2000-5000)
Ford High School,District,2739,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,54.289887,Large(2000-5000)
Griffin High School,Charter,1468,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,90.599455,Medium (1000-2000)
Hernandez High School,District,4635,3022020.0,652.0,77.289752,80.934412,66.752967,80.862999,53.527508,Large(2000-5000)
Holden High School,Charter,427,248087.0,581.0,83.803279,83.814988,92.505855,96.252927,89.227166,Small (<1000)
Huang High School,District,2917,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,53.513884,Large(2000-5000)
Johnson High School,District,4761,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,53.539172,Large(2000-5000)
Pena High School,Charter,962,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,90.540541,Small (<1000)


In [32]:
size_math_scores = school_size_df.groupby(["School Size"]).mean()["Average Math Score"]
size_reading_scores = school_size_df.groupby(["School Size"]).mean()["Average Reading Score"]
size_passing_math = school_size_df.groupby(["School Size"]).mean()["% Passing Math"]
size_passing_reading = school_size_df.groupby(["School Size"]).mean()["% Passing Reading"]
size_overall_passing = school_size_df.groupby(["School Size"]).mean()["% Overall Passing"]


In [33]:
size_summary = pd.DataFrame({"Average Math Score":size_math_scores,"Average Reading Score":size_reading_scores
    ,"% Passing Math":size_passing_math,"% Passing Reading":size_passing_reading, "% Overall Passing":size_overall_passing})

#Formatting

size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,89.883853
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,90.621535
Large(2000-5000),77.746417,81.344493,69.963361,82.766634,58.286003


## Scores by School Type

In [34]:
school_type_math_scores = school_summary.groupby(["School Type"]).mean()["Average Math Score"]
school_type_reading_scores = school_summary.groupby(["School Type"]).mean()["Average Reading Score"]
school_type_passing_math = school_summary.groupby(["School Type"]).mean()["% Passing Math"]
school_type_passing_reading = school_summary.groupby(["School Type"]).mean()["% Passing Reading"]
school_type_overall_passing = school_summary.groupby(["School Type"]).mean()["% Overall Passing"]



In [35]:
type_summary = pd.DataFrame({"Average Math Score":school_type_math_scores,"Average Reading Score":school_type_reading_scores
    ,"% Passing Math":school_type_passing_math,"% Passing Reading":school_type_passing_reading, "% Overall Passing":school_type_overall_passing})

#Formatting

type_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,90.432244
District,76.956733,80.966636,66.548453,80.799062,53.672208
