In [93]:
# Dependencies
import pandas as pd

# Locations of the two raw CSV inputs, relative to this notebook
school_data_file = "../Resources/schools_complete.csv"
student_data_file = "../Resources/students_complete.csv"

# Read each CSV into its own DataFrame
school_data = pd.read_csv(school_data_file)
student_data = pd.read_csv(student_data_file)

# Merge the two .csv files into one DataFrame.
# Every student row picks up the columns of its school through the shared
# "school_name" column; the left join keeps all student rows even when no
# matching school record exists.
school_data_merge = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",  # one join key; naming it once is sufficient
    # Many students map to one school. Raise instead of silently duplicating
    # student rows if a school name appears more than once in school_data.
    validate="many_to_one",
)
school_data_merge.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


# District Summary

In [94]:
# Total number of unique schools.
# nunique() counts distinct school names, so a repeated row in school_data
# cannot inflate the total (count() would only report non-null rows).
school_unique = school_data["school_name"].nunique()
school_unique

15

In [95]:
# Total number of students: one non-null "student_name" entry per student row
student_names = student_data.loc[:, "student_name"]
student_count = student_names.count()
student_count

39170

In [96]:
# Total budget: sum of the per-school "budget" column
school_budgets = school_data.loc[:, "budget"]
total_budget = school_budgets.sum()
total_budget

24649428

In [97]:
# Average math score across all student rows
math_scores = student_data.loc[:, "math_score"]
avg_math_score = math_scores.mean()
avg_math_score

78.98537145774827

In [98]:
# Average reading score across all student rows
reading_scores = student_data.loc[:, "reading_score"]
avg_read_score = reading_scores.mean()
avg_read_score

81.87784018381414

In [99]:
# Percentage of students who passed math (math score greater than or equal to 70)
passed_math = school_data_merge["math_score"] >= 70
# Count the named students among the rows that meet the passing threshold
passing_math_count = school_data_merge.loc[passed_math, "student_name"].count()
passing_math_percentage = passing_math_count / student_count * 100
passing_math_percentage

74.9808526933878

In [100]:
# Percentage of students who passed reading (reading score greater than or equal to 70)
passed_reading = school_data_merge["reading_score"] >= 70
# Count the named students among the rows that meet the passing threshold
passing_read_count = school_data_merge.loc[passed_reading, "student_name"].count()
passing_read_percentage = passing_read_count / student_count * 100
passing_read_percentage

85.80546336482001

In [101]:
# Percentage of students that passed both math and reading
passed_math_mask = school_data_merge["math_score"] >= 70
passed_reading_mask = school_data_merge["reading_score"] >= 70
# A student passes overall only when both subject thresholds are met
passed_both = passed_math_mask & passed_reading_mask
passing_math_read_count = school_data_merge.loc[passed_both, "student_name"].count()
overall_passing_rate = passing_math_read_count / student_count * 100
overall_passing_rate

65.17232575950983

In [102]:
# Create a high-level snapshot of the district's key metrics in a DataFrame.
# The dict preserves insertion order, which becomes the column order.
district_metrics = {
    "Total Schools": school_unique,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Math Score": avg_math_score,
    "Average Reading Score": avg_read_score,
    "% Passing Math": passing_math_percentage,
    "% Passing Reading": passing_read_percentage,
    "% Overall Passing": overall_passing_rate,
}
# Wrap each scalar in a one-element list so the frame has a single row
district_summary = pd.DataFrame(
    {label: [value] for label, value in district_metrics.items()}
)

# Formatting: thousands separators for the head count, currency for the budget.
# The formatted columns hold strings afterwards, not numbers.
column_templates = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
}
for column, template in column_templates.items():
    district_summary[column] = district_summary[column].map(template.format)

# Display the DataFrame
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326
