In [37]:
# Import Dependencies
import pandas as pd
from pathlib import Path

In [38]:
# Create paths to csv files and read them into Pandas DataFrame
school_datafile = Path('Resources/schools_complete.csv')
student_datafile = Path('Resources/students_complete.csv')

# Read the data
school_data = pd.read_csv(school_datafile)
student_data = pd.read_csv(student_datafile)

# Combine the two datasets into one dataset
district_data = pd.merge(student_data, school_data, how="right", on=["school_name", "school_name"])
district_data.head()


Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [39]:
# Calculate the total number of unique schools
total_schools = len(district_data["school_name"].unique())
total_schools

15

In [40]:
# Calculate the total number of students
total_students = district_data["Student ID"].nunique()
total_students

39170

In [42]:
# Calculate the total budget
total_budget = school_data["budget"].sum()
total_budget

24649428

In [45]:
# Calculate the average math score
avg_math_score = district_data["math_score"].mean()
avg_math_score

78.98537145774827

In [46]:
# Calculate the average reading score
avg_reading_score = district_data["reading_score"].mean()
avg_reading_score

81.87784018381414

In [49]:
# Calculate percentage of students passing math
students_passing_math = district_data[(district_data["math_score"] >= 70)].count()["student_name"]
perc_students_passing_math = students_passing_math / float(total_students) * 100
perc_students_passing_math

74.9808526933878

In [50]:
# Calculate percentage of students passing reading
students_passing_reading = district_data[district_data["reading_score"] >= 70].count()["student_name"]
perc_students_passing_reading = students_passing_reading / float(total_students) * 100
perc_students_passing_reading

85.80546336482001

In [51]:
# Calculate percentage of students passing BOTH math AND reading
students_passing_both = district_data[
    (district_data["math_score"] >= 70) & (district_data["reading_score"] >= 70)
].count()["student_name"]
overall_passing_rate = students_passing_both / float(total_students) * 100
overall_passing_rate

65.17232575950983

In [57]:
# Create a high-level snapshot of the distict's key metrics in a DataFrame
district_summary_df = pd.DataFrame({
    "Total Schools": [total_schools],
    "Total Students": [total_students],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math_score],
    "Average Reading Score": [avg_reading_score],
    "% Passing Math": [perc_students_passing_math],
    "% Passing Reading": [perc_students_passing_reading],
    "% Overall Passing Rate": [overall_passing_rate]
})

# Formatting
district_summary_df["Total Students"] = district_summary_df["Total Students"].map("{:,}".format)
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.2f}".format)

# DataFrame
district_summary_df


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


In [59]:
# Use the code provided to select the school type
school_types = school_data.set_index(["school_name"])["type"]


In [63]:
# Calculate the total student count
per_school_counts = school_data["size"].sum()
per_school_counts

39170

In [69]:
# Calculate school budget and per capita spending per school
per_school_budget = school_data["budget"].mean()
per_school_budget

per_school_capita = per_school_budget / per_school_counts
per_school_capita

41.95290273168241

In [72]:
# Calculate the average test scores per school
per_school_math = district_data["math_score"].mean()

per_school_reading = district_data["reading_score"].mean()

In [None]:
# Calculate the number of students per school with math scores 70+
students_passing_math = district_data["math"]