In [1]:
# import dependencies
import pandas as pd
from pathlib import Path

# Locations of the two input CSV files, relative to the notebook's working directory.
school_csv_path = Path("Resources/schools_complete.csv")
student_csv_path = Path("Resources/students_complete.csv")

# Load each CSV into its own DataFrame.
school_df = pd.read_csv(school_csv_path)
student_df = pd.read_csv(student_csv_path)

# Left-join the school attributes onto every student row, matching on the school name.
# The key is given once (the original listed "school_name" twice, which is redundant).
# validate="many_to_one" makes pandas raise a MergeError if a school name appears more
# than once in school_df; such a duplicate would otherwise silently duplicate student
# rows and inflate every count and average computed from the merged frame.
school_df_complete = pd.merge(
    student_df,
    school_df,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_df_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
# Number of distinct school names present in the merged student/school table.
school_total = school_df_complete["school_name"].nunique()
school_total

15

In [6]:
# Total number of students in the district.
# The merged frame is the student table left-joined with the school table, so its
# row count is the head count. Counting rows, rather than calling .count() on
# `student_name` (which skips missing values), keeps the total correct even if a
# student's name is absent from the data.
student_total = len(school_df_complete)
student_total

39170

In [7]:
# Total district budget, summed over the schools table. The merged table is not
# used here because it repeats a school's budget on every one of its student rows.
budget_total = school_df["budget"].sum()
budget_total

24649428

In [8]:
# District-wide mean of the math scores across all student rows.
avg_math_score = school_df_complete["math_score"].mean()
avg_math_score

78.98537145774827

In [9]:
# District-wide mean of the reading scores across all student rows.
avg_reading_score = school_df_complete["reading_score"].mean()
avg_reading_score

81.87784018381414

In [10]:
# Percentage of students who passed math (scored a 70 or above).
# Summing the boolean mask counts the passing rows directly. The previous form built
# a filtered copy of the whole frame and counted every column just to read the
# `student_name` entry, which also made the count depend on names being non-null.
passing_math_count = (school_df_complete["math_score"] >= 70).sum()
passing_math_percentage = passing_math_count / student_total * 100
passing_math_percentage

74.9808526933878

In [11]:
# Percentage of students who passed reading (scored a 70 or above).
# Summing the boolean mask counts the passing rows directly. The previous form built
# a filtered copy of the whole frame and counted every column just to read the
# `student_name` entry, which also made the count depend on names being non-null.
passing_reading_count = (school_df_complete["reading_score"] >= 70).sum()
passing_reading_percentage = passing_reading_count / student_total * 100
passing_reading_percentage

85.80546336482001

In [12]:
# Percentage of students who passed both math and reading (70 or above in each).
# The two conditions are combined element-wise and the resulting boolean mask is
# summed, which counts the qualifying rows without materialising a filtered frame
# or depending on `student_name` being non-null.
passing_math_reading_count = (
    (school_df_complete["math_score"] >= 70)
    & (school_df_complete["reading_score"] >= 70)
).sum()
overall_passing_rate = passing_math_reading_count / student_total * 100
overall_passing_rate

65.17232575950983

In [15]:
# Build the one-row district summary table.
# `data` maps each column label to a single-element list, so the resulting
# DataFrame has exactly one row holding the district-wide metrics.
data = {
    "Total Schools": [school_total],
    "Total Students": [student_total],
    "Total Budget": [budget_total],
    "Average Math Score": [avg_math_score],
    "Average Reading Score": [avg_reading_score],
    "% Passing Math": [passing_math_percentage],
    "% Passing Reading": [passing_reading_percentage],
    "Overall Passing Rate": [overall_passing_rate],
}

# Format for readability: thousands separators on the student count, and a dollar
# amount with two decimals on the budget. Both columns hold strings afterwards.
district_summary = pd.DataFrame(data).assign(
    **{
        "Total Students": lambda frame: frame["Total Students"].map("{:,}".format),
        "Total Budget": lambda frame: frame["Total Budget"].map("${:,.2f}".format),
    }
)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326
