In [1]:
#Dependencies and setup
import pandas as pd

# Locations of the two raw CSV inputs (relative paths)
school_path = "Resources/schools_complete.csv"
student_path = "Resources/students_complete.csv"

# Load each CSV into its own data frame
school_data = pd.read_csv(school_path)
student_data = pd.read_csv(student_path)

# Attach each student's school attributes with a left join on the shared
# "school_name" column. The key is passed once (it was previously listed
# twice), and validate="many_to_one" makes pandas raise MergeError if a school
# name is repeated in school_data, which would otherwise silently duplicate
# student rows in the merged frame.
complete_data = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
complete_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [2]:
# Number of schools in the district = number of distinct "School ID" values
school_ids = school_data["School ID"]
total_schools = int(school_ids.nunique())
total_schools

15

In [3]:
# Number of students in the district = number of distinct "Student ID" values
student_ids = student_data["Student ID"]
total_students = student_ids.nunique()
total_students

39170

In [4]:
# District-wide budget: add up the "budget" column across all school rows
school_budgets = school_data["budget"]
total_budget = school_budgets.sum()
total_budget

24649428

In [5]:
# District-wide mean of the "math_score" column over all student rows
math_scores = student_data["math_score"]
avg_math_score = math_scores.mean()
avg_math_score

78.98537145774827

In [6]:
# District-wide mean of the "reading_score" column over all student rows
reading_scores = student_data["reading_score"]
avg_reading_score = reading_scores.mean()
avg_reading_score

81.87784018381414

In [7]:
# Percentage of students who got a passing math score (>= 70).
# Only the "Student ID" column of the passing rows is counted, rather than
# running count() over every column of the filtered frame and keeping one.
passed_math = student_data["math_score"] >= 70
total_students_passing_math = student_data.loc[passed_math, "Student ID"].count()
percent_passing_math = (total_students_passing_math / total_students) * 100
percent_passing_math

74.9808526933878

In [8]:
# Percentage of students who got a passing reading score (>= 70).
# Only the "Student ID" column of the passing rows is counted, rather than
# running count() over every column of the filtered frame and keeping one.
passed_reading = student_data["reading_score"] >= 70
total_students_passing_reading = student_data.loc[passed_reading, "Student ID"].count()
percent_passing_reading = (total_students_passing_reading / total_students) * 100
percent_passing_reading

85.80546336482001

In [9]:
# Percentage of students who passed both math AND reading (>= 70 in each).
# The two conditions are combined into one row mask, and only the
# "Student ID" column of the matching rows is counted instead of running
# count() over every column of the filtered frame.
passed_both = (student_data["math_score"] >= 70) & (student_data["reading_score"] >= 70)
total_students_passing_both = student_data.loc[passed_both, "Student ID"].count()
percent_passing_both = (total_students_passing_both / total_students) * 100
percent_passing_both

65.17232575950983

In [10]:
# Gather the district-level metrics computed above, keyed by display label
summary_values = {
    "Total Schools": total_schools,
    "Total Students": total_students,
    "Total Budget": total_budget,
    "Average Math Score": avg_math_score,
    "Average Reading Score": avg_reading_score,
    "% Passing Math": percent_passing_math,
    "% Passing Reading": percent_passing_reading,
    "% Overall Passing": percent_passing_both,
}

# One-row summary frame: each metric becomes a single-element column
district_summary = pd.DataFrame(
    {label: [value] for label, value in summary_values.items()}
)

# Display formatting: thousands separators for the student count and a
# dollar amount with two decimals for the budget. Both columns become strings.
column_formats = {
    "Total Students": "{:,}".format,
    "Total Budget": "${:,.2f}".format,
}
for column, formatter in column_formats.items():
    district_summary[column] = district_summary[column].map(formatter)

# Show the finished summary
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326
