In [2]:
# Dependencies and Setup
import pandas as pd

# Input CSV locations, given relative to the notebook's working directory
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student data files into pandas DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
# how="left" keeps every row of student_data and attaches the columns of the
# school whose "school_name" matches. The join key is a single column, so it is
# passed once as a plain string.
# validate="many_to_one" makes pandas raise a MergeError if "school_name" is not
# unique in school_data; without it, a duplicated school would silently
# duplicate that school's student rows in the merged frame.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [3]:
# Peek at the first five rows of the merged student/school table
school_data_complete.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [30]:
## SCHOOL DISTRICT SUMMARY
## -----------------------
# Builds a one-row, district-wide summary table from `school_data` and
# `student_data` (the frames read from CSV in the first cell), then formats
# its columns as display strings.

# Minimum score (inclusive) that counts as passing, for both math and reading
PASSING_SCORE = 70

# Calculate the total number of schools (non-null 'School ID' entries)
total_schools = school_data['School ID'].count()

# Calculate the total number of students (non-null 'Student ID' entries)
total_students = student_data['Student ID'].count()

# Calculate the total budget across all schools
total_budget = school_data['budget'].sum()

# Calculate the average math score
average_math_score = student_data['math_score'].mean()

# Calculate the average reading score
average_reading_score = student_data['reading_score'].mean()

# Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2
# NOTE: despite its name this is the mean of the two average scores, not a
# share of students; the name and column label are kept as-is.
overall_passing_rate = (average_math_score + average_reading_score) / 2

# Calculate the percentage of students with a passing math score
passing_math_score = student_data[student_data['math_score'] >= PASSING_SCORE]
percent_passing_math = passing_math_score['math_score'].count() / total_students

# Calculate the percentage of students with a passing reading score
passing_reading_score = student_data[student_data['reading_score'] >= PASSING_SCORE]
percent_passing_reading = passing_reading_score['reading_score'].count() / total_students

# Create a dataframe to hold the above results (a single row; the column order
# is fixed explicitly by `columns`)
school_district_summary = pd.DataFrame(
    [(
        total_schools,
        total_students,
        total_budget,
        average_math_score,
        average_reading_score,
        percent_passing_math,
        percent_passing_reading,
        overall_passing_rate,
    )],
    columns=[
        'Total Schools',
        'Total Students',
        'Total Budget',
        'Average Math Score',
        'Average Reading Score',
        '% Passing Math',
        '% Passing Reading',
        'Overall Passing Rate',
    ],
)

# Display format for each column that gets one. The fractions are rendered as
# percentages with two decimals; the remaining stats get thousands separators
# and/or two decimals. 'Total Schools' is intentionally left unformatted.
# After this step the formatted columns hold strings, not numbers.
display_formats = {
    '% Passing Math': "{:.2%}",
    '% Passing Reading': "{:.2%}",
    'Total Students': "{:,}",
    'Total Budget': "${:,.2f}",
    'Average Math Score': "{:.2f}",
    'Average Reading Score': "{:.2f}",
    'Overall Passing Rate': "{:.2f}",
}
for column, spec in display_formats.items():
    school_district_summary[column] = school_district_summary[column].map(spec.format)

# Display the school district summary data frame under a text banner
print("SCHOOL DISTRICT SUMMARY")
print("_" * 147)
school_district_summary

SCHOOL DISTRICT SUMMARY
___________________________________________________________________________________________________________________________________________________


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,80.43
