In [167]:
# Dependencies and Setup
import pandas as pd

# File to Load
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Rename columns in both data frames for consistency and clarity
school_data = school_data.rename(columns = {'school_name':'School Name', 'type':'School Type', 'size':'School Size', 'budget':'School Budget'})
student_data = student_data.rename(columns = {'student_name':'Student Name', 'gender':'Gender', 'grade':'Grade', 'school_name':'School Name', 'math_score':'Math Score', 'reading_score':'Reading Score'})

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how='left', on=['School Name', 'School Name'])

In [168]:
school_data_complete.head()

Unnamed: 0,Student ID,Student Name,Gender,Grade,School Name,Reading Score,Math Score,School ID,School Type,School Size,School Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [175]:
## SCHOOL DISTRICT SUMMARY
## Generate high-level snapshot (in table form) of the district's key metrics

# Calculate the total number of schools
total_schools = school_data['School ID'].count()

# Calculate the total number of students
total_students = student_data['Student ID'].count()

# Calculate the total budget
total_budget = school_data['School Budget'].sum()

# Calculate the average math score
average_math_score = student_data['Math Score'].mean()
 
# Calculate the average reading score
average_reading_score = student_data['Reading Score'].mean()

# Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2
overall_passing_rate = (average_math_score + average_reading_score) / 2

# Calculate the percentage of students with a passing math score (70 or greater)
passing_math_score = student_data[student_data['Math Score'] >= 70]
percent_passing_math = (passing_math_score['Math Score'].count() / total_students) * 100

# Calculate the percentage of students with a passing reading score (70 or greater)
passing_reading_score = student_data[student_data['Reading Score'] >= 70]
percent_passing_reading = (passing_reading_score['Reading Score'].count() / total_students) * 100

# Create a summary dataframe to hold the above results
school_district_summary = pd.DataFrame([(total_schools, total_students, total_budget, average_math_score, average_reading_score, percent_passing_math, percent_passing_reading, overall_passing_rate)], columns=['Total Schools','Total Students','Total Budget','Average Math Score','Average Reading Score', '% Passing Math','% Passing Reading', 'Overall Passing Rate'])

# Format the total budget column
school_district_summary['Total Budget'] = school_district_summary['Total Budget'].map("${:,.2f}".format)

# Display School District Summary data frame 
print(f"SCHOOL DISTRICT SUMMARY")
print(f"_" * 147)
school_district_summary

SCHOOL DISTRICT SUMMARY
___________________________________________________________________________________________________________________________________________________


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.431606


In [176]:
## SCHOOL  SUMMARY
## Create an overview table that summarizes key metrics about each school

# Group the merged data frame by Schools to run analysis on each School
schools_grouped = school_data_complete.groupby(['School Name'])

# Get the type of school
school_type = schools_grouped['School Type'].max()

# Calculate the total number of students in the school
total_school_students = schools_grouped['Student ID'].count()

# Calculate the total school budget
total_school_budget = schools_grouped['School Budget'].max()

# Calculate the per student budget for each school
per_student_school_budget = total_school_budget / total_school_students

# Calculate the average math score for each school
average_school_math_score = schools_grouped['Math Score'].mean()

# Calculate the average reading score for each school
average_school_reading_score = schools_grouped['Reading Score'].mean() 

# Group the merged data frame by Schools once again, but based on passing math scores 70 or above
school_passing_math_score = school_data_complete[school_data_complete['Math Score'] >= 70].groupby(['School Name'])
# Calculate the % passing math score for each school
percent_school_passing_math_score = (school_passing_math_score['Student ID'].count() / total_school_students) * 100

# Group the merged data frame by Schools once again, but based on passing reading scores 70 or above
school_passing_reading_score = school_data_complete[school_data_complete['Reading Score'] >= 70].groupby(['School Name'])
# Calculate the % reading score for each school
percent_school_passing_reading_score = (school_passing_reading_score['Student ID'].count() / total_school_students) * 100

# Calculate the overall passing rate for each school (average of the above two)
overall_school_passing_rate = (percent_school_passing_math_score + percent_school_passing_reading_score) / 2

# Save all of the school metrics to a Data Frame
school_metrics_summary = pd.DataFrame({ 'School Type': school_type, 'Total Students': total_school_students, 'Total School Budget': total_school_budget, 'Per Student Budget': per_student_school_budget, 'Average Math Score': average_school_math_score, 'Average Reading Score': average_school_reading_score, '% Passing Math': percent_school_passing_math_score, '% Passing Reading': percent_school_passing_reading_score, '% Overall Passing Rate': overall_school_passing_rate} )

bottom_passing_rate_schools = school_metrics_summary.sort_values('% Overall Passing Rate')

# Format the budget columns
school_metrics_summary['Total School Budget'] = school_metrics_summary['Total School Budget'].map("${:,.2f}".format)
school_metrics_summary['Per Student Budget'] = school_metrics_summary['Per Student Budget'].map("${:,.2f}".format)

# Display metrics for all schools
school_metrics_summary

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.752967,80.862999,73.807983
Holden High School,Charter,427,"$248,087.00",$581.00,83.803279,83.814988,92.505855,96.252927,94.379391
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027


In [177]:
## TOP PERFORMING SCHOOLS BY PASSING RATE

# Sort and display the top five schools in overall passing rate
top_passing_rate_schools = school_metrics_summary.sort_values('% Overall Passing Rate', ascending=False)
top_passing_rate_schools.head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,95.29052
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,95.203679


In [179]:
## BOTTOM PERFORMING SCHOOLS BY PASSING RATE

# Sort and display the five worst-performing schools in overall passing rate
bottom_passing_rate_schools = school_metrics_summary.sort_values('% Overall Passing Rate')
bottom_passing_rate_schools.head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
