# PyCity Schools

In [1]:
import pandas as pd

# Read CSVs
school_data = pd.read_csv("../resources/schools_complete.csv")
student_data = pd.read_csv("../resources/students_complete.csv")

# Merge datasets
all_school_data = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

# Rename columns
all_school_data = all_school_data.rename(columns={"student_name":"Student Name", 
                                "gender": "Gender", 
                                "grade": "grade", 
                                "school_name": "School Name", 
                                "reading_score": "Reading Score", 
                                "math_score": "Math Score", 
                                "type": "Type", 
                                "size": "Size", "budget": "Budget"})

all_school_data.head()

Unnamed: 0,Student ID,Student Name,Gender,grade,School Name,Reading Score,Math Score,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [2]:
# Calculate stats for all schools
total_schools = len(all_school_data["School Name"].unique())
total_students = len(all_school_data["Student ID"].unique())
total_budget = school_data["budget"].sum()
avg_math = all_school_data["Math Score"].mean()
avg_reading = all_school_data["Reading Score"].mean()
avg_pass = (all_school_data["Math Score"].mean() + all_school_data["Reading Score"].mean()) / 2
pass_math = all_school_data["Math Score"][all_school_data["Math Score"] >= 70].count() / total_students * 100 
pass_reading = all_school_data["Reading Score"][all_school_data["Reading Score"] >= 70].count() / total_students * 100

# Create table
summary_table1 = pd.DataFrame({
    'Total Schools': [total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [round(avg_math, 4)],
    'Average Reading Score': [round(avg_reading, 4)],
    '% Passing Math': [round(pass_math, 4)],
    '% Passing Reading': [round(pass_reading, 4)],
    '% Overall Passing Rate': [round(avg_pass, 4)]
})

# Format
summary_table1['Total Students'] = summary_table1['Total Students'].map('{:,.0f}'.format)
summary_table1['Total Budget'] = summary_table1['Total Budget'].map('${:,.2f}'.format)

summary_table1

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.9854,81.8778,74.9809,85.8055,80.4316


## School Summary

In [3]:
# Group data by school and calculate stats
grouped_school_data = all_school_data.groupby(['School Name'])

school_type = grouped_school_data['Type'].agg(pd.Series.mode)
students_per_school = grouped_school_data['Student ID'].count()
school_budget = grouped_school_data['Budget'].sum() / grouped_school_data['School Name'].count()
per_student_budget = school_budget / students_per_school
school_avg_math = grouped_school_data["Math Score"].mean()
school_avg_reading = grouped_school_data["Reading Score"].mean()

school_pass_math = all_school_data[all_school_data['Math Score'] >= 70].groupby('School Name')['Student ID'].count()\
    / students_per_school * 100
school_pass_reading = all_school_data[all_school_data['Reading Score'] >= 70].groupby('School Name')['Student ID'].count()\
    / students_per_school * 100
school_avg_pass = (school_pass_math + school_pass_reading) / 2


# Table 
summary_table2 = pd.DataFrame({
    'School Type': school_type,
    'Total Students': students_per_school,
    'Total School Budget': school_budget,
    'Per Student Budget': per_student_budget,
    'Average Math Score': round(school_avg_math, 4),
    'Average Reading Score': round(school_avg_reading, 4),
    '% Passing Math': round(school_pass_math, 4),
    '% Passing Reading': round(school_pass_reading, 4),
    '% Overall Passing Rate': round(school_avg_pass, 4)
})

# Format
summary_table2['Total Students'] = summary_table2['Total Students'].map('{:,.0f}'.format)
summary_table2['Total School Budget'] = summary_table2['Total School Budget'].map('${:,.2f}'.format)

summary_table2

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",628.0,77.0484,81.034,66.6801,81.9333,74.3067
Cabrera High School,Charter,1858,"$1,081,356.00",582.0,83.0619,83.9758,94.1335,97.0398,95.5867
Figueroa High School,District,2949,"$1,884,411.00",639.0,76.7118,81.158,65.9885,80.7392,73.3639
Ford High School,District,2739,"$1,763,916.00",644.0,77.1026,80.7463,68.3096,79.299,73.8043
Griffin High School,Charter,1468,"$917,500.00",625.0,83.3515,83.8168,93.3924,97.139,95.2657
Hernandez High School,District,4635,"$3,022,020.00",652.0,77.2898,80.9344,66.753,80.863,73.808
Holden High School,Charter,427,"$248,087.00",581.0,83.8033,83.815,92.5059,96.2529,94.3794
Huang High School,District,2917,"$1,910,635.00",655.0,76.6294,81.1827,65.6839,81.3164,73.5002
Johnson High School,District,4761,"$3,094,650.00",650.0,77.0725,80.9664,66.0576,81.2224,73.64
Pena High School,Charter,962,"$585,858.00",609.0,83.8399,84.0447,94.5946,95.9459,95.2703


## Top Performing Schools (By Passing Rate)

In [4]:
# Sort school performance table for best schools
summary_table3 = summary_table2.sort_values('% Overall Passing Rate', ascending=False).head(5)

summary_table3

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,"$1,081,356.00",582.0,83.0619,83.9758,94.1335,97.0398,95.5867
Thomas High School,Charter,1635,"$1,043,130.00",638.0,83.4183,83.8489,93.2722,97.3089,95.2905
Pena High School,Charter,962,"$585,858.00",609.0,83.8399,84.0447,94.5946,95.9459,95.2703
Griffin High School,Charter,1468,"$917,500.00",625.0,83.3515,83.8168,93.3924,97.139,95.2657
Wilson High School,Charter,2283,"$1,319,574.00",578.0,83.2742,83.9895,93.8677,96.5396,95.2037


## Bottom Performing Schools (By Passing Rate)

In [5]:
# Sort school performance table for worst schools
summary_table4 = summary_table2.sort_values('% Overall Passing Rate', ascending=True).head(5)

summary_table4

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,"$2,547,363.00",637.0,76.8427,80.7447,66.3666,80.2201,73.2933
Figueroa High School,District,2949,"$1,884,411.00",639.0,76.7118,81.158,65.9885,80.7392,73.3639
Huang High School,District,2917,"$1,910,635.00",655.0,76.6294,81.1827,65.6839,81.3164,73.5002
Johnson High School,District,4761,"$3,094,650.00",650.0,77.0725,80.9664,66.0576,81.2224,73.64
Ford High School,District,2739,"$1,763,916.00",644.0,77.1026,80.7463,68.3096,79.299,73.8043
