# PyCity Schools Analysis

* As a whole, schools with higher budgets, did not yield better test results. By contrast, schools with higher spending per student actually (\$645-675) underperformed compared to schools with smaller budgets (<\$585 per student).

* As a whole, smaller and medium sized schools dramatically out-performed large sized schools on passing math performances (89-91% passing vs 67%).

* As a whole, charter schools out-performed the public district schools across all metrics. However, more analysis will be required to glean if the effect is due to school practices or the fact that charter schools tend to serve smaller student populations per school. 
---

In [78]:
# Dependencies and Setup
import pandas as pd

# File to Load 
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset (consider using a left join)

df = pd.merge(student_data, school_data, on='school_name', how = 'left')

df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [79]:
# Calculate the Totals (Schools and Students)

student_count = df['Student ID'].count()
school_count = df['school_name'].nunique()

# Calculate the Total Budget
total_budget = school_data['budget'].sum()

# Calculate the Average Scores

avg_math = df['math_score'].mean()
avg_reading = df['reading_score'].mean()

# Calculate the Percentage Pass Rates

mathpass = df.loc[df['math_score'] >= 70]
readpass = df.loc[df['reading_score'] >= 70]
bothpass = df.loc[readpass.index & mathpass.index]

mathpct = len(mathpass)/ student_count
readpct = len(readpass)/student_count
bothpct = len(bothpass)/student_count

# Minor Data Cleanup

summary_df = pd.DataFrame({"Total Students" : [student_count], "Total Schools" : [school_count], 
                           "Total Budget" : [total_budget], "Average Math Score" : [avg_math], 
                           "Average Reading Score" : [avg_reading], "Stu. Pass % (Math)" : [mathpct],
                           "Stu. Pass % (Reading)" : [readpct], "Stu. Pass % (Both)" : [bothpct]})

summary_df['Total Students'] = summary_df['Total Students'].map("{:,}".format)
summary_df['Total Budget'] = summary_df['Total Budget'].map("${:,}".format)
summary_df['Average Math Score'] = summary_df['Average Math Score'].map("{:.2f}".format)
summary_df['Average Reading Score'] = summary_df['Average Reading Score'].map("{:.2f}".format)
summary_df['Stu. Pass % (Math)'] = summary_df['Stu. Pass % (Math)'].map("{:.2%}".format)
summary_df['Stu. Pass % (Reading)'] = summary_df['Stu. Pass % (Reading)'].map("{:.2%}".format)
summary_df['Stu. Pass % (Both)'] = summary_df['Stu. Pass % (Both)'].map("{:.2%}".format)

#Display the data frame
summary_df

Unnamed: 0,Total Students,Total Schools,Total Budget,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
0,39170,15,"$24,649,428",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

In [80]:
# Determine the School Type
school_types = school_data.set_index(['school_name'])['type']

# Calculate the total student count
student_count = df["school_name"].value_counts()

# Calculate the total school budget and per capita spending
tot_school_budget = df.groupby('school_name').max()['budget']

student_budget = tot_school_budget / student_count

# Calculate the average test scores
mathavg = df.groupby('school_name').mean()['math_score']
readingavg = df.groupby('school_name').mean()['reading_score']

# Calculate the passing scores by creating a filtered data frame

passmath = df[df["math_score"]>=70]
passread = df[df["reading_score"]>=70]

passmathpct = passmath.groupby('school_name').count()['math_score'] / student_count 
passreadpct = passread.groupby('school_name').count()['reading_score'] / student_count 

passboth = (passmathpct + passreadpct)/2
            
# Convert to data frame/Minor data munging

schoolsummary_df = pd.DataFrame({"Total Students" : student_count, "School Type" : school_types, 
                                 "Total School Budget" : tot_school_budget, "Budget per Student" : student_budget, 
                                 "Average Math Score" : mathavg, "Average Reading Score" : readingavg, 
                                 "Stu. Pass % (Math)" : passmathpct ,"Stu. Pass % (Reading)" : passreadpct, 
                                 "Stu. Pass % (Both)" : passboth})

schoolsummaryform_df = schoolsummary_df.copy()

schoolsummaryform_df['Total Students'] = schoolsummary_df['Total Students'].map("{:,}".format)
schoolsummaryform_df['Total School Budget'] = schoolsummary_df['Total School Budget'].map("${:,}".format)
schoolsummaryform_df['Budget per Student'] = schoolsummary_df['Budget per Student'].map("${:,}".format)
schoolsummaryform_df['Average Math Score'] = schoolsummary_df['Average Math Score'].map("{:.2f}".format)
schoolsummaryform_df['Average Reading Score'] = schoolsummary_df['Average Reading Score'].map("{:.2f}".format)
schoolsummaryform_df['Stu. Pass % (Math)'] = schoolsummary_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolsummaryform_df['Stu. Pass % (Reading)'] = schoolsummary_df['Stu. Pass % (Reading)'].map("{:.2%}".format)
schoolsummaryform_df['Stu. Pass % (Both)'] = schoolsummary_df['Stu. Pass % (Both)'].map("{:.2%}".format)

# Display the data frame

schoolsummaryform_df


Unnamed: 0,Total Students,School Type,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
Bailey High School,4976,District,"$3,124,928",$628.0,77.05,81.03,66.68%,81.93%,74.31%
Cabrera High School,1858,Charter,"$1,081,356",$582.0,83.06,83.98,94.13%,97.04%,95.59%
Figueroa High School,2949,District,"$1,884,411",$639.0,76.71,81.16,65.99%,80.74%,73.36%
Ford High School,2739,District,"$1,763,916",$644.0,77.1,80.75,68.31%,79.30%,73.80%
Griffin High School,1468,Charter,"$917,500",$625.0,83.35,83.82,93.39%,97.14%,95.27%
Hernandez High School,4635,District,"$3,022,020",$652.0,77.29,80.93,66.75%,80.86%,73.81%
Holden High School,427,Charter,"$248,087",$581.0,83.8,83.81,92.51%,96.25%,94.38%
Huang High School,2917,District,"$1,910,635",$655.0,76.63,81.18,65.68%,81.32%,73.50%
Johnson High School,4761,District,"$3,094,650",$650.0,77.07,80.97,66.06%,81.22%,73.64%
Pena High School,962,Charter,"$585,858",$609.0,83.84,84.04,94.59%,95.95%,95.27%


## Top Performing Schools (By Passing Rate)

In [81]:
# Sort and show top five schools

aschools = schoolsummaryform_df.sort_values('Stu. Pass % (Both)', ascending = False)

aschools.head()

Unnamed: 0,Total Students,School Type,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
Cabrera High School,1858,Charter,"$1,081,356",$582.0,83.06,83.98,94.13%,97.04%,95.59%
Thomas High School,1635,Charter,"$1,043,130",$638.0,83.42,83.85,93.27%,97.31%,95.29%
Griffin High School,1468,Charter,"$917,500",$625.0,83.35,83.82,93.39%,97.14%,95.27%
Pena High School,962,Charter,"$585,858",$609.0,83.84,84.04,94.59%,95.95%,95.27%
Wilson High School,2283,Charter,"$1,319,574",$578.0,83.27,83.99,93.87%,96.54%,95.20%


## Bottom Performing Schools (By Passing Rate)

In [82]:
# Sort and show bottom five schools
dschools = schoolsummaryform_df.sort_values('Stu. Pass % (Both)')

dschools.head()

Unnamed: 0,Total Students,School Type,Total School Budget,Budget per Student,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
Rodriguez High School,3999,District,"$2,547,363",$637.0,76.84,80.74,66.37%,80.22%,73.29%
Figueroa High School,2949,District,"$1,884,411",$639.0,76.71,81.16,65.99%,80.74%,73.36%
Huang High School,2917,District,"$1,910,635",$655.0,76.63,81.18,65.68%,81.32%,73.50%
Johnson High School,4761,District,"$3,094,650",$650.0,77.07,80.97,66.06%,81.22%,73.64%
Ford High School,2739,District,"$1,763,916",$644.0,77.1,80.75,68.31%,79.30%,73.80%


## Math Scores by Grade

In [90]:
# Create data series of scores by grade levels using conditionals
math = df.groupby(['school_name','grade']).mean()['math_score']

# Group each by school name

# Combine series into single data frame
mathdf = pd.DataFrame(math)
# Minor data munging

# Display the data frame
mathdf

Unnamed: 0_level_0,Unnamed: 1_level_0,math_score
school_name,grade,Unnamed: 2_level_1
Bailey High School,10th,76.996772
Bailey High School,11th,77.515588
Bailey High School,12th,76.492218
Bailey High School,9th,77.083676
Cabrera High School,10th,83.154506
Cabrera High School,11th,82.76556
Cabrera High School,12th,83.277487
Cabrera High School,9th,83.094697
Figueroa High School,10th,76.539974
Figueroa High School,11th,76.884344


## Reading Score by Grade 

In [84]:
# Create data series of scores by grade levels using conditionals
reading = df.groupby(['school_name','grade']).mean()['reading_score']
# Group each by school name

# Combine series into single data frame
readdf = pd.DataFrame(reading)
# Minor data munging

# Display the data frame
readdf

Unnamed: 0_level_0,Unnamed: 1_level_0,reading_score
school_name,grade,Unnamed: 2_level_1
Bailey High School,10th,80.907183
Bailey High School,11th,80.945643
Bailey High School,12th,80.912451
Bailey High School,9th,81.303155
Cabrera High School,10th,84.253219
Cabrera High School,11th,83.788382
Cabrera High School,12th,84.287958
Cabrera High School,9th,83.676136
Figueroa High School,10th,81.408912
Figueroa High School,11th,80.640339


## Scores by School Spending

In [85]:
# Establish the bins -- choose any set of bins you would like, but see below for testing bins
# to test, set your bins as follows: [0, 585, 615, 645, 675]
# ALSO -- Note that the values for `% Passing Math`, `% Passing Reading` and `% Overall Passing Rate`
# were computed using averages of averages -- your results may vary if you use weighted averages 

# Categorize the spending based on the bins
bins = [0, 590, 615, 645, 655]
group_names = ["Less than $590", "$590-615", "$615-645", "645-675"]

# Assemble into data frame
schoolsummary_df['Spending Ranges'] = pd.cut(schoolsummary_df['Budget per Student'], bins, labels = group_names, include_lowest = True)
schoolspending_df = schoolsummary_df.groupby('Spending Ranges').mean()

# Minor data munging
schoolspending_df = schoolspending_df[['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']]
schoolspending_df['Stu. Pass % (Both)'] = schoolspending_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schoolspending_df['Stu. Pass % (Math)'] = schoolspending_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolspending_df['Stu. Pass % (Reading)'] = schoolspending_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schoolspending_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
Spending Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Less than $590,83.455399,83.933814,93.46%,96.61%,95.04%
$590-615,83.599686,83.885211,94.23%,95.90%,95.07%
$615-645,79.079225,81.891436,75.67%,86.11%,80.89%
645-675,76.99721,81.027843,66.16%,81.13%,73.65%


## Scores by School Size

In [86]:
# Establish the bins 

bins = [0, 1000, 2000, 5000]
group_names = ["Small", "Medium", "Large"]

# Categorize the spending based on the bins
schoolsummary_df['School Size'] = pd.cut(schoolsummary_df['Total Students'], bins, labels = group_names, include_lowest = True)

# Calculate the scores based on bins
schoolsize_df = schoolsummary_df.groupby('School Size').mean()

# Assemble into data frame
schoolsize_df = schoolsize_df[['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']]

# Minor data munging
schoolsize_df['Stu. Pass % (Both)'] = schoolsize_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schoolsize_df['Stu. Pass % (Math)'] = schoolsize_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolsize_df['Stu. Pass % (Reading)'] = schoolsize_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schoolsize_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small,83.821598,83.929843,93.55%,96.10%,94.82%
Medium,83.374684,83.864438,93.60%,96.79%,95.20%
Large,77.746417,81.344493,69.96%,82.77%,76.36%


## Scores by School Type

In [87]:
# Type | Average Math Score | Average Reading Score | % Passing Math | % Passing Reading | % Overall Passing Rate

# Assemble into data frame
schooltype_df = schoolsummary_df.groupby('School Type').mean()
schooltype_df = schooltype_df[['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']]

# Minor data munging
schooltype_df['Stu. Pass % (Both)'] = schooltype_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schooltype_df['Stu. Pass % (Math)'] = schooltype_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schooltype_df['Stu. Pass % (Reading)'] = schooltype_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schooltype_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,Stu. Pass % (Math),Stu. Pass % (Reading),Stu. Pass % (Both)
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62%,96.59%,95.10%
District,76.956733,80.966636,66.55%,80.80%,73.67%
