# PyCity Schools Analysis

* As a whole, schools with higher budgets, did not yield better test results. By contrast, schools with higher spending per student actually (\$645-675) underperformed compared to schools with smaller budgets (<\$585 per student).

* As a whole, smaller and medium sized schools dramatically out-performed large sized schools on passing math performances (89-91% passing vs 67%).

* As a whole, charter schools out-performed the public district schools across all metrics. However, more analysis will be required to glean if the effect is due to school practices or the fact that charter schools tend to serve smaller student populations per school. 
---

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset (consider using a left join)
schools_df = pd.merge(student_data, school_data, on="school_name", how='left')
schools_df.head(2)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635


## District Summary

In [2]:
# Calculate the Totals (Schools and Students)
total_schools = len(schools_df['School ID'].unique())
total_student = len(schools_df['Student ID'].unique())

# Calculate the Total Budget
total_budget = school_data['budget'].sum()

In [3]:
# Calculate the Average Scores

# Calculate the Percentage Pass Rates
percent_passing_math = len(schools_df.loc[schools_df['math_score'] >= 70]) / total_student

percent_passing_reading = len(schools_df.loc[schools_df['reading_score'] >= 70]) / total_student

overall_passing_rate = (percent_passing_reading + percent_passing_math) /2

# Minor Data Cleanup
district_summary_table = pd.DataFrame({
             "Total Schools": [total_schools], 
             "Total Students": [total_student], 
             "Total Budget": [float(total_budget)], 
             "Average Math Score":  [round(schools_df['math_score'].mean())], 
             "Average Reading Score":  [round(schools_df['reading_score'].mean())], 
             "% Passing Math": [round(percent_passing_math * 100)],
             "% Passing Reading": [round(percent_passing_reading * 100)],
             "% Overall Passing Rate": round(overall_passing_rate * 100)
})

district_summary_table["% Passing Math"] = district_summary_table["% Passing Math"].map("{:}%".format)
district_summary_table["% Passing Reading"] = district_summary_table["% Passing Reading"].map("{:}%".format)
district_summary_table["% Overall Passing Rate"] = district_summary_table["% Overall Passing Rate"].map("{:}%".format)
district_summary_table["Total Budget"] = district_summary_table["Total Budget"].map("${:,.2f}".format)
district_summary_table["Total Students"] = district_summary_table["Total Students"].map("{:,}".format)
# Display the data frame
district_summary_table

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",79,82,75%,86%,80%


## School Summary

In [4]:
schools_group_df = schools_df.groupby("school_name")
schools_group_df

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001E94D71FF48>

In [5]:
# Determine the School Type
school_type = schools_group_df["type"].first()
#school_type = schools_group_df['type']
school_type

school_name
Bailey High School       District
Cabrera High School       Charter
Figueroa High School     District
Ford High School         District
Griffin High School       Charter
Hernandez High School    District
Holden High School        Charter
Huang High School        District
Johnson High School      District
Pena High School          Charter
Rodriguez High School    District
Shelton High School       Charter
Thomas High School        Charter
Wilson High School        Charter
Wright High School        Charter
Name: type, dtype: object

In [6]:
# Calculate the total student count
school_student_count = schools_group_df['school_name'].count()

# Calculate the total school budget and per capita spending
percap = (schools_df.groupby('school_name').mean()['budget']/school_student_count)

# Calculate the average test scores
math_scores = schools_group_df['math_score'].sum()/school_student_count
##math_scores

reading_scores = schools_group_df['reading_score'].sum()/school_student_count
##reading_scores

# Calculate the passing scores by creating a filtered data frame
math_scores_df = schools_df[(schools_df['math_score'] >= 70)]
avg_math_scores = math_scores_df.groupby('school_name').count()['Student ID']/school_student_count
##avg_math_scores

reading_scores_df = schools_df[(schools_df['reading_score'] >= 70)]
avg_reading_scores = reading_scores_df.groupby('school_name').count()['Student ID']/school_student_count
##avg_reading_scores

overall_passing_rate = (avg_math_scores + avg_reading_scores)/2

school_budget = schools_df.groupby('school_name').mean()['budget']

In [7]:
# Convert to data frame
school_summary_table = pd.DataFrame(school_type)
school_summary_table_calc = pd.DataFrame(school_type)
#school_summary_table.head()

school_summary_table_calc["Total Students"]= school_student_count
school_summary_table_calc["Per Student Budget"] = percap
school_summary_table_calc["Total School Budget"] = school_budget
school_summary_table_calc["Average Math Score"] = math_scores
school_summary_table_calc["Average Reading Score"] = reading_scores
school_summary_table_calc["% Passing Math"] = (avg_math_scores * 100)
school_summary_table_calc["% Passing Reading"] = (avg_reading_scores * 100)
school_summary_table_calc["% Overall Passing Rate"] = (overall_passing_rate * 100)
# Minor data munging
school_summary_table["Total Students"]= school_student_count.map("{:,.0f}".format)
school_summary_table["Per Student Budget"] = percap.map("${:,.2f}".format)
school_summary_table["Total School Budget"] = school_budget.map("${:,.2f}".format)
school_summary_table["Average Math Score"] = math_scores.map("{:.0f}".format)
school_summary_table["Average Reading Score"] = reading_scores.map("{:.0f}".format)
school_summary_table["% Passing Math"] = (avg_math_scores * 100).map("{:.0f}%".format)
school_summary_table["% Passing Reading"] = (avg_reading_scores * 100).map("{:.0f}%".format)
school_summary_table["% Overall Passing Rate"] = (overall_passing_rate * 100).map("{:.2f}%".format)
# Display the data frame
school_summary_table

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,$628.00,"$3,124,928.00",77,81,67%,82%,74.31%
Cabrera High School,Charter,1858,$582.00,"$1,081,356.00",83,84,94%,97%,95.59%
Figueroa High School,District,2949,$639.00,"$1,884,411.00",77,81,66%,81%,73.36%
Ford High School,District,2739,$644.00,"$1,763,916.00",77,81,68%,79%,73.80%
Griffin High School,Charter,1468,$625.00,"$917,500.00",83,84,93%,97%,95.27%
Hernandez High School,District,4635,$652.00,"$3,022,020.00",77,81,67%,81%,73.81%
Holden High School,Charter,427,$581.00,"$248,087.00",84,84,93%,96%,94.38%
Huang High School,District,2917,$655.00,"$1,910,635.00",77,81,66%,81%,73.50%
Johnson High School,District,4761,$650.00,"$3,094,650.00",77,81,66%,81%,73.64%
Pena High School,Charter,962,$609.00,"$585,858.00",84,84,95%,96%,95.27%


In [8]:


#school_summary = school_summary_table.drop(["Student ID", "reading_score","math_score","size","School ID","budget"])
#school_summary = school_summary_table[['school_name', 'type','Total Students','Total School Budget',
#                                       'Per Student Budget','Average Math Score','Average Reading Score',
#                                       '% Passing Math','% Passing Reading','% Overall Passing Rate']]



## Top Performing Schools (By Passing Rate)

In [9]:
# Sort and show top five schools
school_summary_table.sort_values('% Overall Passing Rate', ascending=False).head(5)

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,$582.00,"$1,081,356.00",83,84,94%,97%,95.59%
Thomas High School,Charter,1635,$638.00,"$1,043,130.00",83,84,93%,97%,95.29%
Griffin High School,Charter,1468,$625.00,"$917,500.00",83,84,93%,97%,95.27%
Pena High School,Charter,962,$609.00,"$585,858.00",84,84,95%,96%,95.27%
Wilson High School,Charter,2283,$578.00,"$1,319,574.00",83,84,94%,97%,95.20%


## Bottom Performing Schools (By Passing Rate)

In [10]:
# Sort and show bottom five schools
school_summary_table.sort_values('% Overall Passing Rate', ascending=True).head(5)

Unnamed: 0_level_0,type,Total Students,Per Student Budget,Total School Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,$637.00,"$2,547,363.00",77,81,66%,80%,73.29%
Figueroa High School,District,2949,$639.00,"$1,884,411.00",77,81,66%,81%,73.36%
Huang High School,District,2917,$655.00,"$1,910,635.00",77,81,66%,81%,73.50%
Johnson High School,District,4761,$650.00,"$3,094,650.00",77,81,66%,81%,73.64%
Ford High School,District,2739,$644.00,"$1,763,916.00",77,81,68%,79%,73.80%


## Math Scores by Grade

In [11]:
math_grade_table = pd.DataFrame()
reading_grade_table = pd.DataFrame()

for i in range(9,13):
    reading_grade_table[str(i) + "th"] = schools_df.loc[schools_df['grade'] == str(i) + 'th'].groupby('school_name').mean()["reading_score"].map("{:,.2f}%".format)
    math_grade_table[str(i) + "th"] = schools_df.loc[schools_df['grade'] == str(i) + 'th'].groupby('school_name').mean()["math_score"].map("{:,.2f}%".format)

print(f"\nSchool Math Scores by grade  \n{'---'* 20}\n {math_grade_table}")


School Math Scores by grade  
------------------------------------------------------------
                           9th    10th    11th    12th
school_name                                          
Bailey High School     77.08%  77.00%  77.52%  76.49%
Cabrera High School    83.09%  83.15%  82.77%  83.28%
Figueroa High School   76.40%  76.54%  76.88%  77.15%
Ford High School       77.36%  77.67%  76.92%  76.18%
Griffin High School    82.04%  84.23%  83.84%  83.36%
Hernandez High School  77.44%  77.34%  77.14%  77.19%
Holden High School     83.79%  83.43%  85.00%  82.86%
Huang High School      77.03%  75.91%  76.45%  77.23%
Johnson High School    77.19%  76.69%  77.49%  76.86%
Pena High School       83.63%  83.37%  84.33%  84.12%
Rodriguez High School  76.86%  76.61%  76.40%  77.69%
Shelton High School    83.42%  82.92%  83.38%  83.78%
Thomas High School     83.59%  83.09%  83.50%  83.50%
Wilson High School     83.09%  83.72%  83.20%  83.04%
Wright High School     83.26%  84.01%  83.8

## Reading Score by Grade 

In [12]:
print(f"\n\nSchool Reading Scores by grade \n{'---'* 20}\n {reading_grade_table}")



School Reading Scores by grade 
------------------------------------------------------------
                           9th    10th    11th    12th
school_name                                          
Bailey High School     81.30%  80.91%  80.95%  80.91%
Cabrera High School    83.68%  84.25%  83.79%  84.29%
Figueroa High School   81.20%  81.41%  80.64%  81.38%
Ford High School       80.63%  81.26%  80.40%  80.66%
Griffin High School    83.37%  83.71%  84.29%  84.01%
Hernandez High School  80.87%  80.66%  81.40%  80.86%
Holden High School     83.68%  83.32%  83.82%  84.70%
Huang High School      81.29%  81.51%  81.42%  80.31%
Johnson High School    81.26%  80.77%  80.62%  81.23%
Pena High School       83.81%  83.61%  84.34%  84.59%
Rodriguez High School  80.99%  80.63%  80.86%  80.38%
Shelton High School    84.12%  83.44%  84.37%  82.78%
Thomas High School     83.73%  84.25%  83.59%  83.83%
Wilson High School     83.94%  84.02%  83.76%  84.32%
Wright High School     83.83%  83.81%  8

## Scores by School Spending

In [13]:
school_spending_table = pd.DataFrame(school_summary_table_calc)

# Establish the bins -- choose any set of bins you would like, but see below for testing bins
# to test, set your bins as follows: [0, 585, 615, 645, 675]
# ALSO -- Note that the values for `% Passing Math`, `% Passing Reading` and `% Overall Passing Rate`
# were computed using averages of averages -- your results may vary if you use weighted averages 

# Categorize the spending based on the bins

spending_bins = [0, 585, 615, 645, 675]
labels = ['<$585', '$585-615', '$615-645', "$645-675"]
#bin_labels = ['small', 'mid', 'what', "big"]

school_spending_table["Spending Per Student"] = pd.cut(school_spending_table["Per Student Budget"], spending_bins, labels=labels)
school_spending_table = school_spending_table.groupby("Spending Per Student").mean() 
school_spending_table = school_spending_table.drop(columns = ["Total Students" ,"Total School Budget" ,"Per Student Budget"])

# Assemble into data frame
# Minor data munging
for i in school_spending_table.columns:
    if i[0] == '%':
        school_spending_table[i] = school_spending_table[i].map("{:,.2f}%".format)
    else:   
        school_spending_table[i] = school_spending_table[i].map("{:,.2f}".format)

print(f"\n\nSchool Scores by Spending \n{'---'* 35}\n")
school_spending_table


# Display results




School Scores by Spending 
---------------------------------------------------------------------------------------------------------



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Per Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.46,83.93,93.46%,96.61%,95.04%
$585-615,83.6,83.89,94.23%,95.90%,95.07%
$615-645,79.08,81.89,75.67%,86.11%,80.89%
$645-675,77.0,81.03,66.16%,81.13%,73.65%


## Scores by School Size

In [32]:
# Establish the bins 

bins = [0,1000,2000,5000]
labels = ["Small(<1000)","Medium(1000-2000)","Large(2000-5000)"]
#
# Categorize the spending based on the bins

# Calculate the scores based on bins

# Assemble into data frame
school_score_size_table = pd.DataFrame(school_summary_table_calc)

# Minor data munging

# Display results

# Establish the bins -- choose any set of bins you would like, but see below for testing bins
# to test, set your bins as follows: [0, 585, 615, 645, 675]
# ALSO -- Note that the values for `% Passing Math`, `% Passing Reading` and `% Overall Passing Rate`
# were computed using averages of averages -- your results may vary if you use weighted averages 

# Categorize the spending based on the bins

school_score_size_table["Score Per Student"] = pd.cut(school_score_size_table["Total Students"], bins, labels=labels)
school_score_size_table = school_score_size_table.groupby("Score Per Student").mean() 
school_score_size_table = school_score_size_table.drop(columns = ["Total Students" ,"Total School Budget" ,"Per Student Budget"])

# Assemble into data frame
# Minor data munging
for i in school_score_size_table.columns:
    if i[0] == '%':
        school_score_size_table[i] = school_score_size_table[i].map("{:,.2f}%".format)
    else:   
        school_score_size_table[i] = school_score_size_table[i].map("{:,.2f}".format)


print(f"\n\nSchool Scores by Spending \n{'---'* 35}\n")
school_score_size_table




School Scores by Spending 
---------------------------------------------------------------------------------------------------------



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Score Per Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small(<1000),83.82,83.93,93.55%,96.10%,94.82%
Medium(1000-2000),83.37,83.86,93.60%,96.79%,95.20%
Large(2000-5000),77.75,81.34,69.96%,82.77%,76.36%


## Scores by School Type

In [56]:
# Type | Average Math Score | Average Reading Score | % Passing Math | % Passing Reading | % Overall Passing Rate
list_str = ['type','Average Math Score','Average Reading Score','% Passing Math','% Passing Reading','% Overall Passing Rate']


school_score_type_table = pd.DataFrame(school_summary_table_calc[list_str])

#school_score_type_table['% Passing Math'].map("{:}".format)

In [57]:
# Assemble into data frame
school_score_type_table['Average Math Score'] = school_score_type_table['Average Math Score'].map("{:,.2f}".format)
school_score_type_table['Average Reading Score'] = school_score_type_table['Average Reading Score'].map("{:,.2f}".format)
school_score_type_table['% Passing Math'] = school_score_type_table['% Passing Math'].map("{:,.2f}%".format)
school_score_type_table['% Passing Reading'] = school_score_type_table['% Passing Reading'].map("{:,.2f}%".format)
school_score_type_table['% Overall Passing Rate'] = school_score_type_table['% Overall Passing Rate'].map("{:,.2f}%".format)
school_score_type_group = school_score_type_table.groupby(['type']).first()


# Minor data munging


# Display results
school_score_type_group.head()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.06,83.98,94.13%,97.04%,95.59%
District,77.05,81.03,66.68%,81.93%,74.31%
