### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])

school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [2]:
#First do calculations
total_student_count = len(school_data_complete["student_name"])

#Calculate the total number of schools
school_count = len(school_data_complete["school_name"].unique())

#Calculate the total number of students
student_count = len(school_data_complete["student_name"].unique())

#Calculate the total budget
total_budget = sum(school_data_complete['budget'])

#Calculate the average math score 
average_math_score = school_data_complete['math_score'].mean()

#Calculate the average reading score
average_reading_score = school_data_complete['reading_score'].mean()

#Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

overall_passing_rate = (average_math_score + average_reading_score)/2

#Calculate the percentage of students with a passing math score (70 or greater)

passing_math_scores = school_data_complete.loc[school_data_complete['math_score'] >= 70].count()

percent_passing_math_score = (passing_math_scores/total_student_count)*100

#Calculate the percentage of students with a passing reading score (70 or greater)

passing_reading_scores = school_data_complete.loc[school_data_complete['reading_score']>= 70].count()

percent_passing_reading_score = (passing_reading_scores/total_student_count)*100



In [3]:
#Create a dataframe to hold the above results

district_summary = pd.DataFrame({"Total Schools": [school_count],
                              "Total Students": [student_count],
                              "Total Budget": [total_budget],
                              "Average Math Score": [average_math_score],
                              "Average Reading Score": [average_reading_score],
                              "% Passing Math": [percent_passing_math_score],
                              "% Passing Reading": [percent_passing_reading_score],
                              "% Overall Passing Rate": [overall_passing_rate]})
                                 
district_summary


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,32715,82932329558,78.985371,81.87784,Student ID 74.980853 student_name 74...,Student ID 85.805463 student_name 85...,80.431606


In [4]:
#Format displayed data

district_summary["Total Schools"]= district_summary["Total Schools"]
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary["Average Math Score"]= district_summary["Average Math Score"]
district_summary["Average Reading Score"] = district_summary["Average Reading Score"]
district_summary["% Passing Math"] = district_summary["% Passing Math"]
district_summary["% Passing Reading"]= district_summary["% Passing Reading"]
district_summary["% Overall Passing Rate"] = district_summary["% Overall Passing Rate"].map("{:.2f}%".format)
                                 
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,32715,"$82,932,329,558.00",78.985371,81.87784,Student ID 74.980853 student_name 74...,Student ID 85.805463 student_name 85...,80.43%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

In [5]:
school_summary_data = school_data_complete.groupby(['school_name','type'])
school_summary_data.count().head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Student ID,student_name,gender,grade,reading_score,math_score,School ID,size,budget
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Bailey High School,District,4976,4976,4976,4976,4976,4976,4976,4976,4976
Cabrera High School,Charter,1858,1858,1858,1858,1858,1858,1858,1858,1858
Figueroa High School,District,2949,2949,2949,2949,2949,2949,2949,2949,2949
Ford High School,District,2739,2739,2739,2739,2739,2739,2739,2739,2739
Griffin High School,Charter,1468,1468,1468,1468,1468,1468,1468,1468,1468
Hernandez High School,District,4635,4635,4635,4635,4635,4635,4635,4635,4635
Holden High School,Charter,427,427,427,427,427,427,427,427,427
Huang High School,District,2917,2917,2917,2917,2917,2917,2917,2917,2917
Johnson High School,District,4761,4761,4761,4761,4761,4761,4761,4761,4761
Pena High School,Charter,962,962,962,962,962,962,962,962,962


In [6]:
#Calculate the total number of students
school_student_count = school_summary_data["Student ID"].count()

#Calculate the total budget
school_total_budget= school_summary_data['budget'].sum()

                           
#Calculate the budget per school
                           
per_student_budget = school_total_budget/school_summary_data['size'].sum()                     

#Calculate the average math score 
school_average_math_score = school_summary_data['math_score'].mean()

#Calculate the average reading score
school_average_reading_score = school_summary_data['reading_score'].mean()

#Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

school_overall_passing_rate = (school_average_math_score + school_average_reading_score)/2

#Calculate the percentage of students with a passing math score (70 or greater)
#Using the loc function to get a count of  math scores over 70 grouped by school and type.

passing_math_scores = school_data_complete.loc[school_data_complete['math_score'] >= 70].groupby(['school_name','type'])['math_score'].count()

percent_passing_math_score = (passing_math_scores/school_student_count)*100

#Calculate the percentage of students with a passing reading score (70 or greater)
#Using the loc function to get a count of  math scores over 70 grouped by school and type.
passing_reading_scores =school_data_complete.loc[school_data_complete['reading_score'] >= 70].groupby(['school_name','type'])['reading_score'].count()

percent_passing_reading_score = (passing_reading_scores/school_student_count)*100






In [7]:
#Create dataframe to story the school summary values.  

school_summary = pd.DataFrame({
                              "Total Students": school_student_count,
                              "Total School Budget": school_total_budget,
                              "Per Student Budget": per_student_budget,
                              "Average Math Score": school_average_math_score,
                              "Average Reading Score": school_average_reading_score,
                              "% Passing Math": percent_passing_math_score,
                              "% Passing Reading":percent_passing_reading_score,
                              "% Overall Passing Rate": school_overall_passing_rate})
                                 
school_summary



Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,15549641728,628.0,77.048432,81.033963,66.680064,81.93328,79.041198
Cabrera High School,Charter,1858,2009159448,582.0,83.061895,83.97578,94.133477,97.039828,83.518837
Figueroa High School,District,2949,5557128039,639.0,76.711767,81.15802,65.988471,80.739234,78.934893
Ford High School,District,2739,4831365924,644.0,77.102592,80.746258,68.309602,79.299014,78.924425
Griffin High School,Charter,1468,1346890000,625.0,83.351499,83.816757,93.392371,97.138965,83.584128
Hernandez High School,District,4635,14007062700,652.0,77.289752,80.934412,66.752967,80.862999,79.112082
Holden High School,Charter,427,105933149,581.0,83.803279,83.814988,92.505855,96.252927,83.809133
Huang High School,District,2917,5573322295,655.0,76.629414,81.182722,65.683922,81.316421,78.906068
Johnson High School,District,4761,14733628650,650.0,77.072464,80.966394,66.057551,81.222432,79.019429
Pena High School,Charter,962,563595396,609.0,83.839917,84.044699,94.594595,95.945946,83.942308


In [8]:
#Format displayed data

school_summary["Total Students"] = school_summary["Total Students"].map("{:,}".format)
school_summary["Total School Budget"] = school_summary["Total School Budget"].map("${:,.2f}".format)
school_summary["Per Student Budget"] = school_summary["Per Student Budget"].map("${:,.2f}".format)
school_summary["Average Math Score"] = school_summary["Average Math Score"]
school_summary["Average Reading Score"] = school_summary["Average Reading Score"]
school_summary["% Passing Math"] = school_summary["% Passing Math"].map("{:.2f}%".format)
school_summary["% Passing Reading"] = school_summary["% Passing Reading"].map("{:.2f}%".format)
school_summary["% Overall Passing Rate"] = school_summary["% Overall Passing Rate"].map("{:.2f}%".format)
                                 
school_summary

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$15,549,641,728.00",$628.00,77.048432,81.033963,66.68%,81.93%,79.04%
Cabrera High School,Charter,1858,"$2,009,159,448.00",$582.00,83.061895,83.97578,94.13%,97.04%,83.52%
Figueroa High School,District,2949,"$5,557,128,039.00",$639.00,76.711767,81.15802,65.99%,80.74%,78.93%
Ford High School,District,2739,"$4,831,365,924.00",$644.00,77.102592,80.746258,68.31%,79.30%,78.92%
Griffin High School,Charter,1468,"$1,346,890,000.00",$625.00,83.351499,83.816757,93.39%,97.14%,83.58%
Hernandez High School,District,4635,"$14,007,062,700.00",$652.00,77.289752,80.934412,66.75%,80.86%,79.11%
Holden High School,Charter,427,"$105,933,149.00",$581.00,83.803279,83.814988,92.51%,96.25%,83.81%
Huang High School,District,2917,"$5,573,322,295.00",$655.00,76.629414,81.182722,65.68%,81.32%,78.91%
Johnson High School,District,4761,"$14,733,628,650.00",$650.00,77.072464,80.966394,66.06%,81.22%,79.02%
Pena High School,Charter,962,"$563,595,396.00",$609.00,83.839917,84.044699,94.59%,95.95%,83.94%


## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

In [9]:
#Find the top performing schools by the percent overall passing rate
# Sort the school summary data descending and get the top five
top_performing_schools = school_summary.sort_values("% Overall Passing Rate", ascending=False)
top_performing_schools.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Pena High School,Charter,962,"$563,595,396.00",$609.00,83.839917,84.044699,94.59%,95.95%,83.94%
Wright High School,Charter,1800,"$1,888,920,000.00",$583.00,83.682222,83.955,93.33%,96.61%,83.82%
Holden High School,Charter,427,"$105,933,149.00",$581.00,83.803279,83.814988,92.51%,96.25%,83.81%
Thomas High School,Charter,1635,"$1,705,517,550.00",$638.00,83.418349,83.84893,93.27%,97.31%,83.63%
Wilson High School,Charter,2283,"$3,012,587,442.00",$578.00,83.274201,83.989488,93.87%,96.54%,83.63%


## Bottom Performing Schools (By Passing Rate)

In [10]:
#Find the bottom performing schools by the percent overall passing rate
# Sort the school summary data ascending and get the top five
bottom_performing_schools = school_summary.sort_values("% Overall Passing Rate")
bottom_performing_schools.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,"$10,186,904,637.00",$637.00,76.842711,80.744686,66.37%,80.22%,78.79%
Huang High School,District,2917,"$5,573,322,295.00",$655.00,76.629414,81.182722,65.68%,81.32%,78.91%
Ford High School,District,2739,"$4,831,365,924.00",$644.00,77.102592,80.746258,68.31%,79.30%,78.92%
Figueroa High School,District,2949,"$5,557,128,039.00",$639.00,76.711767,81.15802,65.99%,80.74%,78.93%
Johnson High School,District,4761,"$14,733,628,650.00",$650.00,77.072464,80.966394,66.06%,81.22%,79.02%


## Math Scores by Grade

 Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [11]:
#Create a series with school, grades and math scores

math_reading_scores = school_data_complete.loc[:, ["school_name", "grade", "math_score","reading_score"]]
math_reading_scores
math_reading_scores.head()

Unnamed: 0,school_name,grade,math_score,reading_score
0,Huang High School,9th,79,66
1,Huang High School,12th,61,94
2,Huang High School,12th,60,90
3,Huang High School,12th,58,67
4,Huang High School,9th,84,97


In [12]:
#Get the average math score by grade

ninth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]== "9th"].groupby(['school_name'])['math_score'].mean()
tenth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='10th'].groupby(['school_name'])['math_score'].mean()
eleventh_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='11th'].groupby(['school_name'])['math_score'].mean()
twelfth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='12th'].groupby(['school_name'])['math_score'].mean()



In [13]:
#Create dataframe with 9th,10th,11th,12th math grade scores

math_scores_by_grade = pd.DataFrame({
                              "9th": ninth_grade_score,
                              "10th": tenth_grade_score,
                              "11th": eleventh_grade_score,
                              "12th": twelfth_grade_score,
                              })
                                 
math_scores_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [14]:
#Get the average math score by grade
ninth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]== "9th"].groupby(['school_name'])['reading_score'].mean()
tenth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='10th'].groupby(['school_name'])['reading_score'].mean()
eleventh_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='11th'].groupby(['school_name'])['reading_score'].mean()
twelfth_grade_score = math_reading_scores.loc[math_reading_scores["grade"]=='12th'].groupby(['school_name'])['reading_score'].mean()



In [15]:
#Create dataframe with 9th,10th,11th,12th reading scores

reading_scores_by_grade = pd.DataFrame({
                              "9th": ninth_grade_score,
                              "10th": tenth_grade_score,
                              "11th": eleventh_grade_score,
                              "12th": twelfth_grade_score,
                              })
                                 
reading_scores_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [37]:
#Scores by School Spending

scores_by_school = school_summary.loc[:,["Per Student Budget","Average Math Score","Average Reading Score","% Passing Math","% Passing Reading",
"% Overall Passing Rate"]]
                                                                   
scores_by_school.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,District,$628.00,77.048432,81.033963,66.68%,81.93%,79.04%
Cabrera High School,Charter,$582.00,83.061895,83.97578,94.13%,97.04%,83.52%
Figueroa High School,District,$639.00,76.711767,81.15802,65.99%,80.74%,78.93%
Ford High School,District,$644.00,77.102592,80.746258,68.31%,79.30%,78.92%
Griffin High School,Charter,$625.00,83.351499,83.816757,93.39%,97.14%,83.58%


In [38]:
#Get current data types
scores_by_school.dtypes

Per Student Budget         object
Average Math Score        float64
Average Reading Score     float64
% Passing Math             object
% Passing Reading          object
% Overall Passing Rate     object
dtype: object

In [39]:
#Reformat columns to remove symbols and change format to float
scores_by_school["Per Student Budget"] = scores_by_school["Per Student Budget"].replace('[\$]', '', regex=True).astype(float)
scores_by_school["% Passing Math"] = scores_by_school["% Passing Math"].replace('[\%]', '', regex=True).astype(float)
scores_by_school["% Passing Reading"] = scores_by_school["% Passing Reading"].replace('[\%]', '', regex=True).astype(float)
scores_by_school["% Overall Passing Rate"] = scores_by_school["% Overall Passing Rate"].replace('[\%]', '', regex=True).astype(float)

In [40]:
scores_by_school.dtypes

Per Student Budget        float64
Average Math Score        float64
Average Reading Score     float64
% Passing Math            float64
% Passing Reading         float64
% Overall Passing Rate    float64
dtype: object

In [41]:
scores_by_school.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,District,628.0,77.048432,81.033963,66.68,81.93,79.04
Cabrera High School,Charter,582.0,83.061895,83.97578,94.13,97.04,83.52
Figueroa High School,District,639.0,76.711767,81.15802,65.99,80.74,78.93
Ford High School,District,644.0,77.102592,80.746258,68.31,79.3,78.92
Griffin High School,Charter,625.0,83.351499,83.816757,93.39,97.14,83.58


In [42]:
#Bins for student spending budget

spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

In [43]:
#Put Per student budget into the bins
scores_by_school["Spending Ranges (Per Student)"] = pd.cut(scores_by_school["Per Student Budget"], spending_bins, labels=group_names)
scores_by_school.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate,Spending Ranges (Per Student)
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,District,628.0,77.048432,81.033963,66.68,81.93,79.04,$615-645
Cabrera High School,Charter,582.0,83.061895,83.97578,94.13,97.04,83.52,<$585
Figueroa High School,District,639.0,76.711767,81.15802,65.99,80.74,78.93,$615-645
Ford High School,District,644.0,77.102592,80.746258,68.31,79.3,78.92,$615-645
Griffin High School,Charter,625.0,83.351499,83.816757,93.39,97.14,83.58,$615-645


In [44]:
#Set index to Spending Ranges
scores_by_school = scores_by_school.set_index("Spending Ranges (Per Student)")
scores_by_school.head()

Unnamed: 0_level_0,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$615-645,628.0,77.048432,81.033963,66.68,81.93,79.04
<$585,582.0,83.061895,83.97578,94.13,97.04,83.52
$615-645,639.0,76.711767,81.15802,65.99,80.74,78.93
$615-645,644.0,77.102592,80.746258,68.31,79.3,78.92
$615-645,625.0,83.351499,83.816757,93.39,97.14,83.58


In [45]:
#Average Math Score
#Average Reading Score
#% Passing Math
#% Passing Reading
#Overall Passing Rate (Average of the above two)
#Grouped by spending ranges
sbs_grouped = scores_by_school.groupby(['Spending Ranges (Per Student)']).mean()
sbs_grouped

Unnamed: 0_level_0,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
<$585,581.0,83.455399,83.933814,93.46,96.61,83.695
$585-615,604.5,83.599686,83.885211,94.23,95.9,83.74
$615-645,635.166667,79.079225,81.891436,75.668333,86.106667,80.481667
$645-675,652.333333,76.99721,81.027843,66.163333,81.133333,79.013333


## Scores by School Size

* Perform the same operations as above, based on school size.

In [67]:
#Get data for scores by school size
scores_by_size = school_summary.loc[:,["Total Students","Average Math Score","Average Reading Score","% Passing Math","% Passing Reading",
"% Overall Passing Rate"]]
                                                                   
scores_by_size.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,District,4976,77.048432,81.033963,66.68%,81.93%,79.04%
Cabrera High School,Charter,1858,83.061895,83.97578,94.13%,97.04%,83.52%
Figueroa High School,District,2949,76.711767,81.15802,65.99%,80.74%,78.93%
Ford High School,District,2739,77.102592,80.746258,68.31%,79.30%,78.92%
Griffin High School,Charter,1468,83.351499,83.816757,93.39%,97.14%,83.58%


In [69]:
#Reformat columns to remove symbols and change to float
scores_by_size["Total Students"] = scores_by_size["Total Students"].replace('[\,]', '', regex=True).astype(float)
scores_by_size["% Passing Math"] = scores_by_size["% Passing Math"].replace('[\%]', '', regex=True).astype(float)
scores_by_size["% Passing Reading"] = scores_by_size["% Passing Reading"].replace('[\%]', '', regex=True).astype(float)
scores_by_size["% Overall Passing Rate"] = scores_by_size["% Overall Passing Rate"].replace('[\%]', '', regex=True).astype(float)

In [70]:
scores_by_school.dtypes

Per Student Budget        float64
Average Math Score        float64
Average Reading Score     float64
% Passing Math            float64
% Passing Reading         float64
% Overall Passing Rate    float64
dtype: object

In [71]:
# Sample bins. Feel free to create your own bins.
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

In [72]:
#Create column School size and include Total Students in the bins
scores_by_size["School Size"] = pd.cut(scores_by_size["Total Students"], size_bins, labels=group_names)
scores_by_size.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Students,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate,School Size
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Bailey High School,District,4976.0,77.048432,81.033963,66.68,81.93,79.04,Large (2000-5000)
Cabrera High School,Charter,1858.0,83.061895,83.97578,94.13,97.04,83.52,Medium (1000-2000)
Figueroa High School,District,2949.0,76.711767,81.15802,65.99,80.74,78.93,Large (2000-5000)
Ford High School,District,2739.0,77.102592,80.746258,68.31,79.3,78.92,Large (2000-5000)
Griffin High School,Charter,1468.0,83.351499,83.816757,93.39,97.14,83.58,Medium (1000-2000)


In [73]:
#Set the index for the index to School Size
scores_by_size = scores_by_size.set_index("School Size")
scores_by_size.head()

Unnamed: 0_level_0,Total Students,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Large (2000-5000),4976.0,77.048432,81.033963,66.68,81.93,79.04
Medium (1000-2000),1858.0,83.061895,83.97578,94.13,97.04,83.52
Large (2000-5000),2949.0,76.711767,81.15802,65.99,80.74,78.93
Large (2000-5000),2739.0,77.102592,80.746258,68.31,79.3,78.92
Medium (1000-2000),1468.0,83.351499,83.816757,93.39,97.14,83.58


In [74]:
#Group by School Size
sbss_grouped = scores_by_size.groupby(["School Size"]).mean()
sbss_grouped

Unnamed: 0_level_0,Total Students,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Small (<1000),694.5,83.821598,83.929843,93.55,96.1,83.875
Medium (1000-2000),1704.4,83.374684,83.864438,93.598,96.79,83.618
Large (2000-5000),3657.375,77.746417,81.344493,69.96375,82.76625,79.54375


## Scores by School Type

* Perform the same operations as above, based on school type.

In [77]:
#Get data for scores by school type
scores_by_type = school_summary.loc[:,["Average Math Score","Average Reading Score","% Passing Math","% Passing Reading",
"% Overall Passing Rate"]]
scores_by_type.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
school_name,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,District,77.048432,81.033963,66.68%,81.93%,79.04%
Cabrera High School,Charter,83.061895,83.97578,94.13%,97.04%,83.52%
Figueroa High School,District,76.711767,81.15802,65.99%,80.74%,78.93%
Ford High School,District,77.102592,80.746258,68.31%,79.30%,78.92%
Griffin High School,Charter,83.351499,83.816757,93.39%,97.14%,83.58%


In [78]:
#Reformat % averages
scores_by_type["% Passing Math"] = scores_by_type["% Passing Math"].replace('[\%]', '', regex=True).astype(float)
scores_by_type["% Passing Reading"] = scores_by_type["% Passing Reading"].replace('[\%]', '', regex=True).astype(float)
scores_by_type["% Overall Passing Rate"] = scores_by_type["% Overall Passing Rate"].replace('[\%]', '', regex=True).astype(float)


In [80]:
scores_by_type.dtypes

Average Math Score        float64
Average Reading Score     float64
% Passing Math            float64
% Passing Reading         float64
% Overall Passing Rate    float64
dtype: object

In [82]:
#Group by School type
sbst_grouped = scores_by_type.groupby(["type"]).mean()
sbst_grouped

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62,96.58625,83.68375
District,76.956733,80.966636,66.548571,80.798571,78.96
