In [1]:
import pandas as pd
from functools import reduce

In [2]:
school_data = "Resources/schools_complete.csv"
student_data = "Resources/students_complete.csv"

In [3]:
school_data_df = pd.read_csv(school_data, encoding="utf-8")
student_data_df = pd.read_csv(student_data, encoding="utf-8")

In [110]:
school_data_complete_df = pd.merge(student_data_df, school_data_df, how="left", on=["school_name", "school_name"])
school_data_complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
# Calculate the total number of schools
total_schools = school_data_df["School ID"].count()
total_schools

15

In [6]:
# Calculate the total number of students
total_students = student_data_df["Student ID"].count()
total_students

39170

In [7]:
# Calculate the total budget
total_budget = school_data_df["budget"].sum()
total_budget

24649428

In [8]:
# Calculate the average math score 
avg_math_score = student_data_df["math_score"].mean()
avg_math_score

78.98537145774827

In [9]:
# Calculate the average reading score
avg_reading_score = student_data_df["reading_score"].mean()
avg_reading_score

81.87784018381414

In [10]:
# Calculate the percentage of students with a passing math score (70 or greater)
passing_math = student_data_df.iloc[:,6] >= 70
passing_math = (passing_math).sum()
passing_math

29370

In [11]:
# Calculate the percentage of students with a passing reading score (70 or greater)
passing_reading = student_data_df.iloc[:,5]  >= 70
passing_reading = (passing_reading).sum()
passing_reading               

33610

In [12]:
# Calculate the percentage of students with a passing math score (70 or greater)
perc_passing_math = passing_math / total_students * 100
perc_passing_math

74.9808526933878

In [13]:
# Calculate the percentage of students with a passing reading score (70 or greater)
perc_passing_reading = passing_reading / total_students * 100
perc_passing_reading

85.80546336482001

In [14]:
# Calculate the percentage of students who passed math **and** reading (% Overall Passing)
perc_overall_passing = (student_data_df.iloc[:,6] >= 70) & (student_data_df.iloc[:,5]  >= 70)
perc_overall_passing = (perc_overall_passing).sum() / total_students * 100
perc_overall_passing

65.17232575950983

In [15]:
# District Summary
district_summary_df = pd.DataFrame({"Total Schools": [total_schools],
                          "Total Students": [total_students],
                           "Total Budget": [total_budget],
                          "Average Math Score": [avg_math_score],
                          "Average Reading Score": [avg_reading_score],
                          "% Passing Math": [perc_passing_math],
                          "% Passing Reading": [perc_passing_reading],
                          "% Overall Passing": [perc_overall_passing]})
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [16]:
group_by_df = school_data_complete_df.groupby("school_name")
group_by_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
37535,37535,Norma Mata,F,10th,Thomas High School,76,76,14,Charter,1635,1043130
37536,37536,Cody Miller,M,11th,Thomas High School,84,82,14,Charter,1635,1043130
37537,37537,Erik Snyder,M,9th,Thomas High School,80,90,14,Charter,1635,1043130
37538,37538,Tanya Martinez,F,9th,Thomas High School,71,69,14,Charter,1635,1043130


In [17]:
type_df = group_by_df["type"].unique()
summary_df = pd.DataFrame(type_df)

In [18]:
school_passing_math = school_data_complete_df[(school_data_complete_df["math_score"] >= 70)]
per_school_passing_math = school_passing_math.groupby(["school_name"]).count()["student_name"]
per_school_counts = school_data_complete_df.groupby(["school_name"]).count()["Student ID"]
per_school_passing_math = per_school_passing_math / per_school_counts * 100

In [19]:
school_passing_reading = school_data_complete_df[(school_data_complete_df["reading_score"] >= 70)]
per_school_passing_reading = school_passing_reading.groupby(["school_name"]).count()["student_name"]
per_school_counts = school_data_complete_df.groupby(["school_name"]).count()["Student ID"]
per_school_passing_reading = per_school_passing_reading / per_school_counts * 100

In [20]:
school_passing_overall = school_data_complete_df[(school_data_complete_df["reading_score"] >= 70) & (school_data_complete_df["math_score"] >= 70)]
per_school_passing_overall = school_passing_overall.groupby(["school_name"]).count()["student_name"]
per_school_counts = school_data_complete_df.groupby(["school_name"]).count()["Student ID"]
per_school_passing_overall = per_school_passing_overall / per_school_counts * 100

In [21]:
# School Summary
summary_df['Total Students'] = group_by_df['student_name'].count()
summary_df['Total School Budget'] = group_by_df['budget'].max()
summary_df['Per Student Budget'] = summary_df['Total School Budget'] / summary_df["Total Students"]
summary_df['Average Math Score'] = group_by_df['math_score'].mean()
summary_df['Average Reading Score'] = group_by_df['reading_score'].mean()
summary_df['% Passing Math'] = per_school_passing_math
summary_df['% Passing Reading'] = per_school_passing_reading
summary_df['% Overall Passing'] = per_school_passing_overall

summary_df["Total School Budget"] = summary_df["Total School Budget"].map("${:,.0f}".format)
summary_df["Per Student Budget"] = summary_df["Per Student Budget"].map("${:,.0f}".format)
summary_df["Average Math Score"] = summary_df["Average Math Score"].map("{:.0f}".format)
summary_df["Average Reading Score"] = summary_df["Average Reading Score"].map("{:.0f}".format)
summary_df["% Passing Math"] = summary_df["% Passing Math"].map("{:.2f}%".format)
summary_df["% Passing Reading"] = summary_df["% Passing Reading"].map("{:.2f}%".format)
summary_df["% Overall Passing"] = summary_df["% Overall Passing"].map("{:.2f}%".format)
summary_df

Unnamed: 0_level_0,type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,"$3,124,928",$628,77,81,66.68%,81.93%,54.64%
Cabrera High School,[Charter],1858,"$1,081,356",$582,83,84,94.13%,97.04%,91.33%
Figueroa High School,[District],2949,"$1,884,411",$639,77,81,65.99%,80.74%,53.20%
Ford High School,[District],2739,"$1,763,916",$644,77,81,68.31%,79.30%,54.29%
Griffin High School,[Charter],1468,"$917,500",$625,83,84,93.39%,97.14%,90.60%
Hernandez High School,[District],4635,"$3,022,020",$652,77,81,66.75%,80.86%,53.53%
Holden High School,[Charter],427,"$248,087",$581,84,84,92.51%,96.25%,89.23%
Huang High School,[District],2917,"$1,910,635",$655,77,81,65.68%,81.32%,53.51%
Johnson High School,[District],4761,"$3,094,650",$650,77,81,66.06%,81.22%,53.54%
Pena High School,[Charter],962,"$585,858",$609,84,84,94.59%,95.95%,90.54%


In [105]:
total_students = school_data_complete_df.groupby(['school_name']).count()['student_name']
per_school_budget = school_data_complete_df.groupby(["school_name"]).mean()["budget"]
per_student_budget = per_school_budget / total_students
student_math_avg = school_data_complete_df.groupby(["school_name"]).mean()['math_score']
student_reading_avg = school_data_complete_df.groupby(["school_name"]).mean()['reading_score']
school_passing_math = school_data_complete_df[(school_data_complete_df["math_score"] >= 70)]
per_school_passing_math = school_passing_math.groupby(["school_name"]).count()["student_name"]
per_school_counts = school_data_complete_df.groupby(["school_name"]).count()["Student ID"]
per_school_passing_math = per_school_passing_math / per_school_counts * 100
school_passing_read = school_data_complete_df[(school_data_complete_df["reading_score"] >= 70)]
per_school_passing_read = school_passing_read.groupby(["school_name"]).count()["student_name"]
per_school_passing_read = per_school_passing_read / per_school_counts * 100
school_type = school_data_complete_df.groupby('school_name')['type'].unique()
total_students.head()

In [24]:
summary_df.rename(columns = {'type':'School Type'})
summary_df.head()

Unnamed: 0_level_0,type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,"$3,124,928",$628,77,81,66.68%,81.93%,54.64%
Cabrera High School,[Charter],1858,"$1,081,356",$582,83,84,94.13%,97.04%,91.33%
Figueroa High School,[District],2949,"$1,884,411",$639,77,81,65.99%,80.74%,53.20%
Ford High School,[District],2739,"$1,763,916",$644,77,81,68.31%,79.30%,54.29%
Griffin High School,[Charter],1468,"$917,500",$625,83,84,93.39%,97.14%,90.60%


In [25]:
# Top Performing Schools (By % Overall Passing)
top_perf_schools = summary_df.sort_values(["% Overall Passing"], ascending=False)
top_perf_schools.head(5)


Unnamed: 0_level_0,type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,[Charter],1858,"$1,081,356",$582,83,84,94.13%,97.04%,91.33%
Thomas High School,[Charter],1635,"$1,043,130",$638,83,84,93.27%,97.31%,90.95%
Griffin High School,[Charter],1468,"$917,500",$625,83,84,93.39%,97.14%,90.60%
Wilson High School,[Charter],2283,"$1,319,574",$578,83,84,93.87%,96.54%,90.58%
Pena High School,[Charter],962,"$585,858",$609,84,84,94.59%,95.95%,90.54%


In [26]:
# Bottom Performing Schools (By % Overall Passing)
bottom_perf_schools = summary_df.sort_values(by="% Overall Passing",)
bottom_perf_schools.head(5)

Unnamed: 0_level_0,type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,[District],3999,"$2,547,363",$637,77,81,66.37%,80.22%,52.99%
Figueroa High School,[District],2949,"$1,884,411",$639,77,81,65.99%,80.74%,53.20%
Huang High School,[District],2917,"$1,910,635",$655,77,81,65.68%,81.32%,53.51%
Hernandez High School,[District],4635,"$3,022,020",$652,77,81,66.75%,80.86%,53.53%
Johnson High School,[District],4761,"$3,094,650",$650,77,81,66.06%,81.22%,53.54%


In [27]:
# Math Score by Grade 
only_9_math = school_data_complete_df.loc[school_data_complete_df["grade"] == "9th"]
only_9_math = only_9_math.rename(columns={'math_score': '9th'})
only_10_math = school_data_complete_df.loc[school_data_complete_df["grade"] == "10th"]
only_10_math = only_10_math.rename(columns={'math_score': '10th'})
only_11_math = school_data_complete_df.loc[school_data_complete_df["grade"] == "11th"]
only_11_math = only_11_math.rename(columns={'math_score': '11th'})
only_12_math = school_data_complete_df.loc[school_data_complete_df["grade"] == "12th"]
only_12_math = only_12_math.rename(columns={'math_score': '12th'})

In [28]:
grouped_9 = only_9_math.groupby(["school_name"]).mean(['math_score'])
del grouped_9['reading_score']
del grouped_9['School ID']
del grouped_9['size']
del grouped_9['budget']
del grouped_9['Student ID']
grouped_10 = only_10_math.groupby(["school_name"]).mean(['math_score'])
del grouped_10['reading_score']
del grouped_10['School ID']
del grouped_10['size']
del grouped_10['budget']
del grouped_10['Student ID']
grouped_11 = only_11_math.groupby(["school_name"]).mean(['math_score'])
del grouped_11['reading_score']
del grouped_11['School ID']
del grouped_11['size']
del grouped_11['budget']
del grouped_11['Student ID']
grouped_12 = only_12_math.groupby(["school_name"]).mean(['math_score'])
del grouped_12['reading_score']
del grouped_12['School ID']
del grouped_12['size']
del grouped_12['budget']
del grouped_12['Student ID']

In [29]:
data_frames = [grouped_9, grouped_10, grouped_11, grouped_12]
math_by_grade = reduce(lambda  left,right: pd.merge(left,right,on=['school_name'],
                                            how='outer'), data_frames)
math_by_grade.head()
math_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


In [30]:
# Reading Score by Grade 
only_9 = school_data_complete_df.loc[school_data_complete_df["grade"] == "9th"]
only_9 = only_9.rename(columns={'reading_score': '9th'})
only_10 = school_data_complete_df.loc[school_data_complete_df["grade"] == "10th"]
only_10 = only_10.rename(columns={'reading_score': '10th'})
only_11 = school_data_complete_df.loc[school_data_complete_df["grade"] == "11th"]
only_11 = only_11.rename(columns={'reading_score': '11th'})
only_12 = school_data_complete_df.loc[school_data_complete_df["grade"] == "12th"]
only_12 = only_12.rename(columns={'reading_score': '12th'})

In [31]:
grouped_9 = only_9.groupby(["school_name"]).mean(['reading_score'])
del grouped_9['math_score']
del grouped_9['School ID']
del grouped_9['size']
del grouped_9['budget']
del grouped_9['Student ID']
grouped_10 = only_10.groupby(["school_name"]).mean(['reading_score'])
del grouped_10['math_score']
del grouped_10['School ID']
del grouped_10['size']
del grouped_10['budget']
del grouped_10['Student ID']
grouped_11 = only_11.groupby(["school_name"]).mean(['reading_score'])
del grouped_11['math_score']
del grouped_11['School ID']
del grouped_11['size']
del grouped_11['budget']
del grouped_11['Student ID']
grouped_12 = only_12.groupby(["school_name"]).mean(['reading_score'])
del grouped_12['math_score']
del grouped_12['School ID']
del grouped_12['size']
del grouped_12['budget']
del grouped_12['Student ID']

In [32]:
data_frames = [grouped_9, grouped_10, grouped_11, grouped_12]
read_by_grade = reduce(lambda  left,right: pd.merge(left,right,on=['school_name'],
                                            how='outer'), data_frames)
read_by_grade.head()
read_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


In [33]:
# Scores by School Spending
spending_results = pd.DataFrame({
                                "Average Math Score" : student_math_avg,
                                "Average Reading Score": student_reading_avg,
                                "% Passing Math": per_school_passing_math,
                                "% Passing Reading": per_school_passing_read,
                                "% Overall Passing": per_school_passing_overall,
                                "Spending Ranges (Per Student)": per_student_budget
                                
                              })
spending_results

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Ranges (Per Student)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,77.048432,81.033963,66.680064,81.93328,54.642283,628.0
Cabrera High School,83.061895,83.97578,94.133477,97.039828,91.334769,582.0
Figueroa High School,76.711767,81.15802,65.988471,80.739234,53.204476,639.0
Ford High School,77.102592,80.746258,68.309602,79.299014,54.289887,644.0
Griffin High School,83.351499,83.816757,93.392371,97.138965,90.599455,625.0
Hernandez High School,77.289752,80.934412,66.752967,80.862999,53.527508,652.0
Holden High School,83.803279,83.814988,92.505855,96.252927,89.227166,581.0
Huang High School,76.629414,81.182722,65.683922,81.316421,53.513884,655.0
Johnson High School,77.072464,80.966394,66.057551,81.222432,53.539172,650.0
Pena High School,83.839917,84.044699,94.594595,95.945946,90.540541,609.0


In [34]:
bins = [0, 585, 630, 645, 680]
group_names = ["$585", "$585-630", "$630-645", "$645-680"]
spending_results["Spending Ranges"] = pd.cut(spending_results["Spending Ranges (Per Student)"], bins, labels=group_names, include_lowest=True)
spending_results

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Ranges (Per Student),Spending Ranges
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,77.048432,81.033963,66.680064,81.93328,54.642283,628.0,$585-630
Cabrera High School,83.061895,83.97578,94.133477,97.039828,91.334769,582.0,$585
Figueroa High School,76.711767,81.15802,65.988471,80.739234,53.204476,639.0,$630-645
Ford High School,77.102592,80.746258,68.309602,79.299014,54.289887,644.0,$630-645
Griffin High School,83.351499,83.816757,93.392371,97.138965,90.599455,625.0,$585-630
Hernandez High School,77.289752,80.934412,66.752967,80.862999,53.527508,652.0,$645-680
Holden High School,83.803279,83.814988,92.505855,96.252927,89.227166,581.0,$585
Huang High School,76.629414,81.182722,65.683922,81.316421,53.513884,655.0,$645-680
Johnson High School,77.072464,80.966394,66.057551,81.222432,53.539172,650.0,$645-680
Pena High School,83.839917,84.044699,94.594595,95.945946,90.540541,609.0,$585-630


In [35]:
spending_results = spending_results.groupby("Spending Ranges")

In [36]:
spending_results.max()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Spending Ranges (Per Student)
Spending Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
$585,83.803279,83.989488,94.133477,97.039828,91.334769,583.0
$585-630,83.839917,84.044699,94.594595,97.138965,90.599455,628.0
$630-645,83.418349,83.84893,93.272171,97.308869,90.948012,644.0
$645-680,77.289752,81.182722,66.752967,81.316421,53.539172,655.0


In [37]:
# Scores by School Size
size_results = pd.DataFrame({
                                "Average Math Score" : student_math_avg,
                                "Average Reading Score": student_reading_avg,
                                "% Passing Math": per_school_passing_math,
                                "% Passing Reading": per_school_passing_read,
                                "% Overall Passing": per_school_passing_overall,
                                "Total Students": total_students
                              })
size_results

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Total Students
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,77.048432,81.033963,66.680064,81.93328,54.642283,4976
Cabrera High School,83.061895,83.97578,94.133477,97.039828,91.334769,1858
Figueroa High School,76.711767,81.15802,65.988471,80.739234,53.204476,2949
Ford High School,77.102592,80.746258,68.309602,79.299014,54.289887,2739
Griffin High School,83.351499,83.816757,93.392371,97.138965,90.599455,1468
Hernandez High School,77.289752,80.934412,66.752967,80.862999,53.527508,4635
Holden High School,83.803279,83.814988,92.505855,96.252927,89.227166,427
Huang High School,76.629414,81.182722,65.683922,81.316421,53.513884,2917
Johnson High School,77.072464,80.966394,66.057551,81.222432,53.539172,4761
Pena High School,83.839917,84.044699,94.594595,95.945946,90.540541,962


In [38]:
bins = [0, 1000, 2000, 5000,]
group_names = ["Small <1000", "Medium 1000-200", "Large 2000-5000",]
size_results["Size Ranges"] = pd.cut(size_results["Total Students"], bins, labels=group_names, include_lowest=True)
size_results

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Total Students,Size Ranges
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,77.048432,81.033963,66.680064,81.93328,54.642283,4976,Large 2000-5000
Cabrera High School,83.061895,83.97578,94.133477,97.039828,91.334769,1858,Medium 1000-200
Figueroa High School,76.711767,81.15802,65.988471,80.739234,53.204476,2949,Large 2000-5000
Ford High School,77.102592,80.746258,68.309602,79.299014,54.289887,2739,Large 2000-5000
Griffin High School,83.351499,83.816757,93.392371,97.138965,90.599455,1468,Medium 1000-200
Hernandez High School,77.289752,80.934412,66.752967,80.862999,53.527508,4635,Large 2000-5000
Holden High School,83.803279,83.814988,92.505855,96.252927,89.227166,427,Small <1000
Huang High School,76.629414,81.182722,65.683922,81.316421,53.513884,2917,Large 2000-5000
Johnson High School,77.072464,80.966394,66.057551,81.222432,53.539172,4761,Large 2000-5000
Pena High School,83.839917,84.044699,94.594595,95.945946,90.540541,962,Small <1000


In [39]:
size_results = size_results.groupby(["Size Ranges"])
size_results.max()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Total Students
Size Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Small <1000,83.839917,84.044699,94.594595,96.252927,90.540541,962
Medium 1000-200,83.682222,83.97578,94.133477,97.308869,91.334769,1858
Large 2000-5000,83.274201,83.989488,93.867718,96.539641,90.582567,4976


In [117]:
# Scores by School Type
type_results = pd.DataFrame({
                                "Average Math Score" : student_math_avg,
                                "Average Reading Score": student_reading_avg,
                                "% Passing Math": per_school_passing_math,
                                "% Passing Reading": per_school_passing_read,
                                "% Overall Passing": per_school_passing_overall,
                                 "Total Students": total_students,
                                "School Type": school_type
                              })
type_results
type_results.head()

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing,Total Students,School Type
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Bailey High School,77.048432,81.033963,66.680064,81.93328,54.642283,4976,[District]
Cabrera High School,83.061895,83.97578,94.133477,97.039828,91.334769,1858,[Charter]
Figueroa High School,76.711767,81.15802,65.988471,80.739234,53.204476,2949,[District]
Ford High School,77.102592,80.746258,68.309602,79.299014,54.289887,2739,[District]
Griffin High School,83.351499,83.816757,93.392371,97.138965,90.599455,1468,[Charter]


AttributeError: 'SeriesGroupBy' object has no attribute 'len'

In [126]:
group_names = ["Charter", "District",]
type_results["School Type"] = pd.qcut(type_results["School Type"]
type_results

TypeError: qcut() got an unexpected keyword argument 'str'

In [121]:
type_results = type_results.groupby(["School Type"])

AttributeError: 'DataFrameGroupBy' object has no attribute 'groupby'

In [122]:
type_results.head()

TypeError: unhashable type: 'numpy.ndarray'