In [1]:
# Dependencies and Setup
import pandas as pd

# Files to Load
school_data_to_load = "schools_complete.csv"
student_data_to_load = "students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.
# Left join from the student rows: every student row is kept and gains the columns of
# the school whose "school_name" matches. "school_name" is the only join key, so it is
# passed once instead of as a list that repeats it.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

In [2]:
#find total number of schools (distinct school names in the merged data)
schools = school_data_complete["school_name"].nunique()

#find total number of students - each row is a unique student
students = school_data_complete["student_name"].count()

#find total budget
#the merged frame repeats a school's budget on every one of its student rows, so keep
#one row per school and add those budgets up. Deduplicating on the school name (not on
#the budget value) means two schools with the same budget are both counted.
school_budgets = school_data_complete.drop_duplicates(subset="school_name")["budget"]
budget_total = school_budgets.sum()

#find average math score
avg_math = round(school_data_complete["math_score"].mean(), 5)

#find average reading score
avg_read = round(school_data_complete["reading_score"].mean(), 5)

#find % passing math - count scores above 69, then divide by total students & multiply by 100
#(summing a boolean mask counts its True values)
math_passing = (school_data_complete["math_score"] > 69).sum()
percent_pass_math = round(math_passing/students*100, 5)

#find % passing reading
read_passing = (school_data_complete["reading_score"] > 69).sum()
percent_pass_read = round(read_passing/students*100, 5)

#find % overall passing
#NOTE: this is the mean of the two subject pass percentages, not the share of students
#who passed both subjects
overall_pass = round((percent_pass_read + percent_pass_math)/2, 5)


In [3]:
#District Summary Table with nice format
#gather the district-wide figures, one entry per output column
district_values = {
    "Total schools": schools,
    "Total students": students,
    "Total budget": budget_total,
    "Average Math Score": avg_math,
    "Average Reading Score": avg_read,
    "% Passed Math": percent_pass_math,
    "% Passed Reading": percent_pass_read,
    "Overall Passing Rate": overall_pass,
}

#single-row dataframe (row label 0) holding the district summary
Summary_District = pd.DataFrame(district_values, index=[0])

#format columns appropriately: thousands separators, plus a leading $ for the budget
for column, template in (("Total budget", "${:,}"), ("Total students", "{:,}")):
    Summary_District[column] = Summary_District[column].map(template.format)
Summary_District

Unnamed: 0,Total schools,Total students,Total budget,Average Math Score,Average Reading Score,% Passed Math,% Passed Reading,Overall Passing Rate
0,15,39170,"$24,649,428",78.98537,81.87784,74.98085,85.80546,80.39316


In [4]:
#School Summary Table
#create beginning of summary table by saving new df with school name, type, size and budget
#.copy() gives us our own frame, so adding a column below does not write to a slice of
#school_data (which triggers pandas' SettingWithCopyWarning)
school_summary_df = school_data[["school_name", "type", "size", "budget"]].copy()

#add per student budget to df
school_summary_df["Per Student Budget"] = school_summary_df["budget"]/school_summary_df["size"]

#find mean math scores by school
#start by saving new df with school name, school size, and math_score (one row per student)
s_math_df = school_data_complete[["school_name", "size", "math_score"]]

#group by school, take the mean, and label the result as the average math score
s_math = s_math_df.groupby("school_name").math_score.mean().reset_index()
s_math = s_math.rename(columns={"math_score": "Avg Math Score"})

#now merge with the school summary df
school_summary_df = pd.merge(school_summary_df, s_math, on="school_name")

#find mean reading scores by school
#start by saving new df with school name, school size, and reading score (one row per student)
s_read_df = school_data_complete[["school_name", "size", "reading_score"]]

#group by school, take the mean, and label the result as the average reading score
s_read = s_read_df.groupby("school_name").reading_score.mean().reset_index()
s_read = s_read.rename(columns={"reading_score": "Avg Reading Score"})

#now merge with the school summary df
school_summary_df = pd.merge(school_summary_df, s_read, on="school_name")
school_summary_df

Unnamed: 0,school_name,type,size,budget,Per Student Budget,Avg Math Score,Avg Reading Score
0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722
1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802
2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724
3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757
5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488
6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578
7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963
8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699


In [5]:
#Find Math Passing rate
#keep only the student rows with a passing math score (above 69)
#the mask is built from the same frame it filters, and no input frame is overwritten,
#so this cell can be re-run without errors
only_math = school_data_complete.loc[school_data_complete["math_score"]>69,:]

#number of passing math scores per school (Series indexed by school name)
math_math = only_math.groupby("school_name").math_score.count()

#line the counts up with the summary rows by school name
#a school with no passing scores does not appear in math_math; give it a count of 0
#so it stays in the summary with 0% instead of being dropped
math_pass_count = school_summary_df["school_name"].map(math_math).fillna(0)

#% passed = number of passing scores / school size * 100
#assign() returns a new frame and replaces the column if it is already there, so
#running the cell twice does not create a duplicate column
school_summary_df = school_summary_df.assign(
    **{"% Passing Math": math_pass_count/school_summary_df["size"]*100})
school_summary_df

Unnamed: 0,school_name,type,size,budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math
0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922
1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471
2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121
3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371
5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718
6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477
7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064
8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595


In [6]:
#find passing rate for reading for each school
#keep only the student rows with a passing reading score (above 69)
#no input frame is overwritten, so this cell can be re-run without errors
only_read = school_data_complete.loc[school_data_complete["reading_score"]>69,:]

#number of passing reading scores per school (Series indexed by school name)
read_read = only_read.groupby("school_name").reading_score.count()

#line the counts up with the summary rows by school name
#a school with no passing scores does not appear in read_read; give it a count of 0
#so it stays in the summary with 0% instead of being dropped
read_pass_count = school_summary_df["school_name"].map(read_read).fillna(0)

#% passed = number of passing scores / school size * 100
#assign() returns a new frame and replaces the column if it is already there, so
#running the cell twice does not create a duplicate column
school_summary_df = school_summary_df.assign(
    **{"% Passing Reading": read_pass_count/school_summary_df["size"]*100})
school_summary_df

Unnamed: 0,school_name,type,size,budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading
0,Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421
1,Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234
2,Shelton High School,Charter,1761,1056600,600.0,83.359455,83.725724,93.867121,95.854628
3,Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999
4,Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965
5,Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641
6,Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828
7,Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328
8,Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927
9,Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946


In [7]:
#overall passing rate per school = mean of the math and reading passing percentages
pass_rate_sum = school_summary_df["% Passing Math"] + school_summary_df["% Passing Reading"]
school_summary_df["% Overall Passing Rate"] = pass_rate_sum/2

#index by school name, blank out the index label so it looks pretty,
#and switch the raw column names to display headers
display_headers = {"type": "School Type",
                   "size": "Total Students",
                   "budget": "Total School Budget"}
school_summary_df = (
    school_summary_df
    .set_index("school_name")
    .rename_axis("", axis='index')
    .rename(columns=display_headers)
)

#format columns appropriately: thousands separators, plus a leading $ for the budget
#(both columns become strings after this step)
for column, template in (("Total School Budget", "${:,}"), ("Total Students", "{:,}")):
    school_summary_df[column] = school_summary_df[column].map(template.format)
school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
,,,,,,,,,
Huang High School,District,2917.0,"$1,910,635",655.0,76.629414,81.182722,65.683922,81.316421,73.500171
Figueroa High School,District,2949.0,"$1,884,411",639.0,76.711767,81.15802,65.988471,80.739234,73.363852
Shelton High School,Charter,1761.0,"$1,056,600",600.0,83.359455,83.725724,93.867121,95.854628,94.860875
Hernandez High School,District,4635.0,"$3,022,020",652.0,77.289752,80.934412,66.752967,80.862999,73.807983
Griffin High School,Charter,1468.0,"$917,500",625.0,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,Charter,2283.0,"$1,319,574",578.0,83.274201,83.989488,93.867718,96.539641,95.203679
Cabrera High School,Charter,1858.0,"$1,081,356",582.0,83.061895,83.97578,94.133477,97.039828,95.586652
Bailey High School,District,4976.0,"$3,124,928",628.0,77.048432,81.033963,66.680064,81.93328,74.306672
Holden High School,Charter,427.0,"$248,087",581.0,83.803279,83.814988,92.505855,96.252927,94.379391


In [8]:
#highest performing schools by overall passing rate
#order the summary from the best overall passing rate down to the worst
top_schools = school_summary_df.sort_values("% Overall Passing Rate", ascending=False)

#show the first five rows of that ordering
top_schools.iloc[:5]

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
,,,,,,,,,
Cabrera High School,Charter,1858.0,"$1,081,356",582.0,83.061895,83.97578,94.133477,97.039828,95.586652
Thomas High School,Charter,1635.0,"$1,043,130",638.0,83.418349,83.84893,93.272171,97.308869,95.29052
Pena High School,Charter,962.0,"$585,858",609.0,83.839917,84.044699,94.594595,95.945946,95.27027
Griffin High School,Charter,1468.0,"$917,500",625.0,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,Charter,2283.0,"$1,319,574",578.0,83.274201,83.989488,93.867718,96.539641,95.203679


In [9]:
#lowest performing schools by overall passing rate
#order the summary from the worst overall passing rate up to the best
#(sort_values sorts ascending unless told otherwise)
bottom_schools = school_summary_df.sort_values("% Overall Passing Rate")

#show the first five rows of that ordering
bottom_schools.iloc[:5]

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
,,,,,,,,,
Rodriguez High School,District,3999.0,"$2,547,363",637.0,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,District,2949.0,"$1,884,411",639.0,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,District,2917.0,"$1,910,635",655.0,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,District,4761.0,"$3,094,650",650.0,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,District,2739.0,"$1,763,916",644.0,77.102592,80.746258,68.309602,79.299014,73.804308


In [10]:
#table of average math score per grade for each school

#grade labels as they appear in the "grade" column, in the column order we want to display
grade_levels = ["9th", "10th", "11th", "12th"]

#for each grade: keep only that grade's students, group by school and take the mean math score
#each entry is a Series indexed by school name; the dataframe lines them up by school
grade_and_math = pd.DataFrame({
    grade: (school_data_complete
            .loc[school_data_complete["grade"]==grade,:]
            .groupby("school_name").math_score.mean())
    for grade in grade_levels
})

#make it look pretty - blank out the index label
grade_and_math = grade_and_math.rename_axis("", axis='index')
grade_and_math

Unnamed: 0,9th,10th,11th,12th
,,,,
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248


In [11]:
#table of average reading score per grade for each school

#grade labels as they appear in the "grade" column, in the column order we want to display
grade_levels = ["9th", "10th", "11th", "12th"]

#for each grade: keep only that grade's students, group by school and take the mean reading score
#each entry is a Series indexed by school name; the dataframe lines them up by school
grade_and_read = pd.DataFrame({
    grade: (school_data_complete
            .loc[school_data_complete["grade"]==grade,:]
            .groupby("school_name").reading_score.mean())
    for grade in grade_levels
})

#make it look pretty - blank out the index label
grade_and_read = grade_and_read.rename_axis("", axis='index')
grade_and_read

Unnamed: 0,9th,10th,11th,12th
,,,,
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564


In [12]:
#scores by school spending(per student)
#bucket every school's per-student budget into a labelled spending range
#the bin edges pair up with the labels in order: edges 0-585 get the first label, and so on
spending_bins = [0, 585, 615, 645, 675]
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

per_student_budget = school_summary_df["Per Student Budget"]
school_summary_df["spending"] = pd.cut(per_student_budget, bins=spending_bins, labels=group_names)
school_summary_df

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate,spending
,,,,,,,,,,
Huang High School,District,2917.0,"$1,910,635",655.0,76.629414,81.182722,65.683922,81.316421,73.500171,$645-675
Figueroa High School,District,2949.0,"$1,884,411",639.0,76.711767,81.15802,65.988471,80.739234,73.363852,$615-645
Shelton High School,Charter,1761.0,"$1,056,600",600.0,83.359455,83.725724,93.867121,95.854628,94.860875,$585-615
Hernandez High School,District,4635.0,"$3,022,020",652.0,77.289752,80.934412,66.752967,80.862999,73.807983,$645-675
Griffin High School,Charter,1468.0,"$917,500",625.0,83.351499,83.816757,93.392371,97.138965,95.265668,$615-645
Wilson High School,Charter,2283.0,"$1,319,574",578.0,83.274201,83.989488,93.867718,96.539641,95.203679,<$585
Cabrera High School,Charter,1858.0,"$1,081,356",582.0,83.061895,83.97578,94.133477,97.039828,95.586652,<$585
Bailey High School,District,4976.0,"$3,124,928",628.0,77.048432,81.033963,66.680064,81.93328,74.306672,$615-645
Holden High School,Charter,427.0,"$248,087",581.0,83.803279,83.814988,92.505855,96.252927,94.379391,<$585


In [13]:
#rename columns so we have handles to use that work with syntax
school_summary_calc = school_summary_df.rename(columns={"Avg Math Score": "avg_math_score", "Avg Reading Score": "avg_read_score",
                                       "% Passing Math": "pass_math", "% Passing Reading": "pass_read",
                                        "% Overall Passing Rate": "overall_pass",
                                                       "Total Students": "size"})

#average every score column within each spending range using a single groupby
#observed=False keeps every spending range in the result, even one that contains no schools
score_columns = ["avg_math_score", "avg_read_score", "pass_math", "pass_read", "overall_pass"]
spending_means = school_summary_calc.groupby("spending", observed=False)[score_columns].mean()

#switch back to display headers and label the index
spending_summary = (
    spending_means
    .rename(columns={"avg_math_score": "Average Math Score",
                     "avg_read_score": "Average Reading Score",
                     "pass_math": "% Passing Math",
                     "pass_read": "% Passing Reading",
                     "overall_pass": "% Overall Passing Rate"})
    .rename_axis("Spending Ranges(Per Student)", axis='index')
)
spending_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges(Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.455399,83.933814,93.460096,96.610877,95.035486
$585-615,83.599686,83.885211,94.230858,95.900287,95.065572
$615-645,79.079225,81.891436,75.668212,86.106569,80.887391
$645-675,76.99721,81.027843,66.164813,81.133951,73.649382


In [14]:
#scores by School size (grouped)
#start with school summary df which has all the columns we need, and create bins
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

#cast size as int and remove formatting
#going through astype(str) first means this also works when "size" is already an int
#(for example when this cell is run a second time); .str only works on string columns
size_text = school_summary_calc["size"].astype(str).str.replace(",", "")
school_summary_calc["size"] = size_text.astype(int)

#label each school with the size range its student count falls into
school_summary_calc["School_Size"] = pd.cut(school_summary_calc["size"], size_bins, labels=group_names)
school_summary_calc

Unnamed: 0,School Type,size,Total School Budget,Per Student Budget,avg_math_score,avg_read_score,pass_math,pass_read,overall_pass,spending,School_Size
,,,,,,,,,,,
Huang High School,District,2917.0,"$1,910,635",655.0,76.629414,81.182722,65.683922,81.316421,73.500171,$645-675,Large (2000-5000)
Figueroa High School,District,2949.0,"$1,884,411",639.0,76.711767,81.15802,65.988471,80.739234,73.363852,$615-645,Large (2000-5000)
Shelton High School,Charter,1761.0,"$1,056,600",600.0,83.359455,83.725724,93.867121,95.854628,94.860875,$585-615,Medium (1000-2000)
Hernandez High School,District,4635.0,"$3,022,020",652.0,77.289752,80.934412,66.752967,80.862999,73.807983,$645-675,Large (2000-5000)
Griffin High School,Charter,1468.0,"$917,500",625.0,83.351499,83.816757,93.392371,97.138965,95.265668,$615-645,Medium (1000-2000)
Wilson High School,Charter,2283.0,"$1,319,574",578.0,83.274201,83.989488,93.867718,96.539641,95.203679,<$585,Large (2000-5000)
Cabrera High School,Charter,1858.0,"$1,081,356",582.0,83.061895,83.97578,94.133477,97.039828,95.586652,<$585,Medium (1000-2000)
Bailey High School,District,4976.0,"$3,124,928",628.0,77.048432,81.033963,66.680064,81.93328,74.306672,$615-645,Large (2000-5000)
Holden High School,Charter,427.0,"$248,087",581.0,83.803279,83.814988,92.505855,96.252927,94.379391,<$585,Small (<1000)


In [15]:

#average every score column within each school-size group using a single groupby
#observed=False keeps every size group in the result, even one that contains no schools
score_columns = ["avg_math_score", "avg_read_score", "pass_math", "pass_read", "overall_pass"]
size_means = school_summary_calc.groupby("School_Size", observed=False)[score_columns].mean()

#switch back to display headers and label the index
size_summary = (
    size_means
    .rename(columns={"avg_math_score": "Average Math Score",
                     "avg_read_score": "Average Reading Score",
                     "pass_math": "% Passing Math",
                     "pass_read": "% Passing Reading",
                     "overall_pass": "% Overall Passing Rate"})
    .rename_axis("School Size", axis='index')
)
size_summary

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.821598,83.929843,93.550225,96.099437,94.824831
Medium (1000-2000),83.374684,83.864438,93.599695,96.79068,95.195187
Large (2000-5000),77.746417,81.344493,69.963361,82.766634,76.364998


In [16]:

#average every score column within each school type using a single groupby
score_columns = ["avg_math_score", "avg_read_score", "pass_math", "pass_read", "overall_pass"]
type_means = school_summary_calc.groupby("School Type")[score_columns].mean()

#switch back to display headers (the index keeps its "School Type" label)
type_summary = type_means.rename(columns={"avg_math_score": "Average Math Score",
                                          "avg_read_score": "Average Reading Score",
                                          "pass_math": "% Passing Math",
                                          "pass_read": "% Passing Reading",
                                          "overall_pass": "% Overall Passing Rate"})

type_summary


Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.473852,83.896421,93.62083,96.586489,95.10366
District,76.956733,80.966636,66.548453,80.799062,73.673757
