In [1]:
import pandas as pd

In [2]:
# Load files
school_data = "Resources/schools_complete.csv"
student_data = "Resources/students_complete.csv"

In [3]:
#Load data into a dataframe
school_df = pd.read_csv(school_data)
student_df = pd.read_csv(student_data)

In [107]:
# Combine school and data dataframes into a single dataframe
school_complete_df = pd.merge(student_df, school_df, how = "left", on = ["school_name", "school_name"])

In [7]:
def convert_boolean_to_numeric(bool_value):
    if bool_value == True:
        return 1
    else:
        return 0    

In [108]:
pass_reading_flag = school_complete_df["reading_score"] >= 70

In [109]:
# Create a reading flag data frame to append to the school_complete_df
reading_pass_flag_df = pd.DataFrame(pass_reading_flag)

In [110]:
reading_pass_flag_df.rename(mapper = {"reading_score" : "reading_pass_flag"}, axis = "columns", inplace = True)

In [111]:
pass_math_flag = school_complete_df["math_score"] >= 70

In [112]:
# Create a reading flag data frame to append to the school_complete_df
math_flag_df = pd.DataFrame(pass_math_flag)

In [113]:
# Rename column header
math_flag_df.rename(mapper = {"math_score" : "math_pass_flag"}, axis = "columns", inplace = True)

In [115]:
# Create a series for teh overall passing flag
pass_overall = (pass_reading_flag & pass_math_flag)

In [116]:
# Create a pass overall dataframe 
pass_overall_df = pd.DataFrame(pass_overall, columns=["pass_overall_flag"])

In [117]:
# Create a dataframe that joins the reading dataframes
new_complete_school_df = school_complete_df.join(reading_pass_flag_df)

In [118]:
# Create a dataframe that joins the math dataframes
flagged_complete_school_df = new_complete_school_df.join(math_flag_df)

In [119]:
# Create a dataframe that joins the overall passing dataframes
flagged_complete_school_df = flagged_complete_school_df.join(pass_overall_df)

In [120]:
# Create a grouping based on the school name
schools = flagged_complete_school_df.groupby("school_name")

In [121]:
# Convert the boolen flags to numeric values in the reading pass data for later calculations
flagged_complete_school_df["reading_pass_flag"] = flagged_complete_school_df["reading_pass_flag"].apply(convert_boolean_to_numeric)

In [122]:
# Convert the boolen flags to numeric values in the math pass data for later calculations
flagged_complete_school_df["math_pass_flag"] = flagged_complete_school_df["math_pass_flag"].apply(convert_boolean_to_numeric)


In [123]:
# Convert the boolen flags to numeric values in the overall pass data for later calculations
flagged_complete_school_df["pass_overall_flag"] = flagged_complete_school_df["pass_overall_flag"].apply(convert_boolean_to_numeric)

In [124]:
# Calculate the total number of schools
total_schools = len(schools)

In [125]:
# Calculate the total budget
budget_df = schools.first()
total_budget = budget_df["budget"].sum()

In [126]:
# Calculat the total number of students
total_students = school_complete_df["Student ID"].count()

# Calculate the average reading score
avg_reading_score = school_complete_df["reading_score"].mean()

# Calculate the average math score
avg_math_score = school_complete_df["math_score"].mean()

# Calculate the % of students that have a passing reading score
read_filter = school_complete_df["reading_score"] >= 70
num_passed_reading = len(school_complete_df[read_filter])
percent_passing_reading = num_passed_reading / total_students

# Calculate the % of students that have a passing math score
math_filter = school_complete_df["math_score"] >= 70
num_passed_math = len(school_complete_df[math_filter])
percent_passing_math = num_passed_math / total_students

# Calculate the % of students that have a passing math and reading score
overall_filter = len(school_complete_df[read_filter & math_filter])
percent_passing_overall = overall_filter / total_students


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [127]:
# Create a dataframe to show the district summary
district_summary_df = pd.DataFrame(
    {"Total Schools": [total_schools],
     "Total Students": [f'{total_students:,}'],
     "Total Budget" : [f'${total_budget:,}'],
     "Avg Math Score" : [f'{(avg_math_score / 100):.2%}'],
     "Avg Reading Score" : [f'{(avg_reading_score / 100):.2%}'],
     "% Passing Math" : [f'{(percent_passing_math):.2%}'],
     "% Passing Reading" : [f'{percent_passing_reading:.2%}'],
     "% Overall Passing" : [f'{percent_passing_overall:.2%}']
     }
)

district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.99%,81.88%,74.98%,85.81%,65.17%


In [128]:
type_budget_df = schools[["type", "budget"]].first()

In [130]:
# Calculate the values for each column using the agg function
school_summary1_df = schools.agg({"type" : "first",
                                 "Student ID" : "count",
                                 "budget" : "first",
                                 "math_score" : "mean",
                                 "reading_score" : "mean",
                                 "reading_pass_flag" : "sum",
                                 "math_pass_flag" : "sum",
                                 "pass_overall_flag" : "sum"})

# Rename the column headers
school_summary1_df.rename(mapper = {"type" : "School Type",
                                    "Student ID" : "Total Students",
                                    "budget" : "Total School Budget",
                                    "math_score" : "Average Math Score",
                                    "reading_score" : "Average Reading Score",
                                    "reading_pass_flag" : "Total Students Passing Reading",
                                    "math_pass_flag" : "Total Students Passing Math",
                                    "pass_overall_flag" : "Total Students Passing Overall"}, axis = "columns", inplace = True)



In [131]:
# Calculate the budget per student
budget_student = school_summary1_df["Total School Budget"] / school_summary1_df["Total Students"]

# Calculate the passing percentages for each school
percent_passing_math = (school_summary1_df["Total Students Passing Math"] / school_summary1_df["Total Students"]) * 100
percent_passing_reading = (school_summary1_df["Total Students Passing Reading"] / school_summary1_df["Total Students"]) * 100
percent_passing_overall = (school_summary1_df["Total Students Passing Overall"] / school_summary1_df["Total Students"]) * 100

# Convert the budget per student series into a data frame
budget_student_df = pd.DataFrame({"Per Student Budget" :budget_student,
                                  "% Passing Math" : percent_passing_math,
                                  "% Passing Reading" : percent_passing_reading,
                                  "% Overall Passing" : percent_passing_overall})


In [132]:
# Join the Per Student Budget data column to the data frame using a .join method
new_school_summary_df = school_summary1_df.join(budget_student_df)


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [32]:
# Set the column order for the updated data frame
column_order = ["School Type", "Total Students", "Total School Budget", "Per Student Budget",
                "Average Math Score", "Average Reading Score", "% Passing Math", "% Passing Reading", "% Overall Passing"]

# Apply the column order to the new data frame and save with a new dataframe name
school_summary_with_budget_df = new_school_summary_df.reindex(columns = column_order)
school_summary_with_budget_df

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


## Top Performing Schools (By % Overall Passing)

In [33]:
school_summary_with_budget_df.sort_values("% Overall Passing", ascending = False).head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130,638.0,83.418349,83.84893,93.272171,97.308869,90.948012
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


## Bottom Performing Schools (By % Overall Passing)

In [34]:
school_summary_with_budget_df.sort_values("% Overall Passing", ascending = True).head(5)

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


## Math Scores by Grade

* Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [133]:
# Reset the datafame index to use the school name
flagged_school_df = flagged_complete_school_df.set_index("school_name")

In [134]:
# Create a series for ninth grade math scores
nine = flagged_school_df['grade'] == '9th'
ninth_grade = flagged_school_df[nine]
ninth_grade = ninth_grade['math_score']

In [135]:
#Create a DataFrame for ninth grade math scores
ninth_grade_df = pd.DataFrame(ninth_grade)

In [136]:
# Rename the column name
ninth_grade_df.rename(mapper = {'math_score': '9th'}, axis = 'columns', inplace = True)

In [137]:
# Group by school
ninth_grade_df = ninth_grade_df.groupby("school_name")

In [138]:
# Get the average math score by school
ninth = ninth_grade_df["9th"].mean()

In [139]:
# Recreate the DataFrame summarized by school
ninth_grade_df = pd.DataFrame(ninth)

In [140]:
# Create a series for tenth grade math scores
ten = flagged_school_df['grade'] == '10th'
tenth_grade = flagged_school_df[ten]
tenth_grade = tenth_grade['math_score']

In [141]:
# Create a DataFrame for tenth grade math scores
tenth_grade_df = pd.DataFrame(tenth_grade)

In [142]:
# Rename the column
tenth_grade_df.rename(mapper = {"math_score" : "10th"}, axis = "columns", inplace = True)

In [143]:
# Group by school
tenth_grade_df = tenth_grade_df.groupby("school_name")

In [144]:
# Get the average math score by school
tenth = tenth_grade_df["10th"].mean()

In [145]:
# Recreate the DataFrame summarized by school
tenth_grade_df = pd.DataFrame(tenth)

In [146]:
# Create a series for eleventh grade math scores
eleven = flagged_school_df['grade'] == '11th'
eleventh_grade = flagged_school_df[eleven]
eleventh_grade = eleventh_grade['math_score']

In [147]:
# Create a DataFrame for eleventh  grade math scores
eleventh_grade_df = pd.DataFrame(eleventh_grade)

In [148]:
# Rename the column
eleventh_grade_df.rename(mapper = {"math_score" : "11th"}, axis = "columns", inplace = True)

In [149]:
# Group by school
eleventh_grade_df = eleventh_grade_df.groupby("school_name")

In [150]:
# Get the average math score by school
eleventh = eleventh_grade_df["11th"].mean()

In [151]:
# Recreate the DataFrame summarized by school
eleventh_grade_df = pd.DataFrame(eleventh)

In [152]:
# Create a series for twelfth grade math scores
twelve = flagged_school_df['grade'] == '12th'
twelfth_grade = flagged_school_df[twelve]
twelfth_grade = twelfth_grade['math_score']

In [153]:
# Create a DataFrame for twelfth grade math scores
twelfth_grade_df = pd.DataFrame(twelfth_grade)

In [154]:
# Rename the column
twelfth_grade_df.rename(mapper = {"math_score" : "12th"}, axis = "columns", inplace = True)

In [155]:
# Group by school
twelfth_grade_df = twelfth_grade_df.groupby("school_name")

In [156]:
# Get the average math score by school
twelfth = twelfth_grade_df["12th"].mean()

In [157]:
# Recreate the DataFrame summarized by school
twelfth_grade_df = pd.DataFrame(twelfth)

In [158]:
# Join the 9th and 10th grade columns together
math_scores_by_grade_df = ninth_grade_df.join(tenth_grade_df)

In [159]:
# Join the 11th grade column
math_scores_by_grade_df = math_scores_by_grade_df.join(eleventh_grade_df)

In [160]:
# Join the 12th grade column
math_scores_by_grade_df = math_scores_by_grade_df.join(twelfth_grade_df)

In [161]:
math_scores_by_grade_df

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547


## Reading Scores by Grade

In [162]:
# Create a series for ninth grade reading scores
nine = flagged_school_df['grade'] == '9th'
ninth_grade = flagged_school_df[nine]
ninth_grade = ninth_grade['reading_score']

In [163]:
#Create a DataFrame for ninth grade reading scores
ninth_grade_df = pd.DataFrame(ninth_grade)

In [164]:
# Rename the column name
ninth_grade_df.rename(mapper = {'reading_score': '9th'}, axis = 'columns', inplace = True)

In [165]:
# Group by school
ninth_grade_df = ninth_grade_df.groupby("school_name")

In [166]:
# Get the average reading score by school
ninth = ninth_grade_df["9th"].mean()

In [167]:
# Recreate the DataFrame summarized by school
ninth_grade_df = pd.DataFrame(ninth)

In [168]:
# Create a series for tenth grade reading scores
ten = flagged_school_df['grade'] == '10th'
tenth_grade = flagged_school_df[ten]
tenth_grade = tenth_grade['reading_score']

In [169]:
# Create a DataFrame for tenth grade reading scores
tenth_grade_df = pd.DataFrame(tenth_grade)

In [170]:
# Rename the column
tenth_grade_df.rename(mapper = {"reading_score" : "10th"}, axis = "columns", inplace = True)

In [171]:
# Group by school
tenth_grade_df = tenth_grade_df.groupby("school_name")

In [172]:
# Get the average reading score by school
tenth = tenth_grade_df["10th"].mean()

In [173]:
# Recreate the DataFrame summarized by school
tenth_grade_df = pd.DataFrame(tenth)

In [174]:
# Create a series for eleventh grade reading scores
eleven = flagged_school_df['grade'] == '11th'
eleventh_grade = flagged_school_df[eleven]
eleventh_grade = eleventh_grade['reading_score']

In [175]:
# Create a DataFrame for eleventh  grade reading scores
eleventh_grade_df = pd.DataFrame(eleventh_grade)

In [176]:
# Rename the column
eleventh_grade_df.rename(mapper = {"reading_score" : "11th"}, axis = "columns", inplace = True)

In [177]:
# Group by school
eleventh_grade_df = eleventh_grade_df.groupby("school_name")

In [178]:
# Get the average reading score by school
eleventh = eleventh_grade_df["11th"].mean()

In [179]:
# Recreate the DataFrame summarized by school
eleventh_grade_df = pd.DataFrame(eleventh)

In [180]:
# Create a series for twelfth grade reading scores
twelve = flagged_school_df['grade'] == '12th'
twelfth_grade = flagged_school_df[twelve]
twelfth_grade = twelfth_grade['reading_score']

In [181]:
# Create a DataFrame for twelfth grade reading scores
twelfth_grade_df = pd.DataFrame(twelfth_grade)

In [182]:
# Rename the column
twelfth_grade_df.rename(mapper = {"reading_score" : "12th"}, axis = "columns", inplace = True)

In [183]:
# Group by school
twelfth_grade_df = twelfth_grade_df.groupby("school_name")

In [184]:
# Get the average reading score by school
twelfth = twelfth_grade_df["12th"].mean()

In [185]:
# Recreate the DataFrame summarized by school
twelfth_grade_df = pd.DataFrame(twelfth)

In [186]:
# Join the 9th and 10th grade columns together
reading_scores_by_grade_df = ninth_grade_df.join(tenth_grade_df)

In [187]:
# Join the 11th grade column
reading_scores_by_grade_df = reading_scores_by_grade_df.join(eleventh_grade_df)

In [188]:
# Join the 12th grade column
reading_scores_by_grade_df = reading_scores_by_grade_df.join(twelfth_grade_df)

In [189]:
reading_scores_by_grade_df

Unnamed: 0_level_0,9th,10th,11th,12th
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.303155,80.907183,80.945643,80.912451
Cabrera High School,83.676136,84.253219,83.788382,84.287958
Figueroa High School,81.198598,81.408912,80.640339,81.384863
Ford High School,80.632653,81.262712,80.403642,80.662338
Griffin High School,83.369193,83.706897,84.288089,84.013699
Hernandez High School,80.86686,80.660147,81.39614,80.857143
Holden High School,83.677165,83.324561,83.815534,84.698795
Huang High School,81.290284,81.512386,81.417476,80.305983
Johnson High School,81.260714,80.773431,80.616027,81.227564
Pena High School,83.807273,83.612,84.335938,84.59116


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [190]:
# Define a function that will create a spending range based on the budget per student
def spending_range(num1):
    if num1 >= 645:
        sp_range = "645-675"
    elif num1 >= 630:
        sp_range = "630-644"
    elif num1 >= 585:
        sp_range = "585-629"
    else:
        sp_range = "<584"
        
    return sp_range

In [191]:
# Use the existing df and give it a new name with the new columns
school_scores_df = flagged_school_df

In [192]:
# Calculate the budget per school
budget_per_school = school_scores_df["budget"] / school_scores_df["size"]

# Add 2 new columns to the dataframe and populuate the values
school_scores_df["Spending"] = budget_per_school
school_scores_df["Spending Category"] = school_scores_df["Spending"].apply(spending_range)


In [193]:
# Group the dataframe by the new spending category created above
spending_category_df = school_scores_df.groupby("Spending Category")

In [194]:
# Calculate total students per category
total_students_per_category = spending_category_df["Spending Category"].count()

# Calculate the average math score
avg_math_score = spending_category_df["math_score"].mean()

# Calculate the average reading score
avg_reading_score = spending_category_df["reading_score"].mean()

# Calculate the passing percentages for each school
percent_passing_math = spending_category_df["math_pass_flag"].sum() / total_students_per_category
percent_passing_reading = spending_category_df["reading_pass_flag"].sum() / total_students_per_category
percent_passing_overall = spending_category_df["pass_overall_flag"].sum() / total_students_per_category

# Convert the budget per student series into a data frame
spending_category_df = pd.DataFrame({"Average Math Score" : avg_math_score,
                                     "Average Reading Score" : avg_reading_score,
                                     "% Passing Math" : percent_passing_math,
                                     "% Passing Reading" : percent_passing_reading,
                                     "% Overall Passing" : percent_passing_overall})

spending_category_df.sort_values("Spending Category", inplace = True, ascending = False)

In [195]:
spending_category_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$584,83.363065,83.964039,0.937029,0.966866,0.906407
$645-675,77.049297,81.005604,0.662308,0.811094,0.535288
$630-644,77.821056,81.301007,0.706236,0.826002,0.588412
$585-629,79.982873,82.312643,0.791099,0.885131,0.709392


## Scores by School Size

In [196]:
# Define a function that will create a school size category based on student population
def school_size(num1):
    if num1 >= 2000:
        size_group = "Large (2000-5000)"
    elif num1 >= 1000:
        size_group = "Medium (1000-2000)"
    else:
        size_group = "Small (<1000)"    
        
    return size_group

In [197]:
# Use the existing df and give it a new name with the new columns
school_size_df = flagged_school_df

In [198]:
# Add 2 new columns to the dataframe and populuate the values
school_size_df["Spending Ranges (Per Student)"] = school_size_df["size"].apply(school_size)

In [199]:
# Group the dataframe by the new school size category created above
school_size_df = school_size_df.groupby("Spending Ranges (Per Student)")

In [200]:
# Calculate total students per category
total_students_per_category = school_size_df["Spending Ranges (Per Student)"].count()

# Calculate the average math score
avg_math_score = school_size_df["math_score"].mean()

# Calculate the average reading score
avg_reading_score = school_size_df["reading_score"].mean()

# Calculate the passing percentages for each school
percent_passing_math = (school_size_df["math_pass_flag"].sum() / total_students_per_category) * 100
percent_passing_reading = (school_size_df["reading_pass_flag"].sum() / total_students_per_category) * 100
percent_passing_overall = (school_size_df["pass_overall_flag"].sum() / total_students_per_category) * 100

# Convert the budget per student series into a data frame
school_size_df = pd.DataFrame({"Average Math Score" : avg_math_score,
                                     "Average Reading Score" : avg_reading_score,
                                     "% Passing Math" : percent_passing_math,
                                     "% Passing Reading" : percent_passing_reading,
                                     "% Overall Passing" : percent_passing_overall})

school_size_df.sort_values("Spending Ranges (Per Student)", inplace = True, ascending = False)

In [201]:
school_size_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.828654,83.974082,93.952484,96.040317,90.136789
Medium (1000-2000),83.372682,83.867989,93.616522,96.773058,90.624267
Large (2000-5000),77.477597,81.198674,68.65238,82.125158,56.574046


## Scores by School Type

In [202]:
# Group the dataframe by the Charter and District categories
school_type_df = flagged_school_df.groupby("type")

In [203]:
# Calculate total students per category
total_students_per_category = school_type_df["type"].count()

# Calculate the average math score
avg_math_score = school_type_df["math_score"].mean()

# Calculate the average reading score
avg_reading_score = school_type_df["reading_score"].mean()

# Calculate the passing percentages for each school
percent_passing_math = (school_type_df["math_pass_flag"].sum() / total_students_per_category) * 100
percent_passing_reading = (school_type_df["reading_pass_flag"].sum() / total_students_per_category) * 100
percent_passing_overall = (school_type_df["pass_overall_flag"].sum() / total_students_per_category) * 100

# Convert the budget per student series into a data frame
school_type_df = pd.DataFrame({"Average Math Score" : avg_math_score,
                                     "Average Reading Score" : avg_reading_score,
                                     "% Passing Math" : percent_passing_math,
                                     "% Passing Reading" : percent_passing_reading,
                                     "% Overall Passing" : percent_passing_overall})

school_type_df.sort_values("type", inplace = True, ascending = True)

In [204]:
school_type_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.406183,83.902821,93.701821,96.645891,90.560932
District,76.987026,80.962485,66.518387,80.905249,53.695878
