In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
school_raw = "Resources/schools_complete.csv"
student_raw = "Resources/students_complete.csv"

# Minimum score counted as a pass for both math and reading.
PASSING_SCORE = 70

# Read the school and student CSV files into pandas DataFrames.
scdf = pd.read_csv(school_raw)
stdf = pd.read_csv(student_raw)

# Left-join students onto schools by school name, so each school row is
# repeated once for every matching student row.
cdf = pd.merge(scdf, stdf, how="left", on="school_name")

# Keep only the columns needed for this summary. The explicit .copy() makes
# ccdf an independent frame, so the column assignments below do not write to
# a slice of cdf (which triggers SettingWithCopyWarning).
ccdf = cdf[['size', 'reading_score', 'math_score']].copy()

# Helper columns: budget divided by school size, and boolean pass flags.
ccdf['bps'] = cdf['budget'] / cdf['size']
ccdf['pm'] = cdf['math_score'] >= PASSING_SCORE
ccdf['pr'] = cdf['reading_score'] >= PASSING_SCORE

# Bin edges for school size. pd.cut intervals are right-inclusive by default:
# (0, 1000], (1000, 2000], (2000, 5000]. Sizes outside (0, 5000] become NaN
# and are therefore left out of the grouping.
bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]
ccdf['School Size'] = pd.cut(ccdf["size"], bins, labels=group_names)

# Group rows by size bin. observed=False keeps every size category in the
# result even if it has no rows, and avoids depending on the changing pandas
# default for categorical groupers.
gbo = ccdf.groupby('School Size', observed=False)

# Per-bin mean scores and number of passing rows (sum of a boolean column
# counts its True values).
gdf = gbo.agg({
    "reading_score": "mean",
    "math_score": "mean",
    "pm": "sum",
    "pr": "sum",
})

# Rows per bin: the denominator for the passing rates.
gdf['tc'] = gbo['pm'].count()

# Passing rates are stored as fractions in [0, 1], not multiplied by 100.
gdf['% Passing Math'] = gdf['pm'] / gdf['tc']
gdf['% Passing Reading'] = gdf['pr'] / gdf['tc']
# Overall rate is the simple average of the math and reading passing rates.
gdf['% Overall Passing Rate'] = (gdf['% Passing Reading'] + gdf['% Passing Math']) / 2

# Rename the score columns for presentation, then select the display columns
# in their final order.
fdf = gdf.rename(columns={'reading_score': 'Average Reading Score',
                          'math_score': 'Average Math Score'})

fdf = fdf[['Average Math Score', 'Average Reading Score', '% Passing Math',
           '% Passing Reading', '% Overall Passing Rate']]

fdf.style

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.8287,83.9741,0.939525,0.960403,0.949964
Medium (1000-2000),83.3727,83.868,0.936165,0.967731,0.951948
Large (2000-5000),77.4776,81.1987,0.686524,0.821252,0.753888
