In [41]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Input CSV locations, relative to the notebook's working directory
# (adjust these if the Resources folder lives elsewhere).
school_raw = "Resources/schools_complete.csv"
student_raw = "Resources/students_complete.csv"

# Load the school table and the student table into DataFrames.
scdf = pd.read_csv(school_raw)
stdf = pd.read_csv(student_raw)

# Join the student rows onto the school rows via the shared "school_name"
# column. The school table is the left side of a left join, so every school
# row is kept even if no student row matches it.
cdf = pd.merge(scdf, stdf, how="left", on="school_name")

# Flag each row as passing (score of 70 or above) in reading and in math.
# NOTE: despite the "%" in their names, these two columns hold booleans here;
# they are summed per school further down to obtain pass counts.
cdf["% Reading"] = cdf["reading_score"].ge(70)
cdf["% Math"] = cdf["math_score"].ge(70)

# Keep only the columns that feed the per-school summary.
summary_columns = [
    "school_name",
    "type",
    "size",
    "budget",
    "reading_score",
    "math_score",
    "% Reading",
    "% Math",
]
ccdf = cdf[summary_columns]

# Group the trimmed frame by school. The key is supplied as a Series taken
# from the merged frame rather than as a column label.
gcdf = ccdf.groupby(cdf["school_name"])

# Aggregation applied to each column within a school:
#   - "min" for school_name / type / size / budget (presumably constant
#     within a school, so "min" just recovers the single value -- TODO confirm)
#   - "mean" for the raw scores
#   - "sum" for the boolean pass flags, i.e. the number of passing rows
school_aggregations = {
    "school_name": "min",
    "type": "min",
    "size": "min",
    "budget": "min",
    "reading_score": "mean",
    "math_score": "mean",
    "% Math": "sum",
    "% Reading": "sum",
}
ngcdf = pd.DataFrame(gcdf.agg(school_aggregations))

# Derive the per-school metrics.
# At this point "% Math" / "% Reading" hold the summed pass flags (a count),
# so dividing by "size" and scaling by 100 yields a pass percentage.
ngcdf['% Passing Math'] = ngcdf['% Math'] / ngcdf['size'] * 100
ngcdf['% Passing Reading'] = ngcdf['% Reading'] / ngcdf['size'] * 100
ngcdf['Per Student Budget'] = ngcdf['budget'] / ngcdf['size']
# Overall rate is the plain average of the two subject pass percentages.
ngcdf['% Overall Passing Rate'] = (ngcdf['% Passing Math'] + ngcdf['% Passing Reading']) / 2

# Format columns for readability. After this step every formatted column
# holds display strings, not numbers.
ngcdf["size"] = ngcdf["size"].map("{:,}".format)
ngcdf["budget"] = ngcdf["budget"].map("${:,.2f}".format)
ngcdf["Per Student Budget"] = ngcdf["Per Student Budget"].map("${:,.2f}".format)
# Average scores are mean points, not percentages, so they get two decimals
# without a "%" suffix.
ngcdf["math_score"] = ngcdf["math_score"].map("{:.2f}".format)
ngcdf["reading_score"] = ngcdf["reading_score"].map("{:.2f}".format)
# Pass rates are genuine percentages.
ngcdf["% Passing Math"] = ngcdf["% Passing Math"].map("{:.2f}%".format)
ngcdf["% Passing Reading"] = ngcdf["% Passing Reading"].map("{:.2f}%".format)
ngcdf["% Overall Passing Rate"] = ngcdf["% Overall Passing Rate"].map("{:.2f}%".format)

# Pick the report columns in presentation order.
ndf = ngcdf[['type',
             'size',
             'budget',
             'Per Student Budget',
             'math_score',
             'reading_score',
             '% Passing Math',
             '% Passing Reading',
             '% Overall Passing Rate']]

# Replace the raw column names with human-readable headers.
nndf = ndf.rename(
    columns={"type": "School Type",
             "size": "Total Students",
             "budget": "Total School Budget",
             "math_score": "Average Math Score",
             "reading_score": "Average Reading Score"})

# Clear the index label so the rendered table has no extra header row for it.
# Assigning None is supported by every pandas release, whereas deleting the
# attribute raises AttributeError on pandas 1.0 and later.
nndf.index.name = None

In [42]:
# Top five schools by overall passing rate.
# The column holds display strings such as "95.59%", and sorting strings is
# lexicographic ("100.00%" would rank below "95.00%"). Rank on the parsed
# numeric value instead and reorder the table by the resulting index.
# mergesort is stable, so tied schools keep their existing relative order.
overall_rate_desc = (
    nndf["% Overall Passing Rate"]
    .str.rstrip("%")
    .astype(float)
    .sort_values(ascending=False, kind="mergesort")
)
n3df = nndf.loc[overall_rate_desc.index]
n3df.iloc[:5].style

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06%,83.98%,94.13%,97.04%,95.59%
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.42%,83.85%,93.27%,97.31%,95.29%
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35%,83.82%,93.39%,97.14%,95.27%
Pena High School,Charter,962,"$585,858.00",$609.00,83.84%,84.04%,94.59%,95.95%,95.27%
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.27%,83.99%,93.87%,96.54%,95.20%


In [43]:
# Bottom five schools by overall passing rate.
# The column holds display strings such as "73.29%", and sorting strings is
# lexicographic ("9.50%" would rank above "73.00%"). Rank on the parsed
# numeric value instead and reorder the table by the resulting index.
# mergesort is stable, so tied schools keep their existing relative order.
overall_rate_asc = (
    nndf["% Overall Passing Rate"]
    .str.rstrip("%")
    .astype(float)
    .sort_values(ascending=True, kind="mergesort")
)
n4df = nndf.loc[overall_rate_asc.index]
n4df.iloc[:5].style

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.84%,80.74%,66.37%,80.22%,73.29%
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71%,81.16%,65.99%,80.74%,73.36%
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63%,81.18%,65.68%,81.32%,73.50%
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.07%,80.97%,66.06%,81.22%,73.64%
Ford High School,District,2739,"$1,763,916.00",$644.00,77.10%,80.75%,68.31%,79.30%,73.80%
