In [21]:
# dependencies
import pandas as pd
import os
import numpy as np

# data files
school_data = os.path.join("..","Resources","schools_complete.csv")
student_data = os.path.join("..","Resources","students_complete.csv")

# read csv files into a dataframe
school_df = pd.read_csv(school_data)
student_df = pd.read_csv(student_data)

# combine dataframes
school_data_complete = pd.merge(student_df, school_df, how="left", on=["school_name", "school_name"])

# District Summary

Snapshot of key metrics for the PyCity School District

In [146]:
# Total schools
total_schools = school_data_complete['school_name'].nunique()

#Total students
total_students = school_data_complete['student_name'].count()

#Total budget - use schools_df to not count school budgets multiple times
total_budget = school_df['budget'].sum()

#Average math score
avg_math_score = school_data_complete['math_score'].mean()

#Average reading score
avg_reading_score = school_data_complete['reading_score'].mean()

#% passing math (the percentage of students who passed math)
passing = school_data_complete[school_data_complete['math_score']>=70].count()[6]
passing_math_percent = (passing/total_students)*100

#% passing reading (the percentage of students who passed reading)
passing = school_data_complete[school_data_complete['reading_score']>=70].count()[5]
passing_reading_percent = (passing/total_students)*100

#% overall passing (the percentage of students who passed math AND reading)
passing = school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)].count()[6]
passing_both_percent = (passing/total_students)*100

# create summary data frame
summary = pd.DataFrame(
    {"Total Schools":[total_schools],
     "Total Budget":[total_budget],
     "Average Math Score":[avg_math_score],
     "Average Reading Score":[avg_reading_score],
     "% Passing Math":[passing_math_percent],
     "% Passing Reading":[passing_reading_percent],
     "% Overall Passing":[passing_both_percent]   
    }
)

# format values
summary["Total Budget"] = summary["Total Budget"].map("${:,.2f}".format)
summary["% Passing Math"] = summary["% Passing Math"].map("{:.2f}%".format)
summary["% Passing Reading"] = summary["% Passing Reading"].map("{:.2f}%".format)
summary["% Overall Passing"] = summary["% Overall Passing"].map("{:.2f}%".format)

summary

Unnamed: 0,Total Schools,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,"$24,649,428.00",78.985371,81.87784,74.98%,85.81%,65.17%


# School Summary

Key metrics for each school in the PyCity School District

In [199]:
# group data by school name
school_data_grouped = school_data_complete.groupby(["school_name"])

In [216]:
school_type = school_data_grouped['type'].unique()
school_type = 
school_type

TypeError: sequence item 0: expected str instance, numpy.ndarray found

In [201]:
total_students = school_data_grouped['student_name'].count()

In [202]:
# group just the school data for budget totals
just_school_grouped = school_df.groupby("school_name")
total_budget = just_school_grouped['budget'].sum()

In [203]:
budget_per_student = (total_budget/total_students)

In [204]:
avg_math_score = school_data_grouped['math_score'].mean()

In [205]:
avg_reading_score = school_data_grouped['reading_score'].mean()

In [206]:
passing = school_data_complete[school_data_complete['math_score']>=70].groupby("school_name").count()
passing_math_percent = (passing['math_score']/total_students)*100

In [207]:
passing = school_data_complete[school_data_complete['reading_score']>=70].groupby("school_name").count()
passing_reading_percent = (passing['reading_score']/total_students)*100

In [208]:
passing = school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)].groupby("school_name").count()
passing_both_percent = (passing['math_score']/total_students)*100

In [211]:
# create summary table
summary_df = pd.DataFrame({
    "School Type": school_type,
    "Total Students": total_students,
    "Total Budget": total_budget, 
    "Budget per Student": budget_per_student, 
    "Average Math Score":avg_math_score, 
    "Average Reading Score":avg_reading_score, 
    "% Passing Math": passing_math_percent,
    "% Passing Reading": passing_reading_percent, 
    "% Overall Passing": passing_both_percent
})

In [212]:
# format table
summary_df["Total Budget"] = summary_df["Total Budget"].map("${:,.2f}".format)
summary_df["Budget per Student"] = summary_df["Budget per Student"].map("${:,.2f}".format)
summary_df["% Passing Math"] = summary_df["% Passing Math"].map("{:.2f}%".format)
summary_df["% Passing Reading"] = summary_df["% Passing Reading"].map("{:.2f}%".format)
summary_df["% Overall Passing"] = summary_df["% Overall Passing"].map("{:.2f}%".format)

In [213]:
summary_df

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Budget per Student,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.68%,81.93%,54.64%
Cabrera High School,[Charter],1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.13%,97.04%,91.33%
Figueroa High School,[District],2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.99%,80.74%,53.20%
Ford High School,[District],2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.31%,79.30%,54.29%
Griffin High School,[Charter],1468,"$917,500.00",$625.00,83.351499,83.816757,93.39%,97.14%,90.60%
Hernandez High School,[District],4635,"$3,022,020.00",$652.00,77.289752,80.934412,66.75%,80.86%,53.53%
Holden High School,[Charter],427,"$248,087.00",$581.00,83.803279,83.814988,92.51%,96.25%,89.23%
Huang High School,[District],2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.68%,81.32%,53.51%
Johnson High School,[District],4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.06%,81.22%,53.54%
Pena High School,[Charter],962,"$585,858.00",$609.00,83.839917,84.044699,94.59%,95.95%,90.54%


# Highest-Performing Schools (by % Overall Passing)