In [268]:
# dependencies
import pandas as pd
import os
import numpy as np

# data files
school_data = os.path.join("..","Resources","schools_complete.csv")
student_data = os.path.join("..","Resources","students_complete.csv")

# read csv files into a dataframe
school_df = pd.read_csv(school_data)
student_df = pd.read_csv(student_data)

# combine dataframes
school_data_complete = pd.merge(student_df, school_df, how="left", on=["school_name", "school_name"])
#school_data_complete.head()

In [126]:
# Total schools
total_schools = school_data_complete['school_name'].nunique()

In [127]:
#Total students
total_students = school_data_complete['student_name'].count()

In [128]:
#Total budget
total_budget = school_data_complete['budget'].sum()

In [129]:
#Average math score
avg_math_score = school_data_complete['math_score'].mean()

In [130]:
#Average reading score
avg_reading_score = school_data_complete['reading_score'].mean()

In [131]:
#% passing math (the percentage of students who passed math)
passing = school_data_complete[school_data_complete['math_score']>=70].count()[6]
passing_math_percent = (passing/total_students)*100

In [132]:
#% passing reading (the percentage of students who passed reading)
passing = school_data_complete[school_data_complete['reading_score']>=70].count()[5]
passing_reading_percent = (passing/total_students)*100

In [134]:
#% overall passing (the percentage of students who passed math AND reading)
passing = school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)].count()[6]
passing_both_percent = (passing/total_students)*100

In [135]:
# create summary data frame
summary = pd.DataFrame(
    {"Total Schools":[total_schools],
     "Total Budget":[np.around(total_budget,2)],
     "Average Math Score":[avg_math_score],
     "Average Reading Score":[avg_reading_score],
     "% Passing Math":[passing_math_percent],
     "% Passing Reading":[passing_reading_percent],
     "% Overall Passing":[passing_both_percent]   
    }
)

summary

# TO DO - format values

Unnamed: 0,Total Schools,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,82932329558,78.985371,81.87784,74.980853,85.805463,65.172326


In [None]:
# group data by school name
school_data_grouped = school_data_complete.groupby("school_name")

In [265]:
school_type = school_data_grouped['type'].unique()

In [217]:
total_students = school_data_grouped['student_name'].count()

In [243]:
# group just the school data for budget totals
just_school_grouped = school_df.groupby("school_name")
total_budget = just_school_grouped['budget'].sum()

In [244]:
budget_per_student = (total_budget/total_students)

In [245]:
avg_math_score = school_data_grouped['math_score'].mean()

In [246]:
avg_reading_score = school_data_grouped['reading_score'].mean()

In [247]:
passing = school_data_complete[school_data_complete['math_score']>=70].groupby("school_name").count()
passing_math_percent = (passing['math_score']/total_students)*100

In [248]:
passing = school_data_complete[school_data_complete['reading_score']>=70].groupby("school_name").count()
passing_reading_percent = (passing['reading_score']/total_students)*100

In [249]:
passing = school_data_complete[(school_data_complete['math_score']>=70) & (school_data_complete['reading_score']>=70)].groupby("school_name").count()
passing_both_percent = (passing['math_score']/total_students)*100

In [267]:
# summary table
summary_df = pd.concat([school_type,total_students,total_budget, budget_per_student, 
                        avg_math_score, avg_reading_score, passing_math_percent,
                        passing_reading_percent, passing_both_percent],
                    axis=1, 
                    keys=['School Type','Total Students','Total Budget','Budget per Student','Average Math Score',
                          'Average Reading Score','% Passing Math','% Passing Reading','% Overall Passing']
                    )

summary_df

Unnamed: 0_level_0,School Type,Total Students,Total Budget,Budget per Student,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,[Charter],1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,[District],2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,[District],2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,[Charter],1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,[District],4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,[Charter],427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,[District],2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,[District],4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,[Charter],962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541
