# Py City School District Analysis

In [1]:
import pandas as pd
school_data = pd.read_csv('Resources/schools_complete.csv')
student_data = pd.read_csv('Resources/students_complete.csv')

complete_school_data = pd.merge(student_data, school_data, how="left", on="school_name")
# complete_school_data.head() - verified merge

## Creating District Summary Table

In [2]:
#Calculating number of schools:
schools = len(school_data.school_name)

#Calulating number of students and storing a formatted version for the table:
students = student_data.student_name.count()
students_formatted = '{:,.0f}'.format(students)

#Calulating budget and storing a formatted version for the table:
budget = school_data.budget.sum()
budget_formatted = '${:,.2f}'.format(float(budget))

#Calculating average math and reading scores:
avg_math = round(student_data.math_score.mean(), 6)
avg_reading = round(student_data.reading_score.mean(),6)

#Verified outputs
#print(schools, students, students_formatted, budget, budget_formatted, avg_math, avg_reading)

In [3]:
#Calculating passing percentages:
pct_pass_math = (len(student_data.loc[(student_data['math_score']>=70)])/students)*100
pct_pass_reading = (len(student_data.loc[(student_data['reading_score']>=70)])/students)*100
pct_pass_combined = (len(student_data.loc[(student_data['math_score']>=70) & 
                                                (student_data['reading_score']>=70)])/students)*100
#Verified outputs
#print(pct_pass_math, pct_pass_reading, pct_pass_combined)

In [5]:
#Building Data Frame
district_summary = pd.DataFrame({'Total Schools':[schools],
                                 'Total Students': [students_formatted],
                                 'Total Budget': [budget_formatted],
                                 'Average Math Score': [avg_math],
                                 'Average Reading Score': [avg_reading],
                                 '% Passing Math': [pct_pass_math],
                                 '% Passing Reading': [pct_pass_reading],
                                 '% Overall Passing': [pct_pass_combined]
                                })
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## Creating School Summary Table

In [6]:
# Creating table using groupby.agg to apply different functions to each column:
school_summary = complete_school_data.groupby('school_name').agg({'type': ['unique'],
                                                              'student_name': ['count'],
                                                               'budget': ['unique'],
                                                               'math_score': ['mean'],
                                                               'reading_score': ['mean'],
                                                               })

In [7]:
# Renaming Columns
school_summary.columns = ['Type', 'Total Students', 'Budget', 'Average Math Score', 
                          'Average Read Score']

In [8]:
# Calculating Budget per Student and adding column
school_summary['Budget per Student'] = school_summary.Budget/school_summary['Total Students']

In [9]:
# Calculating Percent Passing Math and adding column
passing_math_groupby = complete_school_data.loc[complete_school_data.math_score >= 70, :].groupby("school_name").count()
pct_math_passing = (passing_math_groupby.math_score/school_summary['Total Students'])*100
school_summary['% Passing Math'] = pct_math_passing

In [10]:
# Calculating Percent Passing Reading and adding column
passing_reading_groupby = complete_school_data.loc[complete_school_data.reading_score >= 70, :].groupby("school_name").count()
pct_reading_passing = (passing_reading_groupby.reading_score/school_summary['Total Students'])*100
school_summary['% Passing Reading'] = pct_reading_passing

In [11]:
# Calculating Percent Overall Passing and adding column
passing_overall_groupby = complete_school_data.loc[(complete_school_data.math_score >= 70) & (complete_school_data.reading_score >= 70),:].groupby("school_name").count()
pct_overall_passing = (passing_overall_groupby.grade/school_summary['Total Students'])*100
school_summary['% Overall Passing'] = pct_overall_passing
school_summary

Unnamed: 0_level_0,Type,Total Students,Budget,Average Math Score,Average Read Score,Budget per Student,% Passing Math,% Passing Reading,% Overall Passing
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,[District],4976,[3124928],77.048432,81.033963,[628.0],66.680064,81.93328,54.642283
Cabrera High School,[Charter],1858,[1081356],83.061895,83.97578,[582.0],94.133477,97.039828,91.334769
Figueroa High School,[District],2949,[1884411],76.711767,81.15802,[639.0],65.988471,80.739234,53.204476
Ford High School,[District],2739,[1763916],77.102592,80.746258,[644.0],68.309602,79.299014,54.289887
Griffin High School,[Charter],1468,[917500],83.351499,83.816757,[625.0],93.392371,97.138965,90.599455
Hernandez High School,[District],4635,[3022020],77.289752,80.934412,[652.0],66.752967,80.862999,53.527508
Holden High School,[Charter],427,[248087],83.803279,83.814988,[581.0],92.505855,96.252927,89.227166
Huang High School,[District],2917,[1910635],76.629414,81.182722,[655.0],65.683922,81.316421,53.513884
Johnson High School,[District],4761,[3094650],77.072464,80.966394,[650.0],66.057551,81.222432,53.539172
Pena High School,[Charter],962,[585858],83.839917,84.044699,[609.0],94.594595,95.945946,90.540541
