# Py City School District Analysis

In [1]:
import pandas as pd

In [2]:
school_csv = 'Resources/schools_complete.csv'
student_csv = 'Resources/students_complete.csv'

school_data = pd.read_csv(school_csv)
student_data = pd.read_csv(student_csv)

complete_school_data = pd.merge(student_data, school_data, how="left", on="school_name")
complete_school_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## Creating District Summary Table

In [3]:
#Calculating number of schools:
schools = len(school_data.school_name)

#Calulating number of students and storing a formatted version for the table:
students = student_data.student_name.count()
students_formatted = '{:,.0f}'.format(students)

#Calulating budget and storing a formatted version for the table:
budget = school_data.budget.sum()
budget_formatted = '${:,.2f}'.format(float(budget))

#Calculating average math and reading scores:
avg_math = round(student_data.math_score.mean(), 6)
avg_reading = round(student_data.reading_score.mean(),6)

#Verified outputs
#print(schools, students, students_formatted, budget, budget_formatted, avg_math, avg_reading)

In [4]:
#Calculating passing percentages:
pct_pass_math = round((len(student_data.loc[(student_data['math_score']>=70)])/students)*100, 6)
pct_pass_reading = round((len(student_data.loc[(student_data['reading_score']>=70)])/students)*100, 6)
pct_pass_combined = round((len(student_data.loc[(student_data['math_score']>=70) & 
                                                (student_data['reading_score']>=70)])/students)*100, 6)
#Verified outputs
#print(pct_pass_math, pct_pass_reading, pct_pass_combined)

In [5]:
#Building Data Frame
district_summary = pd.DataFrame({'Total Schools':[schools],
                                 'Total Students': [students_formatted],
                                 'Total Budget': [budget_formatted],
                                 'Average Math Score': [avg_math],
                                 'Average Reading Score': [avg_reading],
                                 '% Passing Math': [pct_pass_math],
                                 '% Passing Reading': [pct_pass_reading],
                                 '% Overall Passing': [pct_pass_combined]
                                })
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## Creating School Summary Table

In [6]:
school_summary = complete_school_data.groupby('school_name').agg({'type': ['unique'],
                                                              'student_name': ['count'],
                                                               'budget': ['unique'],
                                                               'math_score': ['mean'],
                                                               'reading_score': ['mean'],
                                                               })
school_summary.head()

Unnamed: 0_level_0,type,student_name,budget,math_score,reading_score
Unnamed: 0_level_1,unique,count,unique,mean,mean
school_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Bailey High School,[District],4976,[3124928],77.048432,81.033963
Cabrera High School,[Charter],1858,[1081356],83.061895,83.97578
Figueroa High School,[District],2949,[1884411],76.711767,81.15802
Ford High School,[District],2739,[1763916],77.102592,80.746258
Griffin High School,[Charter],1468,[917500],83.351499,83.816757


School Summary
Create an overview table that summarizes key metrics about each school, including:

% Passing Math

% Passing Reading

% Overall Passing (The percentage of students that passed math and reading.)

In [7]:
school_summary.columns = ['Type', 'Total Students', 'Budget', 'Average Math Score', 
                          'Average Read Score']

In [9]:
school_summary['Budget per Student'] = school_summary.Budget/school_summary['Total Students']
school_summary.head()

Unnamed: 0_level_0,Type,Total Students,Budget,Average Math Score,Average Read Score,Budget per Student
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,[District],4976,[3124928],77.048432,81.033963,[628.0]
Cabrera High School,[Charter],1858,[1081356],83.061895,83.97578,[582.0]
Figueroa High School,[District],2949,[1884411],76.711767,81.15802,[639.0]
Ford High School,[District],2739,[1763916],77.102592,80.746258,[644.0]
Griffin High School,[Charter],1468,[917500],83.351499,83.816757,[625.0]
