In [4]:
# Dependencies and Setup
import pandas as pd


In [5]:
# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas Data Frames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

In [7]:
school_count = school_data["school_name"].count()
student_pop = school_data["size"].sum()
total_budget = school_data["budget"].sum()


In [8]:
#Districtwide Student Stats
scores = student_data[["Student ID","school_name","reading_score","math_score"]]

avg_math_score = scores["math_score"].mean()

avg_read_score = scores["reading_score"].mean()

overall_avg_score = (avg_math_score + avg_read_score)/2


In [9]:
passing_math = scores.loc[scores["math_score"] >= 70]
pct_pass_math = len(passing_math)/student_pop * 100

passing_read = scores.loc[scores["reading_score"] >= 70]
pct_pass_read = len(passing_read)/student_pop * 100


In [7]:
district_sum = pd.DataFrame({"Total Schools" : [school_count],
                             "Total Students" : student_pop,
                             "Total Budget" : total_budget,
                             "Average Math Score" : avg_math_score,
                             "Average Reading Score" : avg_read_score,
                             "Pct. Pass Math" : pct_pass_math,
                             "Pct. Pass Read" : pct_pass_read,
                             "Pct. Overall Passing Rate" : overall_avg_score})
#district_sum.map.format({"Total Students" : "{:,.0f}",
#                           "Total Budget" : "${:,.0f}"})
district_sum

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Pct. Pass Math,Pct. Pass Read,Pct. Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,80.431606


In [10]:
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])


In [38]:
school_index = school_data.set_index('school_name')
school_type = school_index['type']
school_size = school_index['size']
school_budget = school_index['budget']



In [39]:
budget_per_student = (school_budget/school_size)

school_budget = school_budget.map('${:,.2f}'.format)
budget_per_student = budget_per_student.map('${:,.2f}'.format)


school_budget

school_name
Huang High School        $1,910,635.00
Figueroa High School     $1,884,411.00
Shelton High School      $1,056,600.00
Hernandez High School    $3,022,020.00
Griffin High School        $917,500.00
Wilson High School       $1,319,574.00
Cabrera High School      $1,081,356.00
Bailey High School       $3,124,928.00
Holden High School         $248,087.00
Pena High School           $585,858.00
Wright High School       $1,049,400.00
Rodriguez High School    $2,547,363.00
Johnson High School      $3,094,650.00
Ford High School         $1,763,916.00
Thomas High School       $1,043,130.00
Name: budget, dtype: object

In [40]:
school_scores = school_data_complete.groupby(['school_name'])


school_avg_math = school_scores["math_score"].mean()
school_avg_read = school_scores["reading_score"].mean()

school_avg_math = school_avg_math.map('{:.2f}'.format)
school_avg_read = school_avg_read.map('{:.2f}'.format)

In [41]:
sch_passing_math = scores.loc[school_data_complete["math_score"] >= 70]
sch_group_by_math = sch_passing_math.groupby(['school_name']).agg('count')
sch_pct_pass_math = (sch_group_by_math)['math_score']/school_size * 100

sch_passing_read = scores.loc[school_data_complete["reading_score"] >= 70]
sch_group_by_read = sch_passing_read.groupby(['school_name']).agg("count")
sch_pct_pass_read = (sch_group_by_read)['reading_score']/school_size * 100

sch_overall_pass_rate = (sch_pct_pass_math + sch_pct_pass_read)/2

school_size = school_size.map('{:,.0f}'.format)
sch_pct_pass_math = sch_pct_pass_math.map('{:.2f}'.format)
sch_pct_pass_read = sch_pct_pass_read.map('{:.2f}'.format)
sch_overall_pass_rate = sch_overall_pass_rate.map('{:.2f}'.format)
sch_overall_pass_rate

school_name
Bailey High School       74.31
Cabrera High School      95.59
Figueroa High School     73.36
Ford High School         73.80
Griffin High School      95.27
Hernandez High School    73.81
Holden High School       94.38
Huang High School        73.50
Johnson High School      73.64
Pena High School         95.27
Rodriguez High School    73.29
Shelton High School      94.86
Thomas High School       95.29
Wilson High School       95.20
Wright High School       94.97
dtype: object

In [42]:

school_sum = pd.DataFrame({"School Type" : school_type,
                           "Total School Size" : school_size,
                           "Total School Budget" : school_budget,
                           "Budget Per Student" : budget_per_student,
                           "Average Math Schore" : school_avg_math,
                           "Average Reading Score" : school_avg_read,
                           "% Passing Math" : sch_pct_pass_math,
                           "% Passing Reading" : sch_pct_pass_read,
                           "Overall Passing Rate" : sch_overall_pass_rate
                           }
)

school_sum['Total School Budget'] = school_sum['Total School Budget']
school_sum

Unnamed: 0,School Type,Total School Size,Total School Budget,Budget Per Student,Average Math Schore,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.05,81.03,66.68,81.93,74.31
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06,83.98,94.13,97.04,95.59
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71,81.16,65.99,80.74,73.36
Ford High School,District,2739,"$1,763,916.00",$644.00,77.1,80.75,68.31,79.3,73.8
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35,83.82,93.39,97.14,95.27
Hernandez High School,District,4635,"$3,022,020.00",$652.00,77.29,80.93,66.75,80.86,73.81
Holden High School,Charter,427,"$248,087.00",$581.00,83.8,83.81,92.51,96.25,94.38
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63,81.18,65.68,81.32,73.5
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.07,80.97,66.06,81.22,73.64
Pena High School,Charter,962,"$585,858.00",$609.00,83.84,84.04,94.59,95.95,95.27


In [47]:
High5_Schools = school_sum.sort_values("Overall Passing Rate",ascending = False)
High5_Schools.head(n=5)

Unnamed: 0,School Type,Total School Size,Total School Budget,Budget Per Student,Average Math Schore,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.06,83.98,94.13,97.04,95.59
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.42,83.85,93.27,97.31,95.29
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.35,83.82,93.39,97.14,95.27
Pena High School,Charter,962,"$585,858.00",$609.00,83.84,84.04,94.59,95.95,95.27
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.27,83.99,93.87,96.54,95.2


In [48]:
Low5_Schools = school_sum.sort_values("Overall Passing Rate")
Low5_Schools.head(n=5)

Unnamed: 0,School Type,Total School Size,Total School Budget,Budget Per Student,Average Math Schore,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.84,80.74,66.37,80.22,73.29
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.71,81.16,65.99,80.74,73.36
Huang High School,District,2917,"$1,910,635.00",$655.00,76.63,81.18,65.68,81.32,73.5
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.07,80.97,66.06,81.22,73.64
Ford High School,District,2739,"$1,763,916.00",$644.00,77.1,80.75,68.31,79.3,73.8


In [74]:
ninth_gr = student_data.loc[student_data['grade']=='9th'].groupby('school_name')
tenth_gr = student_data.loc[student_data['grade']=='10th'].groupby('school_name')
eleventh_gr = student_data.loc[student_data['grade']=='11th'].groupby('school_name')
twelfth_gr = student_data.loc[student_data['grade']=='12th'].groupby('school_name')

math_ninth_by_school = ninth_gr['math_score'].mean()
math_tenth_by_school = tenth_gr['math_score'].mean()
math_eleventh_by_school = eleventh_gr['math_score'].mean()
math_twelfth_by_school = twelfth_gr['math_score'].mean()

math_summary_by_school = pd.DataFrame({"9th Grade" : math_ninth_by_school,
                                       "10th Grade" : math_tenth_by_school,
                                       "11th Grade" : math_eleventh_by_school,
                                       "12th Grade" : math_twelfth_by_school})
math_summary_by_school

Unnamed: 0_level_0,9th Grade,10th Grade,11th Grade,12th Grade
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.083676,76.996772,77.515588,76.492218
Cabrera High School,83.094697,83.154506,82.76556,83.277487
Figueroa High School,76.403037,76.539974,76.884344,77.151369
Ford High School,77.361345,77.672316,76.918058,76.179963
Griffin High School,82.04401,84.229064,83.842105,83.356164
Hernandez High School,77.438495,77.337408,77.136029,77.186567
Holden High School,83.787402,83.429825,85.0,82.855422
Huang High School,77.027251,75.908735,76.446602,77.225641
Johnson High School,77.187857,76.691117,77.491653,76.863248
Pena High School,83.625455,83.372,84.328125,84.121547
