In [423]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from io import StringIO

In [424]:
# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File and store into Pandas DataFrames
school_df = pd.read_csv(school_data_to_load)
student_df = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  
school_student_df = pd.merge(student_df, school_df, how="left", on=["school_name", "school_name"])

In [426]:
#Dist Summary DataFrame
school_stus_df=school_student_df.drop(["student_name","gender","grade","School ID"],axis=1)
school_stus_df.head()

Unnamed: 0,Student ID,school_name,reading_score,math_score,type,size,budget
0,0,Huang High School,66,79,District,2917,1910635
1,1,Huang High School,94,61,District,2917,1910635
2,2,Huang High School,90,60,District,2917,1910635
3,3,Huang High School,67,58,District,2917,1910635
4,4,Huang High School,97,84,District,2917,1910635


In [530]:
#Calculate the total number of schools
total_schools=school_df['school_name'].count()

#Calculate the total number of students
tot_students= student_df['student_name'].count()

#Calculate the total budget per school
total_budget= school_df["budget"].sum()

#Calculate the average math score 
average_math_score=student_df["math_score"].mean()

#Calculate the average reading score
average_reading_score=student_df["reading_score"].mean()

#Calculate the number of students with a passing math score
students_pass_math_score= student_df.loc[student_df["math_score"]>=70] 
#Calculate the percentage of students with a passing math score
perc_stu_pass_math= round(len(students_pass_math_score)/tot_students *100,6)

#Calculate the number of students with a passing reading score
students_pass_reading_score=student_df.loc[student_df["reading_score"]>=70]
#Calculate the percentage of students with a passing reading score
perc_stu_pass_reading= round(len(students_pass_reading_score)/tot_students *100,6)

# Calculate the percentage of students who passed math and reading (% Overall Passing)
overall_perc=(perc_stu_pass_math+ perc_stu_pass_reading)/2


In [499]:
students_pass_math_score

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
4,4,Bonnie Ray,F,9th,Huang High School,97,84
5,5,Bryan Miranda,M,9th,Huang High School,94,94
6,6,Sheena Carter,F,11th,Huang High School,82,80
8,8,Michael Roth,M,10th,Huang High School,95,87
...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90


In [531]:
# District Summary

# Create a dataframe to hold the above results
dist_summary=pd.DataFrame({
    "Total Schools":[num_total_schools],"Total Students":[tot_students],
    "Total Budget":[total_budget],"Average Math Score": [average_math_score],"Average Reading Score": [average_reading_score],
    "Passing Math %": [perc_stu_pass_math],"Passing Reading %": [perc_stu_pass_reading], "Overall Passing %":[overall_perc]})
dist_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Passing Math %,Passing Reading %,Overall Passing %
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,80.393158


In [532]:
# Improve formatting before outputting
dist_summary["Total Budget"] = dist_summary["Total Budget"].map("${0:,.2f}".format)
dist_summary["Total Students"] = dist_summary["Total Students"].map("{0:,.0f}".format)
dist_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Passing Math %,Passing Reading %,Overall Passing %
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,80.393158


In [533]:
# SCHOOL SUMMARY

In [534]:
# Groupby function on merged school_df and students_df
school_stus_index= school_stus_df.set_index('school_name')
school_group= school_stus_index.groupby(['school_name'])      

In [535]:
# From school_df and students_df fot first analysis to fit rows
school_in=school_df.set_index("school_name")
school_group1= school_in.groupby(["school_name"])

In [553]:

# School Type (District per school)
school_type=school_in["type"] 

# Total_Students per school
total_students= school_in["size"]

# Total Budget per School
school_budget=school_in["budget"] 

# Per student Budget
student_budget= school_budget/total_students

# Averages using Groupby on the reading score per school
average_reading_score = school_group["reading_score"].mean()

# Averages using Groupby on the math score per school
average_math_score = school_group["math_score"].mean()

# % Passing Math
passing_math= students_pass_math_score.groupby("school_name") 
tot_num_students= school_group["Student ID"].count()
perc_passing_math= passing_math["Student ID"].count()/tot_num_students*100

# % Passing Reading
passing_reading= students_pass_reading_score.groupby("school_name") 
perc_passing_reading= passing_reading["Student ID"].count()/tot_num_students*100

#% Overall Passing (The percentage of students that passed math and reading.)
overall_passing= (perc_passing_math + perc_passing_reading)/2



In [554]:
# Create a dataframe to hold School Summary

schools_summary=pd.DataFrame({
    "School Type":school_type,
    "Total Students":total_students,
    "Total School Budget":school_budget,
    "Per Student Budget":student_budget,
    "Average Math Score":average_math_score,
    "Average Reading Score":average_reading_score,
    "% Passing Math":perc_passing_math,
    "% Passing Reading":perc_passing_reading,
    "Overall Passing Rate":overall_passing})

# Columns
schools_summary= schools_summary[["School Type","Total Students","Total School Budget",
                                "Per Student Budget","Average Math Score",
                                 "Average Reading Score","% Passing Math",
                                 "% Passing Reading","Overall Passing Rate"]]

# Formatring table
schools_summary['Total School Budget'] = schools_summary['Total School Budget'].map('${:,.2f}'.format)
schools_summary['Per Student Budget'] = schools_summary['Per Student Budget'].map('${:,.2f}'.format)

schools_summary.head()


Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,"$3,124,928.00",$628.00,77.048432,81.033963,66.680064,81.93328,74.306672
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668


In [438]:
# Top Performing Schools
top_schools= schools_summary.loc[schools_summary["Overall Passing Rate"]>90]
top_schools.sort_values(["Overall Passing Rate"], ascending=False).head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate,Spending Ranges(Per Student)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Cabrera High School,Charter,1858,"$1,081,356.00",$582.00,83.061895,83.97578,94.133477,97.039828,95.586652,<$585
Thomas High School,Charter,1635,"$1,043,130.00",$638.00,83.418349,83.84893,93.272171,97.308869,95.29052,$630-645
Pena High School,Charter,962,"$585,858.00",$609.00,83.839917,84.044699,94.594595,95.945946,95.27027,$585-630
Griffin High School,Charter,1468,"$917,500.00",$625.00,83.351499,83.816757,93.392371,97.138965,95.265668,$585-630
Wilson High School,Charter,2283,"$1,319,574.00",$578.00,83.274201,83.989488,93.867718,96.539641,95.203679,<$585


In [439]:
# Bottom Performing Schools (By Passing Rate)
bottom_perf_schools= schools_summary.loc[schools_summary["Overall Passing Rate"]<75]
bottom_perf_schools.sort_values(["Overall Passing Rate"], ascending=True).head()

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate,Spending Ranges(Per Student)
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Rodriguez High School,District,3999,"$2,547,363.00",$637.00,76.842711,80.744686,66.366592,80.220055,73.293323,$630-645
Figueroa High School,District,2949,"$1,884,411.00",$639.00,76.711767,81.15802,65.988471,80.739234,73.363852,$630-645
Huang High School,District,2917,"$1,910,635.00",$655.00,76.629414,81.182722,65.683922,81.316421,73.500171,$645-680
Johnson High School,District,4761,"$3,094,650.00",$650.00,77.072464,80.966394,66.057551,81.222432,73.639992,$645-680
Ford High School,District,2739,"$1,763,916.00",$644.00,77.102592,80.746258,68.309602,79.299014,73.804308,$630-645


In [440]:
# Using the student dataframe

# Math Scores by Grade
student_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84
