In [1]:
# dependencies
import pandas as pd

# relative paths of the two source CSV files
school_csv = "Resources/schools_complete.csv"
student_csv = "Resources/students_complete.csv"

# load both files, school table first, each into its own DataFrame
school_df, student_df = (
    pd.read_csv(csv_path) for csv_path in (school_csv, student_csv)
)

In [2]:
# preview the first five rows of the school table exactly as loaded
school_df.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [3]:
# preview the first five rows of the student table exactly as loaded
student_df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [4]:
# merge data into single dataset: attach each student's school columns by
# matching on the shared "school_name" column (listed once -- repeating the
# same key in `on` adds nothing)
# how="left" keeps every student row, even one whose school is absent from school_df
# validate="many_to_one" raises pandas.errors.MergeError if a school name occurs
# more than once in school_df, which would otherwise silently duplicate student rows
raw_df = pd.merge(
    student_df,
    school_df,
    how="left",
    on="school_name",
    validate="many_to_one",
)
raw_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
# (row count, column count) of the merged student/school table
raw_df.shape

(39170, 11)

## District Summary

In [6]:
# build the one-row district summary from the merged student/school table
# a score of 70 or greater counts as passing

# distinct school names present in the merged data, and how many there are
unq_school = raw_df['school_name'].unique()
school_count = len(unq_school)

# student count: number of rows in the student table, so a blank student_name
# cannot shrink the total (assumes one row per student -- TODO confirm)
stu_count = len(student_df)

# enrollment as reported by the schools themselves; kept as a manual cross-check
# against stu_count and not used in the summary below
dist_students = school_df['size'].sum()

# total budget for all schools
total_budget = school_df['budget'].sum()

# average math and reading scores (score points, not percentages)
avg_math = raw_df['math_score'].mean()
avg_reading = raw_df['reading_score'].mean()

# boolean masks, one entry per merged row: True where the score is 70 or greater
math_pass = raw_df['math_score'] >= 70
reading_pass = raw_df['reading_score'] >= 70

# percent of students passing math (70 or greater)
math_num = math_pass.sum()
math_prcnt = math_num / stu_count * 100

# percent of students passing reading (70 or greater)
reading_num = reading_pass.sum()
reading_prcnt = reading_num / stu_count * 100

# percent of students with overall passing grades (70 or greater in both math & reading)
passing_num = (math_pass & reading_pass).sum()
passing_prcnt = passing_num / stu_count * 100

# assemble the summary as a single-row dataframe
district_summary = pd.DataFrame({
    "Total Schools": [school_count],
    "Total Students": [stu_count],
    "Total Budget": [total_budget],
    "Average Math Score": [avg_math],
    "Average Reading Score": [avg_reading],
    "% Passing Math": [math_prcnt],
    "% Passing Reading": [reading_prcnt],
    "% Overall Passing": [passing_prcnt],
})

# display formats for ease of view; every formatted column becomes a string column
# average scores get two decimals and no "%" because they are mean score points,
# only the three passing-rate columns are percentages
column_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,}",
    "Average Math Score": "{:.2f}",
    "Average Reading Score": "{:.2f}",
    "% Passing Math": "{:.2f}%",
    "% Passing Reading": "{:.2f}%",
    "% Overall Passing": "{:.2f}%",
}
for column, fmt in column_formats.items():
    district_summary[column] = district_summary[column].map(fmt.format)

district_summary


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",78.99%,81.88%,74.98%,85.81%,65.17%


## School Summary