In [128]:
# Import dependencies
import pandas as pd
from os import path

# Create reference to filepath CSV file (student and school data files)
student_data_to_load = path.join("../Resources/students_complete.csv")
school_data_to_load = path.join("../Resources/schools_complete.csv")

# Import CSVs into Pandas DataFrames
student_data = pd.read_csv(student_data_to_load)
school_data = pd.read_csv(school_data_to_load)

# Combine the data into a single dataset.  
school_data_complete = pd.merge(student_data, school_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [129]:
# Calculate total number of unique schools in the district

school_count = len(school_data_complete.loc[:,"school_name"].unique())

school_count

15

In [130]:
# Calculate total number of students in the district

student_count = school_data_complete.loc[:,"student_name"].count()

student_count

39170

In [142]:
# Calculate the total budget in the district

total_budget = school_data_complete.loc[:,"budget"].unique().sum()

total_budget

24649428

In [132]:
# Calculate the average math score

avg_math_score = school_data_complete.loc[:,"math_score"].mean()

avg_math_score

78.98537145774827

In [133]:
# Calculate the average reading score

avg_reading_score = school_data_complete.loc[:,"reading_score"].mean()

avg_reading_score

81.87784018381414

In [134]:
## Calculate % passing math

# Create new dataframe consisting of only students with a math score of 70 or higher
passing_math_count = school_data_complete.loc[student_data["math_score"] >= 70].count()["student_name"]


# Calculate percent of total students that passed math
passing_math_percentage = (passing_math_count / float(student_count)) * 100

passing_math_percentage

74.9808526933878

In [135]:
## Calculate % passing reading

# Count and store total number of students who passed reading
passing_reading_count = school_data_complete.loc[(school_data_complete["reading_score"] >= 70)].count()["student_name"]

# Calculate percent of total students who passed reading
passing_reading_percentage = (passing_reading_count / student_count) * 100

passing_reading_percentage

85.80546336482001

In [139]:
# Calculate % passing overall

# Create new dataframe consisting of only students with both reading AND math scores of 70 or higher
passing_math_reading_count = school_data_complete.loc[(school_data_complete["math_score"] >= 70) & (school_data_complete["reading_score"] >= 70)].count()["student_name"]

passing_math_reading_percentage = (passing_math_reading_count / student_count) * 100

passing_math_percentage

74.9808526933878

In [144]:

data = {"Total Number of Unique Schools":[school_count], "Total Students":[student_count], "Total Budget":[total_budget], "Average Math Score":[avg_math_score], 
        "Average Reading Score":[avg_reading_score], "% Passing Math":[passing_math_percentage], "% Passing Reading":[passing_reading_percentage], 
        "% Overall Passing":[passing_math_reading_percentage]
        }

district_summary = pd.DataFrame(data, index=["District Summary"])

# Formatting
district_summary["Total Students"] = district_summary["Total Students"].map("{:,}".format)
district_summary["Total Budget"] = district_summary["Total Budget"].map("${:,.2f}".format)
district_summary["% Passing Math"] = district_summary["% Passing Math"].map("{:,.2f}%".format)
district_summary["% Passing Reading"] = district_summary["% Passing Reading"].map("{:,.2f}%".format)
district_summary["% Overall Passing"] = district_summary["% Overall Passing"].map("{:,.2f}%".format)

district_summary

Unnamed: 0,Total Number of Unique Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
District Summary,15,39170,"$24,649,428.00",78.985371,81.87784,74.98%,85.81%,65.17%
