In [58]:
# Add the Pandas dependency.
import pandas as pd
import os

# Build the path to each CSV under the "Resources" folder and load it right away.
school_data_to_load = os.path.join("Resources", "schools_complete.csv")
school_data_df = pd.read_csv(school_data_to_load)

student_data_to_load = os.path.join("Resources", "students_complete.csv")
student_data_df = pd.read_csv(student_data_to_load)


In [59]:
# Text fragments to strip from student names: leading titles (each ends with a
# space) followed by trailing credentials (each starts with a space).
prefixes_suffixes = [
    "Dr. ", "Mr. ", "Ms. ", "Mrs. ", "Miss ",
    " MD", " DDS", " DVM", " PhD",
]

In [60]:
# Remove every entry of "prefixes_suffixes" from the student names by replacing
# it with an empty string.
# regex=False makes each entry match literally: pandas versions before 2.0 treat
# the pattern as a regular expression by default, in which case the "." in
# "Dr. " or "Mr. " would match any character instead of a literal period.
# NOTE(review): the text is removed wherever it occurs in a name, not only at
# the start or end -- confirm that is acceptable for this data.
for word in prefixes_suffixes:
    student_data_df["student_name"] = student_data_df["student_name"].str.replace(word, "", regex=False)

In [61]:
# Combine the student and school data into a single dataset, joined on the
# school name (the join key only needs to be named once).
# validate="many_to_one" makes pandas raise a MergeError if a school name is
# repeated in school_data_df; without the check a repeated school would silently
# duplicate its students' rows and inflate every count computed afterwards.
school_data_complete_df = pd.merge(
    student_data_df,
    school_data_df,
    on="school_name",
    validate="many_to_one",
)

In [62]:
# Number of schools = how many distinct school names appear in the merged data.
school_uniques = school_data_complete_df.loc[:, "school_name"].unique()
school_count = len(school_uniques)

In [63]:
# Total budget: sum of the "budget" column of the school table
# (school_data_df, not the merged per-student table).
total_budget = school_data_df.loc[:, "budget"].sum()

In [64]:
# District-wide mean of each subject's score, taken over the merged dataset.
average_math_score, average_reading_score = (
    school_data_complete_df[score_column].mean()
    for score_column in ("math_score", "reading_score")
)

In [65]:
# Boolean masks marking the rows whose score meets the passing grade of 70.
# NOTE(review): the following cell rebinds both names to filtered DataFrames,
# so these masks are not used under these names afterwards.
passing_math, passing_reading = (
    school_data_complete_df[score_column] >= 70
    for score_column in ("math_score", "reading_score")
)

In [66]:
# Keep only the rows with a passing score (70 or higher) in each subject,
# then count how many rows each filtered DataFrame holds.
passing_math = school_data_complete_df.loc[school_data_complete_df["math_score"] >= 70]
passing_reading = school_data_complete_df.loc[school_data_complete_df["reading_score"] >= 70]
passing_math_count, passing_reading_count = len(passing_math), len(passing_reading)

In [67]:
# Total students = number of non-missing entries in the "student_name" column.
student_count = school_data_complete_df.loc[:, "student_name"].count()

In [68]:
# Share of all students who passed each subject, expressed as a percentage.
passing_math_percentage, passing_reading_percentage = (
    passed_count / float(student_count) * 100
    for passed_count in (passing_math_count, passing_reading_count)
)

In [69]:
# Rows for students who scored 70 or higher in BOTH math and reading.
passed_math_reading = school_data_complete_df.loc[
    (school_data_complete_df["math_score"] >= 70)
    & (school_data_complete_df["reading_score"] >= 70)
]

In [70]:
# How many students passed both subjects (non-missing names in the filtered rows).
overall_passing_math_reading_count = passed_math_reading.loc[:, "student_name"].count()

In [71]:
# Overall passing rate: students who passed both subjects, as a percentage of
# all students.
overall_passing_percentage = 100 * (overall_passing_math_reading_count / student_count)

In [72]:
# Gather the district-wide metrics into one labelled record, then wrap that
# record in a list so pandas builds a one-row DataFrame from it.
_district_metrics = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": total_budget,
    "Average Reading Score": average_reading_score,
    "Average Math Score": average_math_score,
    "% Passing Reading": passing_reading_percentage,
    "% Passing Math": passing_math_percentage,
    "% Overall Passing": overall_passing_percentage,
}
district_summary_df = pd.DataFrame([_district_metrics])

In [73]:
# Helper that turns a passing count and a total count into a percentage.
def passing_math_percent(pass_math_count, student_count):
    """Return the share of students who passed math, as a percentage.

    Args:
        pass_math_count: Number of students who passed math.
        student_count: Total number of students; converted with float()
            before the division.

    Returns:
        pass_math_count divided by student_count, multiplied by 100.
        The value is returned, not printed.
    """
    passing_fraction = pass_math_count / float(student_count)
    return passing_fraction * 100

In [74]:
# Format "Total Budget" as currency text: a "$" prefix, commas as thousands
# separators, and two decimal places.
# The ".2f" supplies the decimal part this step calls for and keeps the output
# uniform whether the summed budget is an integer or a float (a bare "{:,}"
# prints no decimals for an int and an arbitrary number of them for a float).
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.2f}".format)

In [75]:
# Render "Total Students" as text with commas as thousands separators.
district_summary_df["Total Students"] = district_summary_df["Total Students"].map(
    lambda total: format(total, ",")
)

In [76]:
# Show the average reading score as text rounded to one decimal place.
district_summary_df["Average Reading Score"] = district_summary_df["Average Reading Score"].map(
    lambda score: format(score, ".1f")
)

In [77]:
# Show the average math score as text rounded to one decimal place.
district_summary_df["Average Math Score"] = district_summary_df["Average Math Score"].map(
    lambda score: format(score, ".1f")
)

In [78]:
# Show each passing-rate column as whole-number text (no decimal places).
for pct_column in ("% Passing Reading", "% Passing Math", "% Overall Passing"):
    district_summary_df[pct_column] = district_summary_df[pct_column].map("{:.0f}".format)

In [79]:
# Column order for the final summary: totals first, then average scores,
# then passing rates (math ahead of reading in each pair).
new_column_order = [
    "Total Schools",
    "Total Students",
    "Total Budget",
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing",
]

In [80]:
# Rebind district_summary_df to a copy whose columns follow new_column_order.
district_summary_df = district_summary_df.loc[:, new_column_order]

In [81]:
# Bare expression as the last line of the cell: the notebook renders the
# district summary DataFrame as this cell's output.
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",79.0,81.9,75,86,65
