In [None]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [None]:
# Relative paths of the two input CSV files: school records, then student records
school_data_to_load, student_data_to_load = (
    "Resources/schools_complete.csv",
    "Resources/students_complete.csv",
)

In [None]:
# Load each CSV into its own DataFrame: the school file first, then the student file
school_data, student_data = (
    pd.read_csv(csv_path)
    for csv_path in (school_data_to_load, student_data_to_load)
)

In [None]:
# Combine the data into a single dataset: every row of student_data is kept
# (left join) and gains the columns of its school, matched on the school name.
# The join key is named once; listing "school_name" twice added nothing.
# validate="many_to_one" makes pandas raise a MergeError if a school name occurs
# more than once in school_data, which would otherwise silently duplicate
# student rows and distort the counts and averages computed from this frame.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)
school_data_complete.head()

In [None]:
# (row count, column count) of the merged dataset
school_data_complete.shape

## District Summary

* Calculate the total number of schools

In [None]:
# Number of distinct school names in the merged dataset.
# dropna=False keeps a missing name countable, matching len(unique()).
total_schools = school_data_complete["school_name"].nunique(dropna=False)
print(f'Total schools = {total_schools}')

In [None]:
# Distinct school names as an array, in the order they first appear in the merged data
schools = pd.unique(school_data_complete["school_name"])
print(schools)

In [None]:
# Distinct values found in the "type" column
school_type_column = school_data_complete["type"]
types = school_type_column.unique()
types

* Calculate the total number of students

In [None]:
# Row count of the merged dataset, which keeps one row per row of student_data
total_students = school_data_complete.shape[0]
print(f'Total students = {total_students}')

* Calculate the total budget

In [None]:
# The budget comes from the school file, so after the merge it is repeated on
# every student row of a school. Collapse it to one value per school (max of the
# repeated values) and sum those to get the district total.
# Only the "budget" column is aggregated: taking max() over every column did
# needless work on columns that are never read here. The double brackets keep
# the result a DataFrame with a "budget" column.
budget_group_df = school_data_complete.groupby("school_name")[["budget"]].max()
total_budget = budget_group_df["budget"].sum()
total_budget

In [None]:
#Convert grade to integer
# Cast to str first so the cell can be re-run after the column is already int
# (the .str accessor raises on a numeric column). regex=False treats "th" as a
# literal substring whatever the installed pandas version's default is.
school_data_complete["grade"] = (
    school_data_complete["grade"]
    .astype(str)
    .str.replace("th", "", regex=False)
    .astype(int)
)
school_data_complete.head()

In [None]:
#Format budgets to appear as currency
# Build the formatted Series from the merged data rather than from
# budget_group_df itself: this cell rebinds budget_group_df from a DataFrame to
# a Series, so indexing it with ["budget"] would fail on a second run.
# The result is the same Series as before: one "$1,234"-style string per school.
budget_group_df = (
    school_data_complete.groupby("school_name")["budget"]
    .max()
    .map("${:,}".format)
)
budget_group_df

* Calculate the average math score 

In [None]:
# Mean math score over all rows, rounded to one decimal place
dist_math_avg = round(school_data_complete["math_score"].mean(), 1)
dist_math_avg

* Calculate the average reading score

In [None]:
# Mean reading score over all rows, rounded to one decimal place
reading_scores = school_data_complete["reading_score"]
dist_reading_avg = round(reading_scores.mean(), 1)
dist_reading_avg

* Calculate the average scores for each school

In [None]:
# Per-school mean of every numeric column, rounded to one decimal place.
# numeric_only=True is needed on pandas >= 2.0, where GroupBy.mean() raises a
# TypeError on non-numeric columns instead of silently dropping them.
school_avgs = school_data_complete.groupby(["school_name"]).mean(numeric_only=True)
school_avgs = school_avgs.round(1)
school_avgs

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

In [None]:
# Midpoint of the two district averages (each already rounded), to one decimal place
overall = round((dist_math_avg + dist_reading_avg) / 2, 1)
overall

* Calculate the percentage of students with a passing math score (70 or greater)

In [None]:
# Rows with a math score of 70 or more, then their share of all students as a
# percentage rounded to one decimal place
passed_math = school_data_complete["math_score"] >= 70
math_pass_count = school_data_complete.loc[passed_math]
math_pass_pcnt = round(len(math_pass_count) / total_students * 100, 1)
math_pass_pcnt

* Calculate the percentage of students with a passing reading score (70 or greater)

In [None]:
# Rows with a reading score of 70 or more, then their share of all students as a
# percentage rounded to one decimal place
passed_reading = school_data_complete["reading_score"].ge(70)
reading_pass_count = school_data_complete.loc[passed_reading]
reading_pass_pcnt = round(len(reading_pass_count) / total_students * 100, 1)
reading_pass_pcnt

In [None]:
#Create new District Summary dataframe:
# Collect the single row of district figures; the keys become the column
# headers in this order. Percentages are stored as text with a trailing "%".
district_summary_values = {
    "Total Students": total_students,
    "Total District Budget": total_budget,
    "District Math Average": dist_math_avg,
    "Percent Students Passing Math": str(math_pass_pcnt)+"%",
    "District Reading Average": dist_reading_avg,
    "Percent Students Passing Reading": str(reading_pass_pcnt)+"%",
    "Overall Passing Rate": str(overall)+"%",
}
# Wrap every value in a one-element list so the frame has exactly one row
dist_df = pd.DataFrame(
    {label: [value] for label, value in district_summary_values.items()}
)

In [None]:
#Format values in new dataframe
# Format from the underlying numbers rather than from the columns' current
# contents, so re-running the cell does not apply "{:,}" to text that is
# already formatted (which raises a ValueError). The result is unchanged:
# both columns hold the formatted text for the single summary row.
dist_df["Total Students"] = "{:,}".format(total_students)
dist_df["Total District Budget"] = "${:,}".format(total_budget)
dist_df

## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

In [None]:
# Number of rows (students) per school name
school_names = school_data_complete["school_name"]
school_pop = school_names.value_counts()
school_pop

In [None]:
# One row per school with its student count.
# Both columns come from school_pop so every name stays paired with its own
# count. pd.DataFrame(schools, school_pop) instead used the counts as the row
# index and matched them positionally against `schools`, which is built
# separately (via unique()) and is not guaranteed to share school_pop's order.
schools_df = pd.DataFrame({
    "school_name": school_pop.index,
    "Student Pop.": school_pop.values,
})
schools_df

In [None]:
# Group the merged rows by school, then take the largest budget value seen for each school
school_group = school_data_complete.groupby(by="school_name")
budget_by_school = school_group["budget"]
budgets = budget_by_school.max()
print(budgets)

In [None]:
# Budget divided by student count for each school (matched on school name),
# truncated to an integer
pp_spend = budgets.div(school_pop).astype(int)
pp_spend

In [None]:
# Assemble the per-school table from the three Series; rows follow the order of `schools`
summary_columns = {
    "Student Pop.": school_pop,
    "Budget": budgets,
    "Per Pupil Spend": pp_spend,
}
school_level_summary_df = pd.DataFrame(summary_columns, index=schools)
school_level_summary_df

## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

In [None]:
# Mean math score per school, to one decimal place; shown highest first
# (the stored Series itself is left unsorted)
math_rate_by_school = school_group["math_score"].mean().round(1)
math_rate_by_school.sort_values(ascending=False)

In [None]:
# Mean reading score per school, to one decimal place
reading_rate_by_school = school_group["reading_score"].mean().round(1)
reading_rate_by_school

In [None]:
# Attach the per-school score Series to the summary table; assignment matches
# rows on the school-name index.
# NOTE(review): despite the "Pass Rate" labels, these Series hold mean scores,
# not the share of students scoring 70 or more - confirm which metric is intended.
summary_score_columns = {
    "Math Pass Rate": math_rate_by_school,
    "Reading Pass Rate": reading_rate_by_school,
}
for column_label, school_means in summary_score_columns.items():
    school_level_summary_df[column_label] = school_means

In [None]:
# Overall figure per school: the midpoint of the (already rounded) math and
# reading means, rounded to one decimal place and ordered highest first.
overall_by_school = (math_rate_by_school + reading_rate_by_school) / 2
overall_by_school = overall_by_school.round(1).sort_values(ascending=False)

# Assignment matches on the school-name index, so the table keeps its own row order.
school_level_summary_df["Overall"] = overall_by_school

# Guarded so the cell can be re-run: an unconditional `del` raises KeyError once
# the column has already been removed.
if "Budget" in school_level_summary_df.columns:
    del school_level_summary_df["Budget"]

school_level_summary_df.head(8)

In [None]:
# Column dtypes of the per-school summary table
school_level_summary_df.dtypes

In [None]:
# Five schools with the highest mean reading score, highest first
ranked_reading = (
    reading_rate_by_school
    .sort_values(ascending=False)
    .head(5)
    .round(1)
)
ranked_reading

In [None]:
# Five schools with the highest mean math score
top_math_scores = math_rate_by_school.nlargest(5)
ranked_math = top_math_scores.round(1)
ranked_math

In [None]:
# First five entries of overall_by_school, which was sorted highest-first when it was built
best_overall = round(overall_by_school.head(5), 1)
best_overall

In [None]:
# Five schools with the lowest overall figure, lowest first
worst_overall = overall_by_school.nsmallest(5).round(1)
worst_overall

In [None]:
# Five largest values of the summary table's "Overall" column
overall_column = school_level_summary_df["Overall"]
top5_overall = overall_column.nlargest(5)
top5_overall

In [None]:
# Five smallest values of the summary table's "Overall" column
bottom5_overall = school_level_summary_df.loc[:, "Overall"].nsmallest(n=5)
bottom5_overall

In [None]:
# Take the school names from the computed ranking (the index of top5_overall)
# instead of a hand-typed list, so the table cannot go stale when the input
# data or the scoring changes.
top5_data = {"Top 5 Performing Schools": list(top5_overall.index)}
top5_data_df = pd.DataFrame(top5_data)
top5_data_df

In [None]:
# Mean scores for every (school, grade) pair, rounded to one decimal place.
# numeric_only=True is needed on pandas >= 2.0, where GroupBy.mean() raises a
# TypeError on non-numeric columns instead of silently dropping them.
grade_grouped = school_data_complete.groupby(["school_name", "grade"])
grade_grouped = grade_grouped.mean(numeric_only=True).round(1)

# Keep only the two score columns under display names. The selection already
# yields a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
grade_grouped_df = grade_grouped[["reading_score", "math_score"]].rename(
    columns={"reading_score": "Reading Avg", "math_score": "Math Avg"}
)
# Order rows by the two index levels
grade_grouped_df = grade_grouped_df.sort_values(by=["school_name", "grade"])
grade_grouped_df.head()

In [None]:
#Add new column for Overall Avg
# Create the column up front, filled with a float placeholder value
OVERALL_AVG_COLUMN = "Overall Avg"
grade_grouped_df[OVERALL_AVG_COLUMN] = 0.0

## Math & Reading Scores by Grade & School

In [None]:
# Midpoint of the reading and math averages on each row, to one decimal place
reading_plus_math = grade_grouped_df["Reading Avg"] + grade_grouped_df["Math Avg"]
grade_grouped_df["Overall Avg"] = (reading_plus_math / 2).round(1)
grade_grouped_df

In [None]:
# Largest, then smallest, per-pupil spend in the summary table
per_pupil_spend = school_level_summary_df["Per Pupil Spend"]
print(per_pupil_spend.max())
print(per_pupil_spend.min())

In [None]:
# Bucket schools by per-pupil spend. pd.cut uses right-closed intervals by
# default, i.e. (575, 600], (600, 625], (625, 650], (650, 675]; a value outside
# that range gets NaN instead of a label.
bins = [575, 600, 625, 650, 675]
group_labels = ["Low Investment", "Low Avg", "High Avg", "High Investment"]
# Bin the numeric pp_spend Series rather than the table's "Per Pupil Spend"
# column: a later cell rewrites that column as "$"-prefixed text, after which
# re-running this cell on the column would fail. Assignment matches on school name.
school_level_summary_df["Investment Level"] = pd.cut(pp_spend, bins, labels=group_labels)
school_level_summary_df.head()

In [None]:
# Bucket schools by student count. pd.cut uses right-closed intervals by
# default, i.e. (0, 1000], (1000, 2000], (2000, 5000].
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]
school_level_summary_df["School Size"] = pd.cut(
    school_level_summary_df["Student Pop."], size_bins, labels=group_names
)
# Format from the numeric pp_spend Series rather than from the column itself,
# so re-running the cell does not prepend a second "$" to text that is already
# formatted. Assignment matches on school name.
school_level_summary_df["Per Pupil Spend"] = pp_spend.map("${}".format)
school_level_summary_df.sort_values("School Size")

In [None]:
def highlight(val):
    """Return the CSS string for one table cell.

    Gives a yellow background when the cell value equals "High Investment",
    and an empty string (no styling) for any other value.
    """
    if val == "High Investment":
        return 'background-color: yellow'
    return ''
# Apply highlight() to every cell of the summary table.
# Styler.applymap was renamed to Styler.map in pandas 2.1 and the old name is
# deprecated, so use the new name when present and fall back on older pandas.
summary_styler = school_level_summary_df.style
apply_per_cell = getattr(summary_styler, "map", None) or summary_styler.applymap
apply_per_cell(highlight)