In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import os 

# Files to Load 
school_data = "Resources/schools_complete.csv"
student_data = "Resources/students_complete.csv"

# Store csv files in DataFrames 
school_df = pd.read_csv(school_data)
student_df = pd.read_csv(student_data)

# Combine the data into a single dataset.  
combined_df = pd.merge(student_df, school_df, how="left", on=["school_name", "school_name"])
combined_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
#DISTRICT SUMMARY
#Calculate Number of Schools
school_total = len(school_df["school_name"].unique())

#Calculate Number of Students
student_total = school_df["size"].sum()

#Calculate Total Budget
budget_total = school_df["budget"].sum()

#Calculate Average Math Score
avg_math_score = student_df["math_score"].mean()

#Calculate Average Reading Score
avg_reading_score = student_df["reading_score"].mean()

#Calculate % Passing Math
pct_passing_math = ((student_df["math_score"] >= 70).sum()/student_total)*100

#Calculate % Passing Reading
pct_passing_reading = ((student_df["reading_score"] >= 70).sum()/student_total)*100

#Calculate % Passing Both
pct_passing_overall = (((student_df["math_score"] >= 70).count() & (student_df["reading_score"] >=70).count())/student_total)*100

#Create DataFrame for Results
district_summary = pd.DataFrame({"School Total": [school_total],
                                "Student Total": [student_total],
                                "Budget": [budget_total],
                                "Average Math Score": [avg_math_score],
                                "Average Reading Score": [avg_reading_score],
                                "Percent Passing Math": [pct_passing_math],
                                "Percent Passing Reading": [pct_passing_reading],
                                "Percent Passing Overall": [pct_passing_overall]})
                    
#Order Columns 
district_summary = district_summary[["School Total", "Student Total", "Budget", "Average Math Score", "Average Reading Score", "Percent Passing Math", "Percent Passing Reading", "Percent Passing Overall"]]

              
#Clean Results
district_summary["Student Total"] = district_summary["Student Total"].map("{:,}".format)
district_summary["Budget"] = district_summary["Budget"].map("${:,}".format)
district_summary["Average Math Score"] = district_summary["Average Math Score"].map("{:.2f}".format)                    
district_summary["Average Reading Score"] = district_summary["Average Reading Score"].map("{:.2f}".format)  
district_summary["Percent Passing Math"] = district_summary["Percent Passing Math"].map("{:.2f}%".format)
district_summary["Percent Passing Reading"] = district_summary["Percent Passing Reading"].map("{:.2f}%".format)
district_summary["Percent Passing Overall"] = district_summary["Percent Passing Overall"].map("{:.2f}%".format)

district_summary.head()

Unnamed: 0,School Total,Student Total,Budget,Average Math Score,Average Reading Score,Percent Passing Math,Percent Passing Reading,Percent Passing Overall
0,15,39170,"$24,649,428",78.99,81.88,74.98%,85.81%,1.33%


In [3]:
#SCHOOL SUMMARY
#School Name
group_school = combined_df.set_index("school_name").groupby(["school_name"], as_index=False)
#School Type
school_type = combined_df.set_index("school_name")["type"] 
#Total Students
school_students = group_school["Student ID"].count()
#Total School Budget
school_budget = school_df.set_index("school_name")["budget"]
#Per Student Budget
per_studet_budget = school_df.set_index("school_name")["budget"]/school_df.set_index("school_name")["size"]
#Avg Math Score
school_avg_math = group_school["math_score"].mean()
#Avg Reading Score
school_avg_reading = group_school["reading_score"].mean()
#% Pass Math
school_pct_math = combined_df[combined_df["math_score"] >= 70].groupby("school_name")["Student ID"].count()/school_students 
#% Pass Reading
school_pct_reading = combined_df[combined_df["reading_score"] >= 70].groupby("school_name")["Student ID"].count()/school_students
#% Pass Both
school_pct_both = combined_df[(combined_df["math_score"] >= 70) & (combined_df["reading_score"] >= 70)].groupby("school_name")["Student ID"].count()/school_students

#DataFrame
school_summary_table = pd.DataFrame({
    "School Type": [school_type]
    "Student Total": [school_students]
    "Budget": [school_budget]
    "Per Student Budget": [per_student_budget]
    "Average Math Score": [school_avg_math]
    "Average Reading Score": [school_avg_reading]
    "Percent Passing Math": [school_pct_math]
    "Percent Passing Reading": [school_pct_reading]
    "Percent Passing Overall": [school_pct_both] 
})
#Order Columns
#school_summary_table = [
#    "School Type",
#    "Student Total",
#    "Budget",
#    "Per Student Budget",
#    "Average Math Score",
#    "Average Reading Score",
#    "Percent Passing Math",
#    "Percent Passing Reading",
#    "Percent Passing Overall"]

school_summary_table.style.format({"Student Total": "{:,}",
                            "Budget": "${:,}",
                            "Average Math Score": "{:,.2f}",                    
                            "Average Reading Score": "{:,.2f}",  
                            "Percent Passing Math": "{:,.2%}",
                            "Percent Passing Reading": "{:,.2%}",
                            "Percent Passing Overall": "{:,.2%}"})

SyntaxError: invalid syntax (<ipython-input-3-ae47ccb88ba8>, line 26)

In [None]:
#Sort and display the top five performing schools by % overall passing
top_schools = school_summary_table.sort_values("Percent Passing Overall", ascending=false)
top_schools.head()

In [None]:
#Sort and display the five worst-performing schools by % overall passing
bottom_schools = school_summary_table.sort_values("Percent Passing Overall")
bottom_schools.head()

In [None]:
#Create a table that lists the average Reading Score for students of each grade level (9th, 10th, 11th, 12th) at each school.
    #Create a pandas series for each grade. Hint: use a conditional statement.
    
    
    #Group each series by school
    #Combine the series into a dataframe
    #Optional: give the displayed data cleaner formatting