### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# Input CSV locations, relative to the notebook's working directory
# (remember to change these if the Resources folder lives elsewhere).
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school and student files into separate pandas DataFrames.
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset. The left join keeps every row of
# student_data and adds the columns of the school with the same school_name.
# The key is given once (the original listed "school_name" twice), and
# validate="many_to_one" makes pandas raise MergeError if a school name is
# repeated in school_data instead of silently duplicating student rows.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [2]:
# Raw student CSV header -> label used in the rest of the notebook.
student_column_labels = {
    "student_name": "Name",
    "gender": "Gender",
    "grade": "Grade",
    "school_name": "School Name",
    "reading_score": "Reading Score",
    "math_score": "Math Score",
}

# rename() returns a new frame, so student_data keeps its original headers.
student_df = student_data.rename(columns=student_column_labels)
student_df.head()

Unnamed: 0,Student ID,Name,Gender,Grade,School Name,Reading Score,Math Score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
# Raw school CSV header -> label used in the rest of the notebook.
school_column_labels = {
    "school_name": "School Name",
    "type": "Type",
    "size": "Size",
    "budget": "Budget",
}

# rename() returns a new frame, so school_data keeps its original headers.
school_df = school_data.rename(columns=school_column_labels)
school_df.head()

Unnamed: 0,School ID,School Name,Type,Size,Budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [6]:
# Combine the data into a single dataset. The left join keeps every row of
# student_df and adds the columns of the school with the same "School Name".
# The key is given once (the original listed "School Name" twice), and
# validate="many_to_one" makes pandas raise MergeError if a school name is
# repeated in school_df; otherwise student rows would be duplicated and every
# count taken from the combined frame would be inflated.
combined_data = pd.merge(
    student_df,
    school_df,
    how="left",
    on="School Name",
    validate="many_to_one",
)

# Column selection and order for the combined view: student fields first,
# then the fields that came from the school table.
combined_columns = [
    "Student ID",
    "Name",
    "Gender",
    "Grade",
    "Reading Score",
    "Math Score",
    "School Name",
    "School ID",
    "Type",
    "Size",
    "Budget",
]
combined_df = combined_data[combined_columns]

combined_df.head()

Unnamed: 0,Student ID,Name,Gender,Grade,Reading Score,Math Score,School Name,School ID,Type,Size,Budget
0,0,Paul Bradley,M,9th,66,79,Huang High School,0,District,2917,1910635
1,1,Victor Smith,M,12th,94,61,Huang High School,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,90,60,Huang High School,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,67,58,Huang High School,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,97,84,Huang High School,0,District,2917,1910635


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [8]:
# --- District-wide totals and averages ---
total_schools = combined_df["School Name"].nunique()

# Each row of combined_df is counted as one student.
st_index = combined_df.index
total_students = len(st_index)

# The budget is summed from the per-school table, not the per-student frame.
total_budget = school_df["Budget"].sum()

avg_math_score = combined_df["Math Score"].mean()
avg_read_score = combined_df["Reading Score"].mean()

# --- Passing rates (a score of 70 or greater passes) ---
# Boolean masks are built once and reused for the combined rate.
math_pass_mask = combined_df["Math Score"] >= 70
read_pass_mask = combined_df["Reading Score"] >= 70

# int() keeps the division in plain Python numbers, stored as fractions
# (0-1); the percent format below turns them into percentages.
passing_math = int(math_pass_mask.sum()) / total_students
passing_read = int(read_pass_mask.sum()) / total_students
overall_passing = int((math_pass_mask & read_pass_mask).sum()) / total_students

# --- One-row summary table ---
summary_row = {
    'Total Schools': total_schools,
    'Total Students': total_students,
    'Total Budget': total_budget,
    'Average Math Score': avg_math_score,
    'Average Reading Score': avg_read_score,
    'Students Passing Math (%)': passing_math,
    'Students Passing Reading (%)': passing_read,
    'Students Overall Passing (%)': overall_passing,
}
dist_summary_df = pd.DataFrame([summary_row])

# --- Display formatting ---
# Column -> format pattern. After this step the formatted columns hold
# strings, so they are for display only.
display_formats = {
    'Total Students': "{:,}",
    'Total Budget': "{:,}",
    'Average Math Score': "{:.2f}",
    'Average Reading Score': "{:.2f}",
    'Students Passing Math (%)': '{:.2%}',
    'Students Passing Reading (%)': '{:.2%}',
    'Students Overall Passing (%)': '{:.2%}',
}
dist_summary_df = dist_summary_df.assign(**{
    label: dist_summary_df[label].map(pattern.format)
    for label, pattern in display_formats.items()
})

dist_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Students Passing Math (%),Students Passing Reading (%),Students Overall Passing (%)
0,15,39170,24649428,78.99,81.88,74.98%,85.81%,65.17%


* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [None]:


# TODO: the school summary is not implemented yet. The template below is
# commented out, and none of the per-school variables it references are
# defined in the cells visible in this notebook.
# NOTE(review): the template has no 'Total School Budget' column, although the
# instructions for this section list it; add it when building the frame.
# school_summary_df = pd.DataFrame({'School Name': school_name,
#                                  'School Type': school_type,
#                                  'Total Students': school_total_students,
#                                  'Per Student Budget': per_student_budget,
#                                  'Average Math Score': school_avg_math_score,
#                                  'Average Reading Score': school_avg_read_score,
#                                  '% Passing Math': percent_passing_math,
#                                  '% Passing Reading': percent_passing_read,
#                                  '% Overall Passing': percent_overall_passing})

## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

## Reading Scores by Grade

* Perform the same operations as above for reading scores

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)

## Scores by School Size

* Perform the same operations as above, based on school size.

## Scores by School Type

* Perform the same operations as above, based on school type