### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read the school-level and student-level CSV files into DataFrames
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Left-join the school attributes onto every student row so each student
# record carries its school's columns. The join key is named once; the
# original listed "school_name" twice, which was redundant.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name")

# Display the first rows of the combined frame
school_data_complete.head()




## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the overall passing rate (overall average score), i.e. (avg. math score + avg. reading score)/2

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [None]:
# Number of distinct schools appearing in the merged data
school_count = school_data_complete["school_name"].nunique()

# Number of student records (non-null Student IDs)
student_count = school_data_complete["Student ID"].count()

# District-wide budget: sum of every school's budget
total_budget = school_data["budget"].sum()

# District-wide mean math and reading scores
avg_math_score = student_data["math_score"].mean()
avg_reading_score = student_data["reading_score"].mean()

# "Overall passing rate" as defined by the assignment: the mean of the two averages
overall_avg_score = (avg_math_score + avg_reading_score) / 2

# Share of students scoring 70 or higher in math, as a percentage
math = student_data[student_data["math_score"] >= 70]
passing_math = (len(math) / student_count) * 100

# Share of students scoring 70 or higher in reading, as a percentage
reading = student_data[student_data["reading_score"] >= 70]
passing_reading = (len(reading) / student_count) * 100

# Collect the metrics in display order, then wrap each in a one-element list
# so the resulting frame has a single row
district_metrics = {
    'School Count': school_count,
    'Student Count': student_count,
    'Total Budget': total_budget,
    'Average Math Score': avg_math_score,
    'Average Reading Score': avg_reading_score,
    'Overall Passing Rate': overall_avg_score,
    'Percentage Passing Math': passing_math,
    'Percentage Passing Reading': passing_reading,
}
school_results = pd.DataFrame({label: [value] for label, value in district_metrics.items()})
school_results.head()

#Optional: give the displayed data cleaner formatting


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)
  
* Create a dataframe to hold the above results

## Top Performing Schools (By Passing Rate)

* Sort and display the top five schools in overall passing rate

In [None]:
# Work on an independent copy: plain assignment would only alias school_data,
# so the derived column added below would silently appear in school_data too.
# (The original also called set_index('School ID') without keeping the result,
# which had no effect; it is dropped so 'School ID' stays a regular column.)
school_summary = school_data.copy()

# Budget per student for each school
school_summary["per_student_budget"] = school_summary["budget"] / school_summary["size"]

# Average reading score per school
avg_reading_score = school_data_complete.groupby("school_name").reading_score.mean().reset_index()
avg_reading_score = avg_reading_score.rename(columns={'reading_score': 'avg_reading_score'})

# Average math score per school
avg_math_score = school_data_complete.groupby("school_name").math_score.mean().reset_index()
avg_math_score = avg_math_score.rename(columns={'math_score': 'avg_math_score'})

# Number of students per school with a passing math score (70 or greater).
# rename() returns a new frame here; renaming a column slice in place is what
# triggered pandas' SettingWithCopyWarning in the original.
passing_math_count = school_data_complete[school_data_complete['math_score'] >= 70].groupby('school_name').count().reset_index()
passing_math = passing_math_count[['school_name', 'math_score']].rename(columns={'math_score': 'count_passing_math'})

# Number of students per school with a passing reading score (70 or greater)
passing_reading_count = school_data_complete[school_data_complete['reading_score'] >= 70].groupby('school_name').count().reset_index()
passing_reading = passing_reading_count[['school_name', 'reading_score']].rename(columns={'reading_score': 'count_passing_reading'})

# Merge all per-school pieces onto the school table.
# The passing counts are left-joined and missing counts become 0, so a school
# with no passing students is reported at 0% instead of vanishing from the table.
school_summary_complete = pd.merge(school_summary, avg_reading_score, how="inner", on="school_name")
school_summary_complete = pd.merge(school_summary_complete, avg_math_score, how="inner", on="school_name")
school_summary_complete = pd.merge(school_summary_complete, passing_math, how="left", on="school_name")
school_summary_complete = pd.merge(school_summary_complete, passing_reading, how="left", on="school_name")
passing_count_columns = ['count_passing_math', 'count_passing_reading']
school_summary_complete[passing_count_columns] = school_summary_complete[passing_count_columns].fillna(0)

# Convert passing counts to percentages of the school's enrollment.
# 'size' must be accessed with brackets because DataFrame.size is a pandas attribute.
school_summary_complete['pct_passing_reading'] = (school_summary_complete['count_passing_reading'] / school_summary_complete['size']) * 100
school_summary_complete['pct_passing_math'] = (school_summary_complete['count_passing_math'] / school_summary_complete['size']) * 100

# Overall passing rate: mean of the two passing percentages
school_summary_complete['Overall Passing Rate'] = (school_summary_complete['pct_passing_reading'] + school_summary_complete['pct_passing_math']) / 2

# Drop the intermediate counts and give the columns display-friendly headings
school_summary_complete = school_summary_complete.drop(columns=passing_count_columns)
school_summary_complete = school_summary_complete.rename(columns={
    'school_name': 'School Name',
    'type': 'School Type',
    'size': 'Student Population',
    'budget': 'Budget',
    'per_student_budget': 'Budget per Student',
    'avg_reading_score': 'Average Reading Score',
    'avg_math_score': 'Average Math Score',
    'pct_passing_reading': 'Percent Passing Reading',
    'pct_passing_math': 'Percent Passing Math',
})

# Sort and display the top five schools in overall passing rate
school_summary_complete.sort_values(by=['Overall Passing Rate'], ascending=False).head(5)
 








## Bottom Performing Schools (By Passing Rate)

* Sort and display the five worst-performing schools

In [None]:
# Display the five schools with the lowest overall passing rate
# (sort_values orders ascending by default, so the worst performers come first)
school_summary_complete.sort_values("Overall Passing Rate").iloc[:5]

## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [None]:
# Split the student records into one frame per grade level
ninth_grades = student_data[student_data['grade'].eq('9th')]
tenth_grades = student_data[student_data['grade'].eq('10th')]
eleventh_grades = student_data[student_data['grade'].eq('11th')]
twelvth_grades = student_data[student_data['grade'].eq('12th')]

# Mean math score per school within each grade (school_name kept as a column)
ninth_grades_schools = ninth_grades.groupby('school_name', as_index=False)['math_score'].mean()
tenth_grades_schools = tenth_grades.groupby('school_name', as_index=False)['math_score'].mean()
eleventh_grades_schools = eleventh_grades.groupby('school_name', as_index=False)['math_score'].mean()
twelvth_grades_schools = twelvth_grades.groupby('school_name', as_index=False)['math_score'].mean()

# Join the four per-grade averages on school name, relabelling each incoming
# math_score column right after its merge so the next merge does not collide
math_scores_by_grade = (
    ninth_grades_schools
    .merge(tenth_grades_schools, how="outer", on="school_name")
    .rename(columns={'math_score_x': '9th Grade Avg', 'math_score_y': '10th Grade Avg'})
    .merge(eleventh_grades_schools, how="outer", on="school_name")
    .rename(columns={'math_score': '11th Grade Avg'})
    .merge(twelvth_grades_schools, how="outer", on="school_name")
    .rename(columns={'math_score': '12th Grade Avg'})
)

math_scores_by_grade.head(25)


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [None]:
# Split the student records into one frame per grade level
ninth_grades = student_data.loc[student_data['grade'] == '9th']
tenth_grades = student_data.loc[student_data['grade'] == '10th']
eleventh_grades = student_data.loc[student_data['grade'] == '11th']
twelvth_grades = student_data.loc[student_data['grade'] == '12th']

# Average reading score per school within each grade
ninth_grades_schools = ninth_grades.groupby('school_name').reading_score.mean().reset_index()
tenth_grades_schools = tenth_grades.groupby('school_name').reading_score.mean().reset_index()
eleventh_grades_schools = eleventh_grades.groupby('school_name').reading_score.mean().reset_index()
twelvth_grades_schools = twelvth_grades.groupby('school_name').reading_score.mean().reset_index()

# Merge the per-grade averages into one table keyed by school.
# Every step builds on reading_scores_by_grade itself: the original chained
# from math_scores_by_grade (a copy-paste slip), so the "reading" table ended
# up showing the math averages instead.
reading_scores_by_grade = pd.merge(ninth_grades_schools, tenth_grades_schools, how="outer", on="school_name")
reading_scores_by_grade = reading_scores_by_grade.rename(columns={'reading_score_x': '9th Grade Avg', 'reading_score_y': '10th Grade Avg'})
reading_scores_by_grade = pd.merge(reading_scores_by_grade, eleventh_grades_schools, how="outer", on="school_name")
reading_scores_by_grade = reading_scores_by_grade.rename(columns={'reading_score': '11th Grade Avg'})
reading_scores_by_grade = pd.merge(reading_scores_by_grade, twelvth_grades_schools, how="outer", on="school_name")
reading_scores_by_grade = reading_scores_by_grade.rename(columns={'reading_score': '12th Grade Avg'})

reading_scores_by_grade.head(25)

## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * Overall Passing Rate (Average of the above two)

In [None]:
# Sample bins. Feel free to create your own bins.
# Bin edges handed to pd.cut on the "Budget per Student" column in the next
# cell; five edges define four spending ranges.
spending_bins = [0, 585, 615, 645, 675]
# Display labels for the four ranges, listed in the same order as the bins.
group_names = ["<$585", "$585-615", "$615-645", "$645-675"]

In [None]:
# Tag every school with the spending range its per-student budget falls into
school_summary_complete["School Spending"] = pd.cut(
    school_summary_complete["Budget per Student"], spending_bins, labels=group_names
)

# Metrics requested for the spending breakdown
spending_metrics = [
    "Average Math Score",
    "Average Reading Score",
    "Percent Passing Math",
    "Percent Passing Reading",
    "Overall Passing Rate",
]

# Average each metric across the schools in a spending range (an unweighted
# mean of the per-school values). The result is stored under its own name:
# the original rebound school_summary_complete to the GroupBy object, which
# left later cells (and re-runs of earlier ones) without the per-school
# DataFrame, and describe() did not produce the requested table.
# observed=False keeps every spending range in the output, even an empty one.
scores_by_spending = (
    school_summary_complete
    .groupby("School Spending", observed=False)[spending_metrics]
    .mean()
)
scores_by_spending


## Scores by School Size

* Perform the same operations as above, based on school size.

In [None]:
# Sample bins. Feel free to create your own bins.
# Bin edges for grouping schools by size; four edges define three ranges.
size_bins = [0, 1000, 2000, 5000]
# Display labels for the three size ranges, in bin order.
# NOTE(review): this rebinds group_names, which the spending cell also passes
# to pd.cut as its labels — re-run the spending-bins cell before re-running
# the spending breakdown, or the label count will not match its bins.
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

## Scores by School Type

* Perform the same operations as above, based on school type.