In [None]:
# Dependencies and Setup
import pandas as pd

# Locations of the two raw CSV inputs
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Load each CSV into its own pandas DataFrame
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Attach the school-level columns to every student row. The left join keeps
# every student record, even one whose school_name has no match in school_data.
df = student_data.merge(school_data, how="left", on="school_name")

# Preview the first rows of the combined dataset
df.head()

## District Summary

In [None]:
# District-wide headline figures computed from the merged student/school frame.

# Calculate the Totals (Schools and Students)
students = df['Student ID'].count()
schools = df['school_name'].nunique()

# Calculate the Total Budget
# Summed from school_data rather than the merged frame, where each school's
# budget is repeated on every one of its student rows.
budget = school_data['budget'].sum()

# Calculate the Average Scores
avg_math = df['math_score'].mean()
avg_reading = df['reading_score'].mean()

# Calculate the Percentage Pass Rates (a score of 70 or higher counts as a pass)
passed_math = df['math_score'] >= 70
passed_reading = df['reading_score'] >= 70

math_pass = df.loc[passed_math]
read_pass = df.loc[passed_reading]
# Students who cleared the threshold in both subjects
both_pass = df.loc[passed_math & passed_reading]

# Divide by the district-wide student total computed above, so this cell does
# not depend on any variable defined further down the notebook.
math_pct = len(math_pass) / students
read_pct = len(read_pass) / students
both_pct = len(both_pass) / students

# Minor Data Cleanup: assemble a one-row summary, then format it for display
summary_df = pd.DataFrame({"Total Students" : [students], "Total Schools" : [schools], 
                           "Total Budget" : [budget], "Average Math Score" : [avg_math], 
                           "Average Reading Score" : [avg_reading], "Stu. Pass % (Math)" : [math_pct],
                           "Stu. Pass % (Reading)" : [read_pct], "Stu. Pass % (Both)" : [both_pct]})

summary_df['Total Students'] = summary_df['Total Students'].map("{:,}".format)
summary_df['Total Budget'] = summary_df['Total Budget'].map("${:,}".format)
summary_df['Average Math Score'] = summary_df['Average Math Score'].map("{:.2f}".format)
summary_df['Average Reading Score'] = summary_df['Average Reading Score'].map("{:.2f}".format)
summary_df['Stu. Pass % (Math)'] = summary_df['Stu. Pass % (Math)'].map("{:.2%}".format)
summary_df['Stu. Pass % (Reading)'] = summary_df['Stu. Pass % (Reading)'].map("{:.2%}".format)
summary_df['Stu. Pass % (Both)'] = summary_df['Stu. Pass % (Both)'].map("{:.2%}".format)

# Display the data frame
summary_df

## School Summary

In [None]:
# Per-school summary: type, size, budget, average scores and pass rates.

# Determine the School Type
school_types = school_data.set_index(['school_name'])['type']

# Calculate the total student count (rows per school in the merged frame)
student_count = df["school_name"].value_counts()

# Calculate the total school budget and per capita spending
# Select the column before aggregating so only 'budget' is reduced; aggregating
# the whole frame would also touch the text columns.
students_by_school = df.groupby('school_name')
tot_school_budget = students_by_school['budget'].max()

student_budget = tot_school_budget / student_count

# Calculate the average test scores
mathavg = students_by_school['math_score'].mean()
readingavg = students_by_school['reading_score'].mean()

# Calculate the passing scores (a score of 70 or higher counts as a pass)
math_ok = df["math_score"] >= 70
read_ok = df["reading_score"] >= 70

# Filtered frames of passing students, kept available for inspection
passmath = df[math_ok]
passread = df[read_ok]

# The mean of a boolean flag per school is the share of students passing; a
# school with no passing students yields 0.0 instead of a missing value.
passmathpct = math_ok.groupby(df['school_name']).mean()
passreadpct = read_ok.groupby(df['school_name']).mean()

# Share of students who passed BOTH subjects, matching how the district
# summary defines "Stu. Pass % (Both)" (not the average of the two rates).
passboth = (math_ok & read_ok).groupby(df['school_name']).mean()

# Convert to data frame/Minor data munging
# All inputs are Series indexed by school name, so they align on that index.
schoolsummary_df = pd.DataFrame({"Total Students" : student_count, "School Type" : school_types, 
                                 "Total School Budget" : tot_school_budget, "Budget per Student" : student_budget, 
                                 "Average Math Score" : mathavg, "Average Reading Score" : readingavg, 
                                 "Stu. Pass % (Math)" : passmathpct ,"Stu. Pass % (Reading)" : passreadpct, 
                                 "Stu. Pass % (Both)" : passboth})

# Format a copy for display; schoolsummary_df keeps the numeric values so later
# cells can still bin and aggregate on them.
schoolsummaryform_df = schoolsummary_df.copy()

schoolsummaryform_df['Total Students'] = schoolsummary_df['Total Students'].map("{:,}".format)
schoolsummaryform_df['Total School Budget'] = schoolsummary_df['Total School Budget'].map("${:,}".format)
# Per-student spending is a float quotient, so fix it at two decimals
schoolsummaryform_df['Budget per Student'] = schoolsummary_df['Budget per Student'].map("${:,.2f}".format)
schoolsummaryform_df['Average Math Score'] = schoolsummary_df['Average Math Score'].map("{:.2f}".format)
schoolsummaryform_df['Average Reading Score'] = schoolsummary_df['Average Reading Score'].map("{:.2f}".format)
schoolsummaryform_df['Stu. Pass % (Math)'] = schoolsummary_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolsummaryform_df['Stu. Pass % (Reading)'] = schoolsummary_df['Stu. Pass % (Reading)'].map("{:.2%}".format)
schoolsummaryform_df['Stu. Pass % (Both)'] = schoolsummary_df['Stu. Pass % (Both)'].map("{:.2%}".format)

# Display the data frame

schoolsummaryform_df


## Top Performing Schools (By Passing Rate)

In [None]:
# Sort and show top five schools
# Rank on the numeric pass rates in schoolsummary_df. The formatted frame holds
# strings such as "95.59%", which sort lexicographically ("100.00%" would come
# before "95.59%"), so it is only used for display.
top_order = schoolsummary_df.sort_values('Stu. Pass % (Both)', ascending = False).index
aschools = schoolsummaryform_df.loc[top_order]

aschools.head()

## Bottom Performing Schools (By Passing Rate)

In [None]:
# Sort and show bottom five schools
# Rank on the numeric pass rates in schoolsummary_df. The formatted frame holds
# strings such as "95.59%", which sort lexicographically rather than by value,
# so it is only used for display.
bottom_order = schoolsummary_df.sort_values('Stu. Pass % (Both)').index
dschools = schoolsummaryform_df.loc[bottom_order]

dschools.head()

## Math Scores by Grade

In [None]:
# Average math score per school, broken out by grade level.

# Grade labels as they appear in the 'grade' column, in display order
grade_levels = ['9th', '10th', '11th', '12th']

# For each grade: keep that grade's students, group by school name, and average
# the math score. Selecting 'math_score' before .mean() restricts the
# aggregation to that one column instead of every column in the frame.
math_by_grade = {
    grade: df[df['grade'] == grade].groupby('school_name')['math_score'].mean()
    for grade in grade_levels
}

# Combine series into single data frame (one column per grade, indexed by school)
mathscores_grade = pd.DataFrame(math_by_grade)

# Minor data munging: show every average with two decimals
for grade in grade_levels:
    mathscores_grade[grade] = mathscores_grade[grade].map("{:.2f}".format)

# Display the data frame, with the school name as a regular column
final_math = mathscores_grade.reset_index()
final_math

## Reading Score by Grade 

In [None]:
# Average reading score per school, broken out by grade level.

# Grade labels as they appear in the 'grade' column, in display order
grade_levels = ['9th', '10th', '11th', '12th']

# For each grade: keep that grade's students, group by school name, and average
# the reading score. Selecting 'reading_score' before .mean() restricts the
# aggregation to that one column instead of every column in the frame.
reading_by_grade = {
    grade: df[df['grade'] == grade].groupby('school_name')['reading_score'].mean()
    for grade in grade_levels
}

# Combine series into single data frame (one column per grade, indexed by school)
readscores_grade = pd.DataFrame(reading_by_grade)

# Minor data munging: show every average with two decimals
for grade in grade_levels:
    readscores_grade[grade] = readscores_grade[grade].map("{:.2f}".format)

# Display the data frame, with the school name as a regular column
final_reading = readscores_grade.reset_index()
final_reading

## Scores by School Spending

In [None]:
# Establish the bins -- choose any set of bins you would like, but see below for testing bins
# to test, set your bins as follows: [0, 585, 615, 645, 675]
# ALSO -- Note that the values for `% Passing Math`, `% Passing Reading` and `% Overall Passing Rate`
# were computed using averages of averages -- your results may vary if you use weighted averages 

# Categorize the spending based on the bins
# The top edge is 675 so it agrees with the "645-675" label; a lower edge would
# leave schools spending up to $675 per student outside every bin.
bins = [0, 590, 615, 645, 675]
group_names = ["Less than $590", "$590-615", "$615-645", "645-675"]
schoolsummary_df['Spending Ranges'] = pd.cut(schoolsummary_df['Budget per Student'], bins, labels = group_names, include_lowest = True)

# Assemble into data frame
# Average only the score and pass-rate columns: the frame also carries text and
# categorical columns that cannot be averaged. observed=False keeps every bin
# in the result, even one that contains no school.
score_columns = ['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']
schoolspending_df = schoolsummary_df.groupby('Spending Ranges', observed = False)[score_columns].mean()

# Minor data munging: two-decimal scores and percentage-formatted pass rates
schoolspending_df['Average Math Score'] = schoolspending_df['Average Math Score'].map("{:.2f}".format)
schoolspending_df['Average Reading Score'] = schoolspending_df['Average Reading Score'].map("{:.2f}".format)
schoolspending_df['Stu. Pass % (Both)'] = schoolspending_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schoolspending_df['Stu. Pass % (Math)'] = schoolspending_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolspending_df['Stu. Pass % (Reading)'] = schoolspending_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schoolspending_df

## Scores by School Size

In [None]:
# Establish the bins 

bins = [0, 1000, 2000, 5000]
group_names = ["Small", "Medium", "Large"]

# Categorize the school size (total students) based on the bins
schoolsummary_df['School Size'] = pd.cut(schoolsummary_df['Total Students'], bins, labels = group_names, include_lowest = True)

# Calculate the scores based on bins
# Average only the score and pass-rate columns: the frame also carries text and
# categorical columns that cannot be averaged. observed=False keeps every size
# bucket in the result, even one that contains no school.
score_columns = ['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']
schoolsize_df = schoolsummary_df.groupby('School Size', observed = False)[score_columns].mean()

# Minor data munging: two-decimal scores and percentage-formatted pass rates
schoolsize_df['Average Math Score'] = schoolsize_df['Average Math Score'].map("{:.2f}".format)
schoolsize_df['Average Reading Score'] = schoolsize_df['Average Reading Score'].map("{:.2f}".format)
schoolsize_df['Stu. Pass % (Both)'] = schoolsize_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schoolsize_df['Stu. Pass % (Math)'] = schoolsize_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schoolsize_df['Stu. Pass % (Reading)'] = schoolsize_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schoolsize_df

## Scores by School Type

In [None]:
# Type | Average Math Score | Average Reading Score | % Passing Math | % Passing Reading | % Overall Passing Rate

# Assemble into data frame
# Average only the score and pass-rate columns within each school type; the
# frame may also carry categorical bin columns that cannot be averaged.
score_columns = ['Average Math Score', 'Average Reading Score', 'Stu. Pass % (Math)', 'Stu. Pass % (Reading)', 'Stu. Pass % (Both)']
schooltype_df = schoolsummary_df.groupby('School Type')[score_columns].mean()

# Minor data munging: two-decimal scores and percentage-formatted pass rates
schooltype_df['Average Math Score'] = schooltype_df['Average Math Score'].map("{:.2f}".format)
schooltype_df['Average Reading Score'] = schooltype_df['Average Reading Score'].map("{:.2f}".format)
schooltype_df['Stu. Pass % (Both)'] = schooltype_df['Stu. Pass % (Both)'].map("{:.2%}".format)
schooltype_df['Stu. Pass % (Math)'] = schooltype_df['Stu. Pass % (Math)'].map("{:.2%}".format)
schooltype_df['Stu. Pass % (Reading)'] = schooltype_df['Stu. Pass % (Reading)'].map("{:.2%}".format)

# Display results
schooltype_df