In [21]:
# import dependencies
import pandas as pd
from pathlib import Path

# Load the raw CSV files (paths are relative to the notebook's working directory)
school_data_to_load = Path('schools_complete.csv')
student_data_to_load = Path('students_complete.csv')

school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

# Merge data into one DataFrame: one row per student, with the matching
# school's columns attached. A left join keeps every student row.
# The join key is passed once; listing 'school_name' twice was redundant.
school_df = pd.merge(student_data, school_data, how='left', on='school_name')
school_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [22]:
## District Summary

In [23]:
# District-wide headline metrics, computed from the merged per-student frame
# `school_df` and collected into the one-row `district_summary` table.

# Total number of unique schools
total_schools = school_df['school_name'].nunique()

# Total number of students (one row per student in the merged frame)
total_students = school_df['student_name'].count()

# Total budget. After the merge each school's budget is repeated on every one
# of its student rows, so keep a single row per school before summing;
# summing the raw column would count each budget once per student.
total_budget = school_df.drop_duplicates(subset='school_name')['budget'].sum()

# Average math score
math_score_avg = school_df['math_score'].mean()

# Average reading score
reading_score_avg = school_df['reading_score'].mean()

# Row masks for a passing score (70 or higher) in each subject
passed_math = school_df['math_score'] >= 70
passed_reading = school_df['reading_score'] >= 70

# Percentage of students with passing math scores
passing_math = school_df[passed_math].count()["student_name"] / float(total_students) * 100

# Percentage of students with passing reading scores
passing_reading = school_df[passed_reading].count()["student_name"] / float(total_students) * 100

# Percentage of students passing BOTH subjects. The two masks are combined
# with `&` first and the frame is filtered once; applying `&` between a
# filtered DataFrame and a scalar ratio does not compute a pass rate.
passing_overall = school_df[passed_math & passed_reading].count()["student_name"] / float(total_students) * 100

# District Summary DF (single row). 'Average Reading Score' is included so the
# reading average computed above is actually reported.
district_summary = pd.DataFrame({
    'Total Schools': [total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [math_score_avg],
    'Average Reading Score': [reading_score_avg],
    '% Passing Math': [passing_math],
    '% Passing Reading': [passing_reading],
    '% Overall Passing': [passing_overall]
})
district_summary

In [24]:
## School Summary

In [25]:
# Per-school metrics, each a Series indexed by school name, assembled into
# the `school_summary` table (one row per school).

# School type. `school_df` has one row per student, so take the first value
# within each school to get exactly one entry per school; a Series with one
# entry per student has duplicate index labels and cannot be aligned with the
# per-school Series below.
school_type = school_df.groupby("school_name")["type"].first()

# Total students by school
total_students_school = school_df["school_name"].value_counts()

# Total school budget (the budget is constant within a school, so the mean
# of the repeated values is the budget itself)
total_school_budget = school_df.groupby(["school_name"])["budget"].mean()

# Budget per student
per_student_budget = total_school_budget / total_students_school

# Average math score by school
avg_math_score_school = school_df.groupby(["school_name"])["math_score"].mean()

# Average reading score by school
avg_reading_score_school = school_df.groupby(["school_name"])["reading_score"].mean()

# Percentage passing math by school: passing count divided by enrolment.
# Reindexing with fill_value=0 keeps a school with no passing students at 0%
# instead of dropping it from the count (which would yield NaN).
students_passing_math = school_df[(school_df["math_score"] >= 70)]
passing_math_percentage_school = (
    students_passing_math.groupby(["school_name"]).size()
    .reindex(total_students_school.index, fill_value=0)
    / total_students_school * 100
)

# Percentage passing reading by school
students_passing_reading = school_df[(school_df["reading_score"] >= 70)]
passing_reading_percentage_school = (
    students_passing_reading.groupby(["school_name"]).size()
    .reindex(total_students_school.index, fill_value=0)
    / total_students_school * 100
)

# Overall passing percentage by school (passed both reading and math)
students_overall_passing = school_df[
    (school_df["reading_score"] >= 70) & (school_df["math_score"] >= 70)
]
overall_passing_percentage = (
    students_overall_passing.groupby(["school_name"]).size()
    .reindex(total_students_school.index, fill_value=0)
    / total_students_school * 100
)

# School Summary DF. '% Overall Passing' is included because the ranking and
# grouping cells further down sort and aggregate on that column.
school_summary = pd.DataFrame({
    'School Type': school_type,
    'Total Students': total_students_school,
    'Total School Budget': total_school_budget,
    'Per Student Budget' : per_student_budget,
    'Average Math Score' : avg_math_score_school,
    'Average Reading Score' : avg_reading_score_school,
    '% Passing Math' : passing_math_percentage_school,
    '% Passing Reading' : passing_reading_percentage_school,
    '% Overall Passing' : overall_passing_percentage
})
school_summary


NameError: name 'avg_reading_school' is not defined

In [None]:
## Highest-Performing Schools

In [None]:
# Order schools from highest to lowest overall pass rate, then keep the first five.
schools_best_first = school_summary.sort_values(by='% Overall Passing', ascending=False)
top_schools = schools_best_first.head(5)
top_schools

In [None]:
## Lowest-Performing Schools

In [None]:
# Order schools from lowest to highest overall pass rate, then keep the first five.
schools_worst_first = school_summary.sort_values(by='% Overall Passing', ascending=True)
bottom_schools = schools_worst_first.head(5)
bottom_schools

In [None]:
## Math Score by Grade

In [None]:
def _math_mean_by_school(grade_rows):
    """Return the mean `math_score` per `school_name` for the given student rows."""
    return grade_rows.groupby("school_name")["math_score"].mean()


# Split the merged student frame into one frame per grade level.
ninth_graders = school_df.loc[school_df["grade"].eq("9th")]
tenth_graders = school_df.loc[school_df["grade"].eq("10th")]
eleventh_graders = school_df.loc[school_df["grade"].eq("11th")]
twelfth_graders = school_df.loc[school_df["grade"].eq("12th")]

# Average math score per school within each grade.
ninth_grade_math_scores = _math_mean_by_school(ninth_graders)
tenth_grader_math_scores = _math_mean_by_school(tenth_graders)
eleventh_grader_math_scores = _math_mean_by_school(eleventh_graders)
twelfth_grader_math_scores = _math_mean_by_school(twelfth_graders)

# One column per grade, one row per school.
math_scores_by_grade = pd.DataFrame({
    "9th": ninth_grade_math_scores,
    "10th": tenth_grader_math_scores,
    "11th": eleventh_grader_math_scores,
    "12th": twelfth_grader_math_scores,
})

# Drop the index label ("school_name") so it is not shown above the rows.
math_scores_by_grade.index.name = None

# Show the table
math_scores_by_grade

In [None]:
## Reading Scores by Grade

In [None]:
def _reading_mean_by_school(grade_rows):
    """Return the mean `reading_score` per `school_name` for the given student rows."""
    return grade_rows.groupby("school_name")["reading_score"].mean()


# Split the merged student frame into one frame per grade level.
ninth_graders = school_df.loc[school_df["grade"].eq("9th")]
tenth_graders = school_df.loc[school_df["grade"].eq("10th")]
eleventh_graders = school_df.loc[school_df["grade"].eq("11th")]
twelfth_graders = school_df.loc[school_df["grade"].eq("12th")]

# Average reading score per school within each grade.
ninth_grade_reading_scores = _reading_mean_by_school(ninth_graders)
tenth_grader_reading_scores = _reading_mean_by_school(tenth_graders)
eleventh_grader_reading_scores = _reading_mean_by_school(eleventh_graders)
twelfth_grader_reading_scores = _reading_mean_by_school(twelfth_graders)

# One column per grade, one row per school.
reading_scores_by_grade = pd.DataFrame({
    "9th": ninth_grade_reading_scores,
    "10th": tenth_grader_reading_scores,
    "11th": eleventh_grader_reading_scores,
    "12th": twelfth_grader_reading_scores,
})

# Fix the column order explicitly, then drop the index label ("school_name")
# so it is not shown above the rows.
grade_order = ["9th", "10th", "11th", "12th"]
reading_scores_by_grade = reading_scores_by_grade[grade_order]
reading_scores_by_grade.index.name = None

# Show the table
reading_scores_by_grade

In [None]:
## Scores by School Spending

In [None]:
# Average each school-level metric within per-student spending brackets.

# Bin edges for per-student budget and the label for each bracket
spending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]

# Work on a copy so the bracket column is not added to `school_summary` itself
school_spending_df = school_summary.copy()

# Categorize based on bins. right=False makes each bracket closed on the left
# and open on the right: [0, 585), [585, 630), [630, 645), [645, 680).
# NOTE(review): a per-student budget of 680 or more falls outside every
# bracket and becomes NaN — confirm the data never reaches that value.
school_spending_df["Spending Ranges (Per Student)"] = pd.cut(
    per_student_budget, spending_bins, labels=labels, right=False
)

# Group once and reuse the grouper. The key is categorical, so `observed` is
# passed explicitly: observed=False keeps every bracket in the result (empty
# ones as NaN) and does not depend on the pandas default for categorical keys.
by_spending = school_spending_df.groupby("Spending Ranges (Per Student)", observed=False)

# Calculate averages for score columns
spending_math_scores = by_spending["Average Math Score"].mean()
spending_reading_scores = by_spending["Average Reading Score"].mean()
spending_passing_math = by_spending["% Passing Math"].mean()
spending_passing_reading = by_spending["% Passing Reading"].mean()
overall_passing_spending = by_spending["% Overall Passing"].mean()

# Create DataFrame: one row per spending bracket
spending_summary = pd.DataFrame(
    {
        "Average Math Score" : spending_math_scores,
        "Average Reading Score": spending_reading_scores,
        "% Passing Math": spending_passing_math,
        "% Passing Reading": spending_passing_reading,
        "% Overall Passing": overall_passing_spending
    }
)

# Display DF
spending_summary

In [None]:
## Scores by School Size

In [None]:
# Average each school-level metric within school-size brackets.

# Bin by school size. pd.cut's default right=True makes each bracket open on
# the left and closed on the right: (0, 1000], (1000, 2000], (2000, 5000].
size_bins = [0, 1000, 2000, 5000]
size_labels = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]
# Note: this adds the 'School Size' column to `school_summary` itself.
school_summary['School Size'] = pd.cut(school_summary['Total Students'], size_bins, labels=size_labels)

# Group once and reuse the grouper. The key is categorical, so `observed` is
# passed explicitly: observed=False keeps every size bracket in the result
# (empty ones as NaN) and does not depend on the pandas default for
# categorical keys.
by_size = school_summary.groupby("School Size", observed=False)

# Calculate averages for the desired columns.
size_math_scores = by_size["Average Math Score"].mean()
size_reading_scores = by_size["Average Reading Score"].mean()
size_passing_math = by_size["% Passing Math"].mean()
size_passing_reading = by_size["% Passing Reading"].mean()
size_overall_passing = by_size["% Overall Passing"].mean()

# Create DataFrame based on school size: one row per size bracket
size_summary = pd.DataFrame(
    {
        "Average Math Score" : size_math_scores,
        "Average Reading Score": size_reading_scores,
        "% Passing Math": size_passing_math,
        "% Passing Reading": size_passing_reading,
        "% Overall Passing": size_overall_passing
    }
)

# Display DF
size_summary

In [None]:
## Scores by School Type

In [None]:
# Group `school_summary` by "School Type" once, then average each metric
# across the schools of that type.
by_type = school_summary.groupby("School Type")

average_math_score_by_type = by_type["Average Math Score"].mean()
average_reading_score_by_type = by_type["Average Reading Score"].mean()
average_percent_passing_math_by_type = by_type["% Passing Math"].mean()
average_percent_passing_reading_by_type = by_type["% Passing Reading"].mean()
average_percent_overall_passing_by_type = by_type["% Overall Passing"].mean()

# Assemble the per-type averages: one row per school type, one column per metric.
type_summary = pd.DataFrame({
    "Average Math Score" : average_math_score_by_type,
    "Average Reading Score": average_reading_score_by_type,
    "% Passing Math": average_percent_passing_math_by_type,
    "% Passing Reading": average_percent_passing_reading_by_type,
    "% Overall Passing": average_percent_overall_passing_by_type,
})

# Show the table
type_summary