In [1]:
### Setup
# Import pandas
import pandas as pd

# Locations of the two source CSV files, relative to the working directory
schools_file = "Resources/schools_complete.csv"
students_file = "Resources/students_complete.csv"

# Read each CSV into its own dataframe
schools_df = pd.read_csv(schools_file)
students_df = pd.read_csv(students_file)

# Readable headings for the raw snake_case column names
column_titles = {
    "school_name": "School Name",
    "type": "Type",
    "size": "Size",
    "budget": "Budget",
    "student_name": "Student Name",
    "gender": "Gender",
    "grade": "Grade",
    "reading_score": "Reading Score",
    "math_score": "Math Score",
}

# Left-join the students onto the schools by school name, then apply the headings
df = schools_df.merge(students_df, how="left", on=["school_name"])
df = df.rename(columns=column_titles)
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'Resources/schools_complete.csv'

In [None]:
### District Summary
# Builds a one-row table of district-wide totals, average scores and passing
# rates from the merged school/student dataframe `df`.
# Passing rates are stored as fractions between 0 and 1 (not multiplied by 100).

# Count unique schools
count_unique_schools = len(df["School Name"].unique())
print(count_unique_schools)

# Total students (rows with a non-null Student ID)
total_students = df["Student ID"].count()
print(total_students)

# Total budget: add up every school's budget exactly once.
# Summing only the *unique* budget values would silently drop a school
# whenever two schools happen to share the same budget figure.
# Assumes schools_df holds one row per school -- TODO confirm.
total_budget = schools_df["budget"].sum()
print(total_budget)

# Average math score
average_math_score = df["Math Score"].mean()
print(average_math_score)

# Average reading score
average_reading_score = df["Reading Score"].mean()
print(average_reading_score)

# Clean up: drop rows with any missing value, then store both scores as integers.
# Later cells read this cleaned `df`.
# NOTE(review): total_students above is counted before this cleanup, so if any
# rows are dropped here the passing rates below divide by a larger population
# than the one they count -- confirm this is intended.
df = df.dropna(how="any")
df = df.astype({"Math Score": "int64", "Reading Score": "int64"})


# Fraction of students who passed math (score of 70 or higher)
pass_math_df = df.loc[df["Math Score"] >= 70, :]
count_math_pass = pass_math_df["Student ID"].count()

percent_pass_math = count_math_pass / total_students
print(percent_pass_math)


# Fraction of students who passed reading (score of 70 or higher)
pass_reading_df = df.loc[df["Reading Score"] >= 70, :]
count_reading_pass = pass_reading_df["Student ID"].count()

percent_pass_reading = count_reading_pass / total_students
print(percent_pass_reading)


# Fraction of students who passed math AND reading
pass_mathreading_df = df.loc[(df["Math Score"] >= 70) & (df["Reading Score"] >= 70), :]
count_mathreading_pass = pass_mathreading_df["Student ID"].count()

percent_pass_mathreading = count_mathreading_pass / total_students
print(percent_pass_mathreading)

# Create summary dataframe (a single row; each value is wrapped in a list so
# pandas treats it as one row of data)
district_summary_df = pd.DataFrame({"Total Unique Schools" : [count_unique_schools],
                                   "Total Students" : [total_students],
                                   "Total Budget" : [total_budget],
                                   "Average Math Score" : [average_math_score],
                                   "Average Reading Score" : [average_reading_score],
                                   "Percent of Students Passing Math" : [percent_pass_math],
                                   "Percent of Students Passing Reading" : [percent_pass_reading],
                                   "Percent of Students Passing Overall" : [percent_pass_mathreading]
                                   })
district_summary_df


In [None]:
### School Summary
# Builds one row per school: type, enrolment, budget, average scores and
# passing rates (stored as fractions between 0 and 1).

# Set up: start from the schools table, indexed by school name, keeping its type
school_summary = schools_df.set_index(["school_name"])["type"]
school_summary_df = pd.DataFrame(school_summary)

# Every aggregation below selects its column *before* aggregating.
# Aggregating the whole frame first (e.g. groupby(...).mean()["Budget"]) also
# tries to average the text columns, which raises TypeError on pandas 2.0+.

# Total students per school
students_per_school = df.groupby("School Name")["Student ID"].count()


# Total budget per school (the school's budget repeats on each of its student
# rows after the merge, so the per-school mean recovers that single value)
budget_per_school = df.groupby("School Name")["Budget"].mean()


# Budget per student per school
budget_per_student = budget_per_school / students_per_school


# Average math score per school
total_school_math_score = df.groupby("School Name")["Math Score"].sum()
average_school_math_score = total_school_math_score / students_per_school

# Average reading score per school
total_school_reading_score = df.groupby("School Name")["Reading Score"].sum()
average_school_reading_score = total_school_reading_score / students_per_school


# Fraction of students who passed math per school (score of 70 or higher).
# Summing the boolean flag counts the passing students and yields 0 -- rather
# than a missing value -- for a school where nobody passed.
df["Pass Math"] = df["Math Score"] >= 70
count_pass_math_per_school = df.groupby("School Name")["Pass Math"].sum()
percent_pass_math_per_school = count_pass_math_per_school / students_per_school


# Fraction of students who passed reading per school
df["Pass Reading"] = df["Reading Score"] >= 70
count_pass_reading_per_school = df.groupby("School Name")["Pass Reading"].sum()
percent_pass_reading_per_school = count_pass_reading_per_school / students_per_school

# Fraction of students who passed math AND reading per school
df["Pass Math and Reading"] = ((df["Reading Score"] >= 70) & (df["Math Score"] >= 70))
count_pass_mathreading_per_school = df.groupby("School Name")["Pass Math and Reading"].sum()
percent_pass_mathreading_per_school = count_pass_mathreading_per_school / students_per_school

# Create summary dataframe; each assignment aligns on the school-name index
school_summary_df = school_summary_df.rename(columns = {"type" : "Type"})
school_summary_df["Total Students"] = students_per_school
school_summary_df["Total Budget"] = budget_per_school
school_summary_df["Per Student Budget"] = budget_per_student
school_summary_df["Average Math Score"] = average_school_math_score
school_summary_df["Average Reading Score"] = average_school_reading_score
school_summary_df["% Passing Math"] = percent_pass_math_per_school
school_summary_df["% Passing Reading"] = percent_pass_reading_per_school
school_summary_df["% Passing Overall"] = percent_pass_mathreading_per_school
school_summary_df.head()


In [None]:
# Best-performing schools: order all schools from the highest to the lowest
# overall passing rate and preview the first rows
top_schools_df = school_summary_df.sort_values(
    by="% Passing Overall",
    ascending=False,
)
top_schools_df.head()


In [None]:
# Worst-performing schools: order all schools from the lowest to the highest
# overall passing rate and preview the first rows
bottom_schools_df = school_summary_df.sort_values(
    by="% Passing Overall",
    ascending=True,
)
bottom_schools_df.head()

In [None]:
# Math scores by grade
# Select the score column before averaging: averaging the whole grouped frame
# also tries to average the text columns, which raises TypeError on pandas 2.0+.
average_math_by_grade = df.groupby("Grade")["Math Score"].mean()
average_math_by_grade_df = pd.DataFrame(average_math_by_grade)
average_math_by_grade_df.head()

In [None]:
# Reading scores by grade
# Select the score column before averaging: averaging the whole grouped frame
# also tries to average the text columns, which raises TypeError on pandas 2.0+.
average_reading_by_grade = df.groupby("Grade")["Reading Score"].mean()
average_reading_by_grade_df = pd.DataFrame(average_reading_by_grade)
average_reading_by_grade_df.head()

In [None]:
# Scores by school spending
# Buckets each school by its per-student budget and averages the school-level
# metrics within each bucket, giving one summary row per spending range.
spending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]

# NOTE(review): a per-student budget above 680 falls outside the last bin and
# gets a missing Spend Range -- confirm the upper edge covers the data.
school_summary_df["Spend Range"] = pd.cut(school_summary_df["Per Student Budget"], spending_bins, labels = labels, include_lowest = True)

# Group once and pick each metric before averaging. Averaging the whole frame
# would also hit the text "Type" column, which raises TypeError on pandas 2.0+.
# observed=False keeps every spending range in the result, even an empty one,
# regardless of the pandas version's default.
spending_groups = school_summary_df.groupby("Spend Range", observed = False)

spending_math_scores = spending_groups["Average Math Score"].mean()
spending_reading_scores = spending_groups["Average Reading Score"].mean()
spending_passing_math = spending_groups["% Passing Math"].mean()
spending_passing_reading = spending_groups["% Passing Reading"].mean()
overall_passing_spending = spending_groups["% Passing Overall"].mean()

# Pass each Series directly so the table has one row per spending range.
# Wrapping a Series in a list instead produces a single row whose cells each
# hold an entire Series object.
spending_summary_df = pd.DataFrame({"Average Math Score" : spending_math_scores,
                                   "Average Reading Score" : spending_reading_scores,
                                   "% Passing Math (Percentage Of Students Who Passed Math)" : spending_passing_math,
                                   "% Passing Reading (Percentage Of Students Who Passed Reading)" : spending_passing_reading,
                                   "% Overall Passing (Percentage Of Students Who Passed Math and Reading)" : overall_passing_spending,
                                   })

spending_summary_df

In [None]:
# Scores by School Size
# Buckets each school by its student count and averages the school-level
# metrics within each bucket, giving one summary row per size band.

size_bins = [0, 1000, 2000, 5000]
size_labels = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

# NOTE(review): pd.cut's default right-closed intervals place a school of
# exactly 1000 students in the "Small (<1000)" band, and a school above 5000
# students gets a missing School Size -- confirm both are acceptable.
school_summary_df["School Size"] = pd.cut(school_summary_df["Total Students"], size_bins, labels = size_labels, include_lowest = True)

# Group once and pick each metric before averaging. Averaging the whole frame
# would also hit the text "Type" column, which raises TypeError on pandas 2.0+.
# observed=False keeps every size band in the result, even an empty one,
# regardless of the pandas version's default.
size_groups = school_summary_df.groupby("School Size", observed = False)

math_scores_size = size_groups["Average Math Score"].mean()
reading_scores_size = size_groups["Average Reading Score"].mean()
passing_math_size = size_groups["% Passing Math"].mean()
passing_reading_size = size_groups["% Passing Reading"].mean()
# Holds the overall passing rate per size band (variable name kept as-is)
passing_spending_size = size_groups["% Passing Overall"].mean()

# Pass each Series directly so the table has one row per size band.
# Wrapping a Series in a list instead produces a single row whose cells each
# hold an entire Series object.
size_summary_df = pd.DataFrame({"Average Math Score" : math_scores_size,
                                   "Average Reading Score" : reading_scores_size,
                                   "% Passing Math (Percentage Of Students Who Passed Math)" : passing_math_size,
                                   "% Passing Reading (Percentage Of Students Who Passed Reading)" : passing_reading_size,
                                   "% Overall Passing (Percentage Of Students Who Passed Math and Reading)" : passing_spending_size,
                                   })

size_summary_df


In [None]:
# Scores by School Type
# Averages the school-level metrics within each school type, giving one
# summary row per type.

# Group once and pick each metric before averaging. Averaging the whole frame
# would also try to average any non-numeric columns present on
# school_summary_df, which raises TypeError on pandas 2.0+.
type_groups = school_summary_df.groupby("Type")

math_scores_type = type_groups["Average Math Score"].mean()
reading_scores_type = type_groups["Average Reading Score"].mean()
passing_math_type = type_groups["% Passing Math"].mean()
passing_reading_type = type_groups["% Passing Reading"].mean()
# Holds the overall passing rate per school type (variable name kept as-is)
passing_spending_type = type_groups["% Passing Overall"].mean()

# Pass each Series directly so the table has one row per school type.
# Wrapping a Series in a list instead produces a single row whose cells each
# hold an entire Series object.
type_summary_df = pd.DataFrame({"Average Math Score" : math_scores_type,
                                   "Average Reading Score" : reading_scores_type,
                                   "% Passing Math (Percentage Of Students Who Passed Math)" : passing_math_type,
                                   "% Passing Reading (Percentage Of Students Who Passed Reading)" : passing_reading_type,
                                   "% Overall Passing (Percentage Of Students Who Passed Math and Reading)" : passing_spending_type,
                                   })

type_summary_df