In [59]:
import pandas as pd
import os

In [60]:
# Input CSV paths, each resolved relative to the "resources" folder.
school_data_load, student_data_load, missing_grades_load = (
    os.path.join("resources", csv_name)
    for csv_name in (
        "schools_complete.csv",
        "clean_students_complete.csv",
        "missing_grades.csv",
    )
)

In [61]:
# Load each CSV into its own DataFrame.
missing_grades_df = pd.read_csv(missing_grades_load)
school_data_df = pd.read_csv(school_data_load)
student_data_df = pd.read_csv(student_data_load)

# Combine the datasets into a single DataFrame: every student row gains the
# columns of its school. The join key is the shared "school_name" column,
# named once (the original passed the same key twice in a list).
school_data_complete_df = pd.merge(student_data_df, school_data_df, on="school_name")

In [62]:
# Preview the first five rows of the merged student/school frame.
school_data_complete_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,3,Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [63]:
# Total students = number of non-null entries in the student_name column.
student_names = school_data_complete_df["student_name"]
student_count = student_names.count()
print(f"Total number of students: {student_count}")

Total number of students: 39170


In [64]:
# Number of distinct school names in the merged frame.
# dropna=False keeps the result equal to len(Series.unique()), which would
# count a missing name as one distinct value.
school_count = school_data_complete_df["school_name"].nunique(dropna=False)
print(f"Total number of schools: {school_count}")

Total number of schools: 15


In [65]:
# Total district budget: add up each school's budget exactly once.
# The merged frame repeats a school's budget on every one of its student rows,
# so reduce to one row per school before summing. De-duplicating on the budget
# value itself (sum of .unique()) would silently drop a school whose budget
# happens to equal another school's and understate the total.
per_school_budget = school_data_complete_df.drop_duplicates(subset="school_name")["budget"]
budget_total = per_school_budget.sum()
print(f"Total budget: {budget_total}")

Total budget: 24649428


In [66]:
# Mean reading score over every student row in the merged frame.
reading_scores = school_data_complete_df["reading_score"]
reading_avg = reading_scores.mean()
print(f"The average reading score is {reading_avg}")

The average reading score is 81.87784018381414


In [67]:
# Mean math score over every student row in the merged frame.
math_scores = school_data_complete_df["math_score"]
math_avg = math_scores.mean()
print(f"The average math score is {math_avg}")

The average math score is 78.98537145774827


In [68]:
# A score of 70 or above counts as passing; build one filtered frame per subject.
# Rows of students passing math:
passing_math = school_data_complete_df.loc[school_data_complete_df["math_score"].ge(70)]
# Rows of students passing reading:
passing_reading = school_data_complete_df.loc[school_data_complete_df["reading_score"].ge(70)]

In [69]:
# Number of students passing math (non-null names in the filtered frame).
passing_math_count = passing_math.loc[:, "student_name"].count()
passing_math_count

29370

In [70]:
# Number of students passing reading (non-null names in the filtered frame).
passing_reading_count = passing_reading.loc[:, "student_name"].count()
passing_reading_count

33610

In [81]:
# Share of all students passing each subject, as a percentage.
# Division is already true division here, so no float() cast is needed.
reading_pass_percent = passing_reading_count / student_count * 100
math_pass_percent = passing_math_count / student_count * 100

print(reading_pass_percent)
print(math_pass_percent)

85.80546336482001
74.9808526933878


In [84]:
# Students who passed both subjects: combine the two per-subject conditions
# (score of 70 or above) and keep only rows where both hold.
passed_math_mask = school_data_complete_df["math_score"] >= 70
passed_reading_mask = school_data_complete_df["reading_score"] >= 70
passing_math_reading = school_data_complete_df[passed_math_mask & passed_reading_mask]

passing_math_reading.head()

Unnamed: 0.1,Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
4,4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
5,5,5,Bryan Miranda,M,9th,Huang High School,94,94,0,District,2917,1910635
6,6,6,Sheena Carter,F,11th,Huang High School,82,80,0,District,2917,1910635
8,8,8,Michael Roth,M,10th,Huang High School,95,87,0,District,2917,1910635
9,9,9,Matthew Greene,M,10th,Huang High School,96,84,0,District,2917,1910635


In [86]:
# Head-count of students who passed both math and reading.
overall_passing_math_reading_count = passing_math_reading.loc[:, "student_name"].count()
overall_passing_math_reading_count

25528

In [87]:
# Overall passing rate: students who passed both subjects as a share of all
# students, scaled to a percentage.
overall_passing_fraction = overall_passing_math_reading_count / student_count
overall_passing_percentage = overall_passing_fraction * 100
overall_passing_percentage

65.17232575950983

In [72]:
# Inspect the column dtypes of the student frame as loaded from CSV.
# NOTE(review): the "Unnamed: 0" column is presumably a row index that was
# written into the cleaned CSV — confirm, and consider dropping it on load.
student_data_df.dtypes

Unnamed: 0        int64
Student ID        int64
student_name     object
gender           object
grade            object
school_name      object
reading_score     int64
math_score        int64
dtype: object

In [114]:
# Gather the district-wide metrics computed in earlier cells into one record,
# then wrap it in a single-row DataFrame for display.
district_summary_row = {
    "Total Schools": school_count,
    "Total Students": student_count,
    "Total Budget": budget_total,
    "Average Math Score": math_avg,
    "Average Reading Score": reading_avg,
    "% Passing Math": math_pass_percent,
    "% Passing Reading": reading_pass_percent,
    "% Overall Passing": overall_passing_percentage,
}
district_summary_df = pd.DataFrame([district_summary_row])
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326


In [115]:
# Display formatting for the summary table: one format spec per column.
# NOTE: this overwrites the numeric columns with formatted strings, so the cell
# cannot be re-run on its own; rebuild district_summary_df first.
summary_formats = {
    "Total Students": "{:,}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:.1f}",
    "Average Reading Score": "{:.1f}",
    "% Passing Reading": "{:.0f}",
    "% Passing Math": "{:.0f}",
    "% Overall Passing": "{:.0f}",
}
for summary_column, format_spec in summary_formats.items():
    district_summary_df[summary_column] = district_summary_df[summary_column].map(format_spec.format)
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",79.0,81.9,75,86,65


In [None]:
# NOTE(review): bare name `R` in a never-executed cell, with no definition
# visible in this notebook — looks like a stray keystroke and would presumably
# raise NameError on Restart & Run All. Confirm and delete this cell.
R