In [142]:
# Dependencies and Setup
import pandas as pd

# Relative paths to the input CSVs (adjust if the Resources folder moves)
school_data_load = "Resources/schools_complete.csv"
student_data_load = "Resources/students_complete.csv"

# Read the school and student files into pandas DataFrames
school_data = pd.read_csv(school_data_load)
student_data = pd.read_csv(student_data_load)

# Attach each school's attributes to every student row (left join keeps
# every student, even if a school name had no match).
# - The join key is named once; the original list repeated "school_name".
# - validate="many_to_one" makes pandas raise a MergeError if a school name
#   appears more than once in school_data, which would otherwise silently
#   duplicate student rows and skew downstream counts and percentages.
school_data_all_df = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

school_data_all_df

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...,...
39165,39165,Donna Howard,F,12th,Thomas High School,99,90,14,Charter,1635,1043130
39166,39166,Dawn Bell,F,10th,Thomas High School,95,70,14,Charter,1635,1043130
39167,39167,Rebecca Tanner,F,9th,Thomas High School,73,84,14,Charter,1635,1043130
39168,39168,Desiree Kidd,F,10th,Thomas High School,99,90,14,Charter,1635,1043130


In [143]:
# Show the column labels of the merged student/school frame
school_data_all_df.columns

Index(['Student ID', 'student_name', 'gender', 'grade', 'school_name',
       'reading_score', 'math_score', 'School ID', 'type', 'size', 'budget'],
      dtype='object')

In [171]:
# Number of distinct school names listed in the school table
tot_schools = len(school_data['school_name'].unique())
print(tot_schools)

15


In [172]:
# Calculate the total number of students (non-null Student ID entries)
tot_students_dist = student_data.loc[:, 'Student ID'].count()
print(tot_students_dist)

39170


In [169]:
# District-wide budget: the sum of the per-school budget column
tot_budget_dist = school_data.loc[:, 'budget'].sum()
tot_budget_dist

24649428

In [173]:
# Calculate the average math score across all students
avg_math_dist = student_data.loc[:, 'math_score'].mean()
print(avg_math_dist)

78.98537145774827


In [174]:
# Calculate the average reading score across all students
avg_read_dist = student_data.loc[:, 'reading_score'].mean()
print(avg_read_dist)

81.87784018381414


In [175]:
# Percentage of students who passed math.
# A score counts as passing when it is strictly greater than 69
# (threshold corrected per the assignment notes).
# Counting the True entries of the boolean mask gives the same number as
# taking the length of the filtered frame.
pass_math_dist = (
    int(school_data_all_df['math_score'].gt(69).sum())
    / tot_students_dist
    * 100
)

pass_math_dist

74.9808526933878

In [176]:
# Percentage of students who passed reading.
# A score counts as passing when it is strictly greater than 69
# (threshold corrected per the assignment notes).
# Counting the True entries of the boolean mask gives the same number as
# taking the length of the filtered frame.
pass_read_dist = (
    int(school_data_all_df['reading_score'].gt(69).sum())
    / tot_students_dist
    * 100
)

pass_read_dist

85.80546336482001

In [177]:
# Percentage of students who passed BOTH math and reading.
# A student counts only when each score is strictly greater than 69;
# the two boolean masks are AND-ed row by row and the True entries counted.
pass_mathread_dist = (
    int(
        (
            school_data_all_df['math_score'].gt(69)
            & school_data_all_df['reading_score'].gt(69)
        ).sum()
    )
    / tot_students_dist
    * 100
)

pass_mathread_dist

65.17232575950983

In [192]:
# Assemble the one-row district summary from the metrics computed above
dist_summary_df = pd.DataFrame(
    {
        "Total # of Schools": [tot_schools],
        "Total # of Students": [tot_students_dist],
        "Total Budget": [tot_budget_dist],
        "Average Math Score": [avg_math_dist],
        "Average Reading Score": [avg_read_dist],
        "% Passing Math": [pass_math_dist],
        "% Passing Reading": [pass_read_dist],
        "% Overall Passing": [pass_mathread_dist],
    }
)

# Format the District Summary Columns.
# Each listed column is replaced by its string rendering using the paired
# format template; "Total # of Schools" is intentionally left unformatted.
# assign() keeps the original column order because every key already exists.
dist_summary_df = dist_summary_df.assign(
    **{
        label: dist_summary_df[label].map(template.format)
        for label, template in {
            "Total # of Students": "{:,}",
            "Total Budget": "${:,}",
            "Average Math Score": "{:.0f}",
            "Average Reading Score": "{:.0f}",
            "% Passing Math": "{:.1f}%",
            "% Passing Reading": "{:.1f}%",
            "% Overall Passing": "{:.1f}%",
        }.items()
    }
)

print("DISTRICT SUMMARY")
dist_summary_df

DISTRICT SUMMARY


Unnamed: 0,Total # of Schools,Total # of Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428",79,82,75.0%,85.8%,65.2%
