In [2]:
import pandas as pd
import numpy as np

# Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option('display.max_columns', None)

In [3]:
# Paths to the raw CSV inputs, relative to the notebook's working directory
schools_complete = "Resources/schools_complete.csv"
students_complete = "Resources/students_complete.csv"

# Load each file and rename its ID column to snake_case so it matches the
# style of the remaining column names
school_data = pd.read_csv(schools_complete).rename(columns={"School ID": "school_id"})
student_data = pd.read_csv(students_complete).rename(columns={"Student ID": "student_id"})

# Attach each student's school attributes to the student row. The join key is
# named exactly once, and validate="many_to_one" makes the merge raise if a
# school name appears more than once in school_data rather than silently
# duplicating student rows.
school_data_complete = pd.merge(
    student_data,
    school_data,
    how="left",
    on="school_name",
    validate="many_to_one",
)

In [4]:
# Preview the first rows of the merged student/school table
school_data_complete.head()

Unnamed: 0,student_id,student_name,gender,grade,school_name,reading_score,math_score,school_id,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [5]:
# Summarise each column's dtype and non-null count for the merged table
school_data_complete.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39170 entries, 0 to 39169
Data columns (total 11 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   student_id     39170 non-null  int64 
 1   student_name   39170 non-null  object
 2   gender         39170 non-null  object
 3   grade          39170 non-null  object
 4   school_name    39170 non-null  object
 5   reading_score  39170 non-null  int64 
 6   math_score     39170 non-null  int64 
 7   school_id      39170 non-null  int64 
 8   type           39170 non-null  object
 9   size           39170 non-null  int64 
 10  budget         39170 non-null  int64 
dtypes: int64(6), object(5)
memory usage: 3.6+ MB


# District Summary
## Create a high-level snapshot, in a DataFrame, of the district's key metrics, including the following:

 - Total schools
 - Total students
 - Total budget
 - Average math score
 - Average reading score
 - % passing math (the percentage of students who passed math)
 - % passing reading (the percentage of students who passed reading)
 - % overall passing (the percentage of students who passed math AND reading)

In [8]:
# District-wide headline figures: row counts and the budget total come from
# school_data / student_data, the score averages from student_data
total_schools = school_data.shape[0]
total_students = student_data.shape[0]
total_budget = school_data["budget"].sum()
average_reading = student_data["reading_score"].mean()
average_math = student_data["math_score"].mean()

# Echo each figure on its own line as a quick sanity check
print(
    total_schools,
    total_students,
    total_budget,
    average_reading,
    average_math,
    sep="\n",
)

15
39170
24649428
81.87784018381414
78.98537145774827


In [12]:
# Percentage of students passing math (a score of 70 or higher is a pass)
mask = student_data["math_score"] >= 70
passing_math = int(mask.sum())  # True counts as 1, so the sum is the number of passers
math_percent = 100 * (passing_math / total_students)
print(math_percent)

74.9808526933878


In [14]:
# Percentage of students passing reading (a score of 70 or higher is a pass)
mask2 = student_data["reading_score"] >= 70
passing_reading = int(mask2.sum())  # True counts as 1, so the sum is the number of passers
reading_percent = 100 * (passing_reading / total_students)
print(reading_percent)

85.80546336482001


In [15]:
# Percentage of students passing both math and reading (70 or higher in each)
reading_ok = student_data["reading_score"] >= 70
math_ok = student_data["math_score"] >= 70
mask3 = reading_ok & math_ok
passing_both = int(mask3.sum())  # True counts as 1, so the sum is the number of passers
both_percent = 100 * (passing_both / total_students)
print(both_percent)

65.17232575950983


In [16]:
# District summary as a one-row DataFrame; the dict's insertion order sets
# the column order of the resulting frame
metrics = {
    "Total Schools": total_schools,
    "Total Students": total_students,
    "Total Budget": total_budget,
    "Average Math Score": average_math,
    "Average Reading Score": average_reading,
    "% Passing Math": math_percent,
    "% Passing Reading": reading_percent,
    "% Overall Passing": both_percent,
}

# A single record (one dict) becomes a single row
metricsdf = pd.DataFrame.from_records([metrics])
metricsdf

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,65.172326
