In [2]:
# Import dependencies
import pandas as pd
import numpy as np

In [3]:
# Import csv files
schools_data = "Resources/schools_complete.csv"
students_data = "Resources/students_complete.csv"

In [4]:
# Read School and Student Data File and store into Pandas DataFrames
schools_data = pd.read_csv(schools_data)
students_data = pd.read_csv(students_data)

In [6]:
# View the school data
schools_data.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [7]:
# View the student data
students_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [8]:
# Combine the data into a single dataset.  
schools_data_complete = pd.merge(students_data, schools_data, how="left", on=["school_name", "school_name"])

In [9]:
# View the data
schools_data_complete.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [10]:
# Check for empty cells
schools_data_complete.count()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

## District Summary

In [17]:
# Create data frame for total number of schools
total_schools = schools_data['school_name'].count()
total_schools

15

In [18]:
# Create data frame for total number of students
total_students = students_data['student_name'].count()
total_students

39170

In [19]:
# Create data frame for total school budget
total_budget = schools_data['budget'].sum()
total_budget

24649428

In [21]:
# Create data frame for the average math score
avg_math_score = students_data['math_score'].mean()
avg_math_score

78.98537145774827

In [22]:
# Calculate the average reading score
avg_reading_score = students_data['reading_score'].mean()
avg_reading_score

81.87784018381414

In [25]:
# Calculate the percentage of students passing math and save to new data frame
num_passing_math = students_data.loc[students_data['math_score'] >= 70]['math_score'].count()
percent_passing_math = num_passing_math/total_students * 100
percent_passing_math

74.9808526933878

In [28]:
# Calculate the percentage of students passing reading and save to a new data frame
num_passing_reading = students_data.loc[students_data['reading_score'] >= 70]['reading_score'].count()
percent_passing_reading = num_passing_reading/total_students * 100
percent_passing_reading

85.80546336482001

In [29]:
# Calculate percentage of students passing math and reading and save to new data frame
overall_passing_rate = (avg_math_score + avg_reading_score)/2
overall_passing_rate

80.43160582078121

In [34]:
# Create data frame to hold the results
district_sum = pd.DataFrame({'Total Schools': [total_schools],
                         'Total Students': [total_students],
                         'Total Budget': [total_budget],
                         'Average Math Score': [avg_math_score],
                         'Average Reading Score': [avg_reading_score],
                         '% Passing Math': [percent_passing_math],
                         '% Passing Reading':[percent_passing_reading],
                         'Overall Passing Rate': [overall_passing_rate]
                        })
district_sum

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,24649428,78.985371,81.87784,74.980853,85.805463,80.431606


## School Summary

In [37]:
# Count schools and save to data frame
total_number_schools = schools_data_complete.count()
total_number_schools

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [38]:
# index by school and group by school name
by_school = schools_data_complete.set_index('school_name').groupby(['school_name'])
by_school.head()

Unnamed: 0_level_0,Student ID,student_name,gender,grade,reading_score,math_score,School ID,type,size,budget
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Huang High School,0,Paul Bradley,M,9th,66,79,0,District,2917,1910635
Huang High School,1,Victor Smith,M,12th,94,61,0,District,2917,1910635
Huang High School,2,Kevin Rodriguez,M,12th,90,60,0,District,2917,1910635
Huang High School,3,Dr. Richard Scott,M,12th,67,58,0,District,2917,1910635
Huang High School,4,Bonnie Ray,F,9th,97,84,0,District,2917,1910635
...,...,...,...,...,...,...,...,...,...,...
Thomas High School,37535,Norma Mata,F,10th,76,76,14,Charter,1635,1043130
Thomas High School,37536,Cody Miller,M,11th,84,82,14,Charter,1635,1043130
Thomas High School,37537,Erik Snyder,M,9th,80,90,14,Charter,1635,1043130
Thomas High School,37538,Tanya Martinez,F,9th,71,69,14,Charter,1635,1043130


In [39]:
# set index to school name include type column
school_types = schools_data.set_index('school_name')['type']
school_types.head()

school_name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Name: type, dtype: object

In [40]:
# Create new data frame for student count from by_school data frame
student_by_school = by_school['Student ID'].count()
student_by_school

school_name
Bailey High School       4976
Cabrera High School      1858
Figueroa High School     2949
Ford High School         2739
Griffin High School      1468
Hernandez High School    4635
Holden High School        427
Huang High School        2917
Johnson High School      4761
Pena High School          962
Rodriguez High School    3999
Shelton High School      1761
Thomas High School       1635
Wilson High School       2283
Wright High School       1800
Name: Student ID, dtype: int64

In [42]:
# Create data frame for budget indexed on school_name
budget_by_school = schools_data.set_index('school_name')['budget']
budget_by_school

school_name
Huang High School        1910635
Figueroa High School     1884411
Shelton High School      1056600
Hernandez High School    3022020
Griffin High School       917500
Wilson High School       1319574
Cabrera High School      1081356
Bailey High School       3124928
Holden High School        248087
Pena High School          585858
Wright High School       1049400
Rodriguez High School    2547363
Johnson High School      3094650
Ford High School         1763916
Thomas High School       1043130
Name: budget, dtype: int64

In [44]:
# Calculate budget by student in new data frame
budget_by_student = schools_data.set_index('school_name')['budget']/schools_data.set_index('school_name')['size']
budget_by_student

school_name
Huang High School        655.0
Figueroa High School     639.0
Shelton High School      600.0
Hernandez High School    652.0
Griffin High School      625.0
Wilson High School       578.0
Cabrera High School      582.0
Bailey High School       628.0
Holden High School       581.0
Pena High School         609.0
Wright High School       583.0
Rodriguez High School    637.0
Johnson High School      650.0
Ford High School         644.0
Thomas High School       638.0
dtype: float64