# PyCitySchools

In [1]:
 # Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
school_data_to_load = "Resources/schools_complete.csv"
student_data_to_load = "Resources/students_complete.csv"

# Read School and Student Data File
school_data = pd.read_csv(school_data_to_load)
student_data = pd.read_csv(student_data_to_load)

In [2]:
# Store into DataFrames
school_data_df = pd.DataFrame(school_data)

total_schools = school_data_df["School ID"].count()
total_students = school_data_df["size"].sum()
total_budget = school_data_df["budget"].sum()

school_data_df.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [3]:
# Print Quick sumary of total schools, total students and total budget
print(total_schools, total_students, total_budget)

15 39170 24649428


In [4]:
# Combine the data into a single dataset
school_data_complete = pd.merge(school_data, student_data, how="left", on=["school_name", "school_name"])
school_data_complete.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [5]:
# Total Schools
total_schools = school_data_complete ["school_name"].nunique()
total_schools

15

In [6]:
# Total Students
total_students = int(school_data_complete["Student ID"].nunique())
total_students

39170

In [7]:
# Total Budget
total_budget = school_data_df["budget"].sum()
total_budget

24649428

In [8]:
# Average Math Score
avg_math_score = round(school_data_complete["math_score"].mean(),1)
avg_math_score

79.0

In [9]:
# Average Reading Score
avg_reading_score = round(school_data_complete["reading_score"].mean(),0)
avg_reading_score

82.0

In [10]:
# Total of Students with a passing math score of 70 or greater
students_passing_math = school_data_complete[school_data_complete["math_score"]>=70].count()["student_name"]

# Percentage of Students Passing maths
percentage_passed_math = round((students_passing_math/total_students)*100,1)
percentage_passed_math

75.0

In [11]:
# Total Students with a passing reading score of 70 or greater
students_passing_reading = school_data_complete[school_data_complete["reading_score"]>=70].count()["student_name"]

# Percentage of students Passing Reading
percentage_passed_reading = round((students_passing_reading/total_students)*100,1)
percentage_passed_reading

85.8

In [12]:
# Percentage Overall Passing (The percentage of students that passed math and reading)
#overall_passing = (percentage_passed_math + percentage_passed_reading)/2
#overall_passing

overall_passing = (percentage_passed_math + percentage_passed_reading)/2
overall_passing

80.4

In [13]:
# Dataframe Summary of above results
district_Summary_df = pd.DataFrame({"Total Schools":[total_schools],
                      "Total Students":[total_students],
                      "Total Budget":[total_budget],
                      "Average Math Score":[avg_math_score],
                      'Average Reading Score':[avg_reading_score],
                      "% Passing Math":[percentage_passed_math],
                      "% Passing Reading":[percentage_passed_reading],
                      "% Overall Passing":[overall_passing]})

In [14]:
# Format
district_Summary_df['Total Budget'] = district_Summary_df['Total Budget'].map('${:,.2f}'.format)

district_Summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",79.0,82.0,75.0,85.8,80.4
