# In this notebook, we will read csv files containing data about various schools and students thereof, and create dataframes therefrom in order to help the school board and mayor make strategic decisions regarding future school budgets and priorities.

In [199]:
# We will import the pandas package, so that we can read csv files, create dataframes therefrom, and perform data analysis.
# We will also import numpy to assist in mathematical computations.
import pandas as pd
import numpy as np

pd.options.display.float_format = "{:,}".format

In [200]:
# This creates file paths to our school data and student data csv files, so that we can later create dataframes therefrom.
schools_complete_file_path = "Resources/schools_complete.csv"
students_complete_file_path = "Resources/students_complete.csv"

In [201]:
schools_dataframe = pd.read_csv(schools_complete_file_path)
students_dataframe = pd.read_csv(students_complete_file_path)

In [202]:
schools_dataframe.head()

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [203]:
students_dataframe.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [204]:
students_schools_merged_dataframe = pd.merge(students_dataframe, schools_dataframe, how="left", on=["school_name", "school_name"])
students_schools_merged_dataframe.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [205]:
# By using the .nunique() method, we count the number of unique values in the school_name column; this is the total number of schools.
total_schools = students_schools_merged_dataframe["school_name"].nunique()

# By using the .count() method, we count the number of fields in the student_name column; this is the total number of students.
total_students = students_schools_merged_dataframe["student_name"].count()

# By using the .sum() method, we calculate the sum of each school budget.  We must first use the .unique() method, to ensure that we do not add the budget from the same school more than once.
total_budget = students_schools_merged_dataframe["budget"].unique().sum()

# By using the .mean method, we calculate the average math score of every student.
average_math = students_schools_merged_dataframe["math_score"].mean()

# By using the .mean method, we calculate the average math score of every student.
average_math = students_schools_merged_dataframe["math_score"].mean()

# By using the .mean method, we calculate the average reading score of every student.
average_reading = students_schools_merged_dataframe["reading_score"].mean()

# By using the function average from the numpy package, we can calculate the average of the values written therein.
overall_passing_rate = np.mean([average_math, average_reading])

# We ust a pandas feature that allows us to count the number of values in a column based on a condition- in our case, grades greater than or equal to 70- then we use the len() function to get the length of this list.
# We then divide this number by the length of the entire list, and multiply of 100, giving us the percent of passing grades.
percent_passing_math = len(students_schools_merged_dataframe[students_schools_merged_dataframe["math_score"] >= 70]) / students_schools_merged_dataframe["math_score"].count() * 100

# We ust a pandas feature that allows us to count the number of values in a column based on a condition- in our case, grades greater than or equal to 70- then we use the len() function to get the length of this list.
# We then divide this number by the length of the entire list, and multiply of 100, giving us the percent of passing grades.
percent_passing_reading = len(students_schools_merged_dataframe[students_schools_merged_dataframe["reading_score"] >= 70]) / students_schools_merged_dataframe["reading_score"].count() * 100

In [206]:
print(total_schools)
print(total_students)
print(total_budget)
print(average_math)
print(average_reading)
print(overall_passing_rate)
print(percent_passing_math)
print(percent_passing_reading)

15
39170
24649428
78.98537145774827
81.87784018381414
80.43160582078121
74.9808526933878
85.80546336482001


In [212]:
district_summary_data = [{"Total Schools":total_schools,
                          "Total Students":total_students,
                          "Total Budget":total_budget,
                         "Average Math Score":average_math,
                         "Average Reading Score":average_reading,
                         "Percent Overall Passing Rate":overall_passing_rate,
                         "Percent Passing Math":percent_passing_math,

In [215]:
district_summary = pd.DataFrame(data=district_summary_data, columns=["Total Schools",
                                                                     "Total Students",
                                                                    "Total Budget",
                                                                    "Average Math Score",
                                                                    "Average Reading Score",
                                                                    "Percent Overall Passing Rate",
                                                                    "Percent Passing Math",
                                                                    "Percent Passing Reading"])
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,Percent Overall Passing Rate,Percent Passing Math,Percent Passing Reading
0,15,39170,24649428,78.98537145774827,81.87784018381414,80.43160582078121,74.9808526933878,85.80546336482001
