In [211]:
# Set up to be able to analyze the data from the csv files
# Import Dependencies
import pandas as pd

# Locations of the two input CSV files (relative to the notebook)
school_file = 'Resources/schools_complete.csv'
students_file = 'Resources/students_complete.csv'

# Read each file into its own DataFrame
school_df = pd.read_csv(school_file)
students_df = pd.read_csv(students_file)

# Combine the data into a single dataset: one row per student, with the
# matching school's columns attached. The join key is the single column
# 'school_name'. validate='many_to_one' makes pandas raise a MergeError if a
# school name appears more than once in school_df, which would otherwise
# silently duplicate student rows and skew every statistic computed below.
complete_school_data = pd.merge(
    students_df,
    school_df,
    how='left',
    on='school_name',
    validate='many_to_one',
)
complete_school_data.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [212]:
# Number of distinct school names appearing in the merged data
school_count = complete_school_data['school_name'].unique().size
school_count

15

In [213]:
# Total students = number of rows with a non-null 'Student ID'
total_students = complete_school_data['Student ID'].notna().sum()
total_students

39170

In [214]:
# Calculate the total budget
# Every student row repeats its school's budget, so keep one row per school
# before summing. Deduplicate on 'school_name', not on 'budget': two schools
# that happen to share the same budget must both be counted.
total_budget_df = complete_school_data.drop_duplicates(['school_name'])
total_budget = total_budget_df['budget'].sum()
total_budget


24649428

In [215]:
# District-wide mean of the math scores
math_scores = complete_school_data['math_score']
avg_math_score = math_scores.mean()
avg_math_score

78.98537145774827

In [216]:
# District-wide mean of the reading scores
reading_scores = complete_school_data['reading_score']
avg_reading_score = reading_scores.mean()
avg_reading_score

81.87784018381414

In [217]:
# Share of students with a passing math score (70 or higher), as a percentage
math_pass_mask = complete_school_data['math_score'] >= 70
# Count the non-null student names among the passing rows
passed_math_count = complete_school_data.loc[math_pass_mask, 'student_name'].count()
passed_math_percentage = (passed_math_count / float(total_students)) * 100
passed_math_percentage

74.9808526933878

In [218]:
# Share of students with a passing reading score (70 or higher), as a percentage
reading_pass_mask = complete_school_data['reading_score'] >= 70
# Count the non-null student names among the passing rows
passed_reading_count = complete_school_data.loc[reading_pass_mask, 'student_name'].count()
passed_reading_percentage = (passed_reading_count / float(total_students)) * 100
passed_reading_percentage

85.80546336482001

In [219]:
# Share of students who passed BOTH subjects (70 or higher in each), as a percentage
both_pass_mask = (
    (complete_school_data['math_score'] >= 70)
    & (complete_school_data['reading_score'] >= 70)
)
# Count the non-null student names among the rows passing both subjects
passed_both_count = complete_school_data.loc[both_pass_mask, 'student_name'].count()
passed_both_percentage = (passed_both_count / float(total_students)) * 100
passed_both_percentage

65.17232575950983

In [220]:
# Build the district summary snapshot
# A one-element list of records becomes a single-row DataFrame whose columns
# are the metric names, in the order listed here.
district_dict = [
    {
        'Total Schools': school_count,
        'Total Students': total_students,
        'Total Budget': total_budget,
        'Average Math Score': avg_math_score,
        'Average Reading Score': avg_reading_score,
        '% Passing Math': passed_math_percentage,
        '% Passing Reading': passed_reading_percentage,
        '% Overall Passing': passed_both_percentage,
    }
]

district_summary = pd.DataFrame(district_dict)

# Display formatting: thousands separators for the student count, currency for
# the budget, two-decimal percentages for the passing rates. The formatted
# columns hold strings from here on; the average-score columns stay numeric.
column_templates = {
    'Total Students': "{:,}",
    'Total Budget': "${:,.2f}",
    '% Passing Math': "{:,.2f}%",
    '% Passing Reading': "{:,.2f}%",
    '% Overall Passing': "{:,.2f}%",
}
for column_name, template in column_templates.items():
    district_summary[column_name] = district_summary[column_name].map(template.format)

district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.98%,85.81%,65.17%
