# PyCity School Analysis Report

<h2>Analysis Summary:</h2>

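* The district has 15 schools serving 39,170 students on a combined budget of $24,649,428.
* District-wide average scores are 78.99 in math and 81.88 in reading.
* 74.98% of students passed math, 85.81% passed reading, and 65.17% passed both (a passing score is 70 or higher).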

<h2>Two Conclusions/Comparisons:</h2>

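* Reading results are stronger than math results across the district: 85.81% of students passed reading versus 74.98% for math, and the average reading score (81.88) is about three points above the average math score (78.99).
* Per-school overall passing rates fall into two clearly separated groups: seven schools sit at roughly 53–55%, while the other eight sit at roughly 89–91%, with no school in between.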

In [98]:
# Setup/Importing files
import pandas as pd
from pathlib import Path

school_data_import = Path("../Resources/schools_complete.csv")
student_data_import = Path("../Resources/students_complete.csv")

#Read each CSV into its own pandas DataFrame
school_data = pd.read_csv(school_data_import)
student_data = pd.read_csv(student_data_import)

#Combine data into one dataset
#The left join keeps every row of student_data and attaches the columns of the
#school whose school_name matches. school_name is the only join key, so it is
#named once rather than being repeated in a list.
school_data_combo = pd.merge(student_data, school_data, how="left", on="school_name")
school_data_combo.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [99]:
#Count the distinct school names that appear in the merged dataset
school_names_col = school_data_combo.loc[:, 'school_name']
total_schools_count = school_names_col.nunique()

total_schools_count

15

In [100]:
#Count the distinct Student ID values in the merged dataset
#(student_names_col holds the ID column, not the student names)
student_names_col = school_data_combo.loc[:, 'Student ID']
total_students_count = student_names_col.nunique()

total_students_count

39170

In [101]:
#Find total budget of district
#The merged table repeats a school's budget on every one of its student rows.
#De-duplicating that column with .unique() would silently drop a school
#whenever two schools share the same budget figure, so the total is taken from
#school_data instead.
#NOTE(review): assumes school_data holds exactly one row per school - confirm.
school_budget_col = school_data_combo['budget']
school_budgets = school_data['budget'].to_numpy()
total_budget = school_budgets.sum()

total_budget

24649428

In [102]:
#Find average math score of district
#Series.mean() is vectorized and divides by the number of scores present in the
#column, so this cell no longer relies on total_students_count (a count of
#distinct Student IDs from another cell). The per-school averages further down
#are computed with .mean() as well.
math_score_col = school_data_combo['math_score']
average_math_score = math_score_col.mean()

average_math_score

78.98537145774827

In [103]:
#Find average reading score of district
#Series.mean() is vectorized and divides by the number of scores present in the
#column, so this cell no longer relies on total_students_count (a count of
#distinct Student IDs from another cell). The per-school averages further down
#are computed with .mean() as well.
reading_score_col = school_data_combo['reading_score']
average_reading_score = reading_score_col.mean()

average_reading_score

81.87784018381414

In [104]:
#Find percent of students in district who passed math (score greater than or equal to 70)
#Flag the passing rows first, then count the Student IDs on those rows
math_pass_mask = school_data_combo['math_score'] >= 70
math_pass_count = school_data_combo.loc[math_pass_mask, 'Student ID'].count()
math_pass_percent = math_pass_count / float(total_students_count) * 100

math_pass_percent

74.9808526933878

In [105]:
#Find percent of students in district who passed reading (score greater than or equal to 70)
#Flag the passing rows first, then count the Student IDs on those rows
reading_pass_mask = school_data_combo['reading_score'] >= 70
reading_pass_count = school_data_combo.loc[reading_pass_mask, 'Student ID'].count()
reading_pass_percent = reading_pass_count / float(total_students_count) * 100

reading_pass_percent

85.80546336482001

In [106]:
#Find percent of students in district who passed both math and reading
#A student counts only when both scores are 70 or higher
combo_math_mask = school_data_combo['math_score'] >= 70
combo_reading_mask = school_data_combo['reading_score'] >= 70
combo_pass_count = school_data_combo.loc[combo_math_mask & combo_reading_mask, 'Student ID'].count()
pass_rate = combo_pass_count / float(total_students_count) * 100

pass_rate

65.17232575950983

In [107]:
#Create snapshot of the district's key metrics in a DF

#Collect every metric under its display label (insertion order = column order)
district_metrics = {
    'Total Schools': total_schools_count,
    'Total Students': total_students_count,
    'Total Budget': total_budget,
    'Average Math Score': average_math_score,
    'Average Reading Score': average_reading_score,
    '% Passing Math': math_pass_percent,
    '% Passing Reading': reading_pass_percent,
    '% Overall Passing': pass_rate,
}

#Wrap each value in a one-item list so the DataFrame gets a single row
district_data = {label: [value] for label, value in district_metrics.items()}
district_summary = pd.DataFrame(district_data)

#Turn the count and the budget into display strings (thousands separators,
#dollar sign, two decimals); the other columns stay numeric
display_formats = {
    'Total Students': "{:,}",
    'Total Budget': "${:,.2f}",
}
for column_label, pattern in display_formats.items():
    district_summary[column_label] = district_summary[column_label].map(pattern.format)

district_summary
    

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [108]:
#Look up each school's type, keyed by school name
schools_by_name_for_type = school_data.set_index('school_name')
school_types = schools_by_name_for_type['type']


In [109]:
#Look up each school's enrollment (the 'size' column), keyed by school name
schools_by_name_for_size = school_data.set_index('school_name')
student_per_school_count = schools_by_name_for_size['size']


In [110]:
#Find total school budget and budget per student (per capita) per school

#Budget per school, keyed by school name
schools_by_name_for_budget = school_data.set_index('school_name')
budget_per_school = schools_by_name_for_budget['budget']

#Budget per student: both Series are keyed by school name, so the division
#pairs each school's budget with that school's enrollment
budget_per_student = budget_per_school.div(student_per_school_count)

In [111]:
#Find number of students per school with passing math score
#Keep only the rows with a math score of 70 or more, then count IDs per school
math_passing_rows = school_data_combo.loc[school_data_combo['math_score'] >= 70]
students_passing_math_perschool = math_passing_rows.groupby('school_name')['Student ID'].count()


In [112]:
#Find number of students per school with passing reading score
#Keep only the rows with a reading score of 70 or more, then count IDs per school
reading_passing_rows = school_data_combo.loc[school_data_combo['reading_score'] >= 70]
students_passing_reading_perschool = reading_passing_rows.groupby('school_name')['Student ID'].count()


In [113]:
#Find average test scores per school (math and reading separately)
#Group the student rows by school once and reuse the grouping for both subjects
scores_grouped_by_school = school_data_combo.groupby('school_name')

avg_mathscore_perschool = scores_grouped_by_school['math_score'].mean()
avg_readingscore_perschool = scores_grouped_by_school['reading_score'].mean()


In [114]:
#Find number of students per school who passed both math and reading
#A row qualifies only when both scores are 70 or higher
both_reading_mask = school_data_combo['reading_score'] >= 70
both_math_mask = school_data_combo['math_score'] >= 70
students_passing_mathandreading = school_data_combo.loc[both_reading_mask & both_math_mask]

#Number of qualifying rows per school
spmar_byschool = students_passing_mathandreading.groupby(['school_name']).size()


In [120]:
#Find passing rates
#The passing counts and the enrollment figures are all Series keyed by school
#name, so each division pairs a school's count with that school's enrollment.
percent_perschool_passing_math = students_passing_math_perschool / student_per_school_count * 100
percent_perschool_passing_reading = students_passing_reading_perschool / student_per_school_count * 100
overall_percent_passing = spmar_byschool / student_per_school_count * 100

#Display the overall rate: the output saved with this cell is this Series, and
#without a trailing expression a fresh run would show nothing.
overall_percent_passing

school_name
Bailey High School       54.642283
Cabrera High School      91.334769
Figueroa High School     53.204476
Ford High School         54.289887
Griffin High School      90.599455
Hernandez High School    53.527508
Holden High School       89.227166
Huang High School        53.513884
Johnson High School      53.539172
Pena High School         90.540541
Rodriguez High School    52.988247
Shelton High School      89.892107
Thomas High School       90.948012
Wilson High School       90.582567
Wright High School       90.333333
dtype: float64

In [116]:
#Create DataFrame for school data summary