# PyCity Schools Analysis

- Your analysis here

## Original Data

In [18]:
# Import libraries needed
import pandas as pd
from pathlib import Path

# Locations of the raw CSV inputs, relative to the notebook's working directory
schools_csv = Path("Resources/schools_complete.csv")
students_csv = Path("Resources/students_complete.csv")

# Load each file into its own DataFrame
schools_df = pd.read_csv(schools_csv)
students_df = pd.read_csv(students_csv)

# Preview the first five student records
students_df.head(n=5)

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [19]:
# Preview the first five school records
schools_df.head(n=5)

Unnamed: 0,School ID,school_name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [20]:
# Attach each student's school attributes (type, size, budget) to the student row.
# A left join keeps every student; validate="many_to_one" makes pandas raise a
# MergeError if a school name appears more than once in schools_df, which would
# otherwise silently duplicate student rows and inflate every downstream metric.
complete_df = pd.merge(
    students_df,
    schools_df,
    on="school_name",
    how="left",
    validate="many_to_one",
)
complete_df.head()

Unnamed: 0,Student ID,student_name,gender,grade,school_name,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


## District Summary

In [21]:
# Number of distinct schools represented in the merged data (missing names ignored)
total_schools = len(complete_df["school_name"].dropna().unique())
total_schools

15

In [22]:
# District total students: one row per student in the merged table.
# Counting rows (instead of non-null student names) keeps a student with a
# missing name from being dropped out of the district total.
total_students = len(complete_df)
total_students

39170

In [23]:
# Sum of every school's budget; taken from schools_df so each school is counted once
total_budget = schools_df.loc[:, "budget"].sum()
total_budget

24649428

In [24]:
# Mean math score across all students in the district
avg_math_score = complete_df.loc[:, "math_score"].mean()
avg_math_score

78.98537145774827

In [25]:
# Mean reading score across all students in the district
avg_read_score = complete_df.loc[:, "reading_score"].mean()
avg_read_score

81.87784018381414

In [26]:
# District % of students passing math (a passing score is 70 or higher)
# Select the single column before counting so only one column is tallied.
math_pass_mask = complete_df["math_score"] >= 70
pass_math_count = complete_df.loc[math_pass_mask, "student_name"].count()
pass_math_perc = pass_math_count / float(total_students) * 100
pass_math_perc

74.9808526933878

In [27]:
# District % of students passing reading (a passing score is 70 or higher)
# Select the single column before counting so only one column is tallied.
reading_pass_mask = complete_df["reading_score"] >= 70
pass_read_count = complete_df.loc[reading_pass_mask, "student_name"].count()
pass_read_perc = pass_read_count / float(total_students) * 100
pass_read_perc

85.80546336482001

In [28]:
# District % of students passing BOTH math and reading (a passing score is 70 or higher)
# Select the single column before counting so only one column is tallied.
overall_pass_mask = (complete_df["reading_score"] >= 70) & (complete_df["math_score"] >= 70)
pass_mathread_count = complete_df.loc[overall_pass_mask, "student_name"].count()
pass_mathread_perc = pass_mathread_count / float(total_students) * 100
pass_mathread_perc

65.17232575950983

In [29]:
# District key metrics gathered into a single-row summary table
district_metrics = {
    "Total Schools": total_schools,
    "Total Students": total_students,
    "Total Budget": total_budget,
    "Average Math Score": avg_math_score,
    "Average Reading Score": avg_read_score,
    "% Passing Math": pass_math_perc,
    "% Passing Reading": pass_read_perc,
    "Overall Passing": pass_mathread_perc,
}
district_summary = pd.DataFrame(district_metrics, index=[0])

# Display formatting: thousands separators for the head count, currency for the budget
student_count_format = "{:,}".format
budget_format = "${:,.2f}".format
district_summary["Total Students"] = district_summary["Total Students"].map(student_count_format)
district_summary["Total Budget"] = district_summary["Total Budget"].map(budget_format)
district_summary

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853,85.805463,65.172326


## School Summary

In [47]:
# School type (District / Charter) looked up by school name
school_types = schools_df.set_index("school_name").loc[:, "type"]
school_types

school_name
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
Name: type, dtype: object

In [48]:
# Enrollment (the "size" column) looked up by school name
school_students = schools_df.set_index("school_name").loc[:, "size"]
school_students

school_name
Huang High School        2917
Figueroa High School     2949
Shelton High School      1761
Hernandez High School    4635
Griffin High School      1468
Wilson High School       2283
Cabrera High School      1858
Bailey High School       4976
Holden High School        427
Pena High School          962
Wright High School       1800
Rodriguez High School    3999
Johnson High School      4761
Ford High School         2739
Thomas High School       1635
Name: size, dtype: int64

In [49]:
# Per-student budget is stored as a new column on schools_df (budget divided by size)
schools_df["budget_per_student"] = schools_df["budget"].div(schools_df["size"])

# Both budget figures, indexed by school name
school_budget = schools_df.set_index("school_name").loc[:, "budget"]
school_budget_per_student = schools_df.set_index("school_name").loc[:, "budget_per_student"]
school_budget

school_name
Huang High School        1910635
Figueroa High School     1884411
Shelton High School      1056600
Hernandez High School    3022020
Griffin High School       917500
Wilson High School       1319574
Cabrera High School      1081356
Bailey High School       3124928
Holden High School        248087
Pena High School          585858
Wright High School       1049400
Rodriguez High School    2547363
Johnson High School      3094650
Ford High School         1763916
Thomas High School       1043130
Name: budget, dtype: int64

In [50]:
# Display the per-student budget for each school (budget divided by size)
school_budget_per_student

school_name
Huang High School        655.0
Figueroa High School     639.0
Shelton High School      600.0
Hernandez High School    652.0
Griffin High School      625.0
Wilson High School       578.0
Cabrera High School      582.0
Bailey High School       628.0
Holden High School       581.0
Pena High School         609.0
Wright High School       583.0
Rodriguez High School    637.0
Johnson High School      650.0
Ford High School         644.0
Thomas High School       638.0
Name: budget_per_student, dtype: float64

In [None]:
# Average test scores per school, computed from the merged table complete_df.
# groupby returns a Series indexed by school_name in sorted order, so it aligns
# by name with the other per-school Series when they are combined later.
per_school_math = complete_df.groupby("school_name")["math_score"].mean()
per_school_reading = complete_df.groupby("school_name")["reading_score"].mean()

In [None]:
# Number of students per school with math scores of 70 or higher, from complete_df.
# Schools with no passing students do not appear in the grouped result.
students_passing_math = complete_df[complete_df["math_score"] >= 70]
school_students_passing_math = students_passing_math.groupby(["school_name"]).size()

In [None]:
# Number of students per school with reading scores of 70 or higher, from complete_df.
# Schools with no passing students do not appear in the grouped result.
students_passing_reading = complete_df[complete_df["reading_score"] >= 70]
school_students_passing_reading = students_passing_reading.groupby(["school_name"]).size()

In [None]:
# Number of students per school that passed both math and reading (scores of 70 or higher).
# The mask must be built from complete_df, the merged table defined in this notebook;
# the template name school_data_complete is never assigned here and raises a NameError.
students_passing_math_and_reading = complete_df[
    (complete_df["reading_score"] >= 70) & (complete_df["math_score"] >= 70)
]
school_students_passing_math_and_reading = students_passing_math_and_reading.groupby(["school_name"]).size()

- Average math score
- Average reading score
- % passing math (the percentage of students who passed math)
- % passing reading (the percentage of students who passed reading)
- % overall passing (the percentage of students who passed math AND reading)