In [1]:
# importing necessary libraries
import pandas as pd
from pathlib import Path

# creating file paths (relative to the notebook's working directory)
school_csv = Path("Resources/schools_complete.csv")
student_csv = Path("Resources/students_complete.csv")

# creating DataFrames from .csv files
school = pd.read_csv(school_csv)
student = pd.read_csv(student_csv)

# combining DataFrames to a single DataFrame: one row per student, with the
# school's columns repeated on every row of that school.
# - the join key is given once (it was previously listed twice)
# - validate="one_to_many" raises if a school name appears more than once in
#   the schools table, which would otherwise silently duplicate student rows
# - a column-on-column merge already returns a fresh 0..n-1 index, so no
#   reset_index is needed
combined = pd.merge(school, student, how="left", on="school_name", validate="one_to_many")

# printing combined DataFrame
combined

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,year,reading_score,maths_score
0,0,Huang High School,Government,2917,1910635,0,Paul Bradley,M,9,96,94
1,0,Huang High School,Government,2917,1910635,1,Victor Smith,M,12,90,43
2,0,Huang High School,Government,2917,1910635,2,Kevin Rodriguez,M,12,41,76
3,0,Huang High School,Government,2917,1910635,3,Richard Scott,M,12,89,86
4,0,Huang High School,Government,2917,1910635,4,Bonnie Ray,F,9,87,69
...,...,...,...,...,...,...,...,...,...,...,...
39165,14,Thomas High School,Independent,1635,1043130,39165,Donna Howard,F,12,51,48
39166,14,Thomas High School,Independent,1635,1043130,39166,Dawn Bell,F,10,81,89
39167,14,Thomas High School,Independent,1635,1043130,39167,Rebecca Tanner,F,9,99,99
39168,14,Thomas High School,Independent,1635,1043130,39168,Desiree Kidd,F,10,72,77


## **LGA SUMMARY**

In [2]:
# minimum score that counts as a pass (50 or higher)
PASS_MARK = 50

# total count of schools
t_schools = combined["school_name"].nunique()

# total count of students (the merged frame has one row per student)
t_students = len(combined)

# total budget: the school's budget is repeated on every student row, so keep
# one row per school before summing. Summing the unique budget *values* would
# drop a school whenever two schools happen to share the same budget figure.
t_budget = combined.drop_duplicates(subset="school_name")["budget"].sum()

# avg maths score for entire DataFrame
avg_maths = combined["maths_score"].mean()

# avg reading score for entire DataFrame
avg_reading = combined["reading_score"].mean()

# boolean masks (one entry per student row) marking who reached the pass mark
is_maths_pass = combined["maths_score"] >= PASS_MARK
is_reading_pass = combined["reading_score"] >= PASS_MARK

# percentage of students with passing maths score
maths_passed = combined[is_maths_pass].reset_index(drop=True)
maths_passed_count = len(maths_passed)
maths_passed_percentage = maths_passed_count / t_students * 100

# percentage of students with passing reading score
reading_passed = combined[is_reading_pass].reset_index(drop=True)
reading_passed_count = len(reading_passed)
reading_passed_percentage = reading_passed_count / t_students * 100

# percentage of students who passed maths and reading
overall_passed = combined[is_maths_pass & is_reading_pass].reset_index(drop=True)
overall_passed_count = len(overall_passed)
overall_passed_percentage = overall_passed_count / t_students * 100

# creating LGA summary DataFrame (a single-row table)
lga = pd.DataFrame([{"Total schools": t_schools,
                   "Total Students": t_students,
                   "Total Budget": t_budget,
                   "Average Maths Score": avg_maths,
                   "Average Reading Score": avg_reading,
                   "% Passing Maths": maths_passed_percentage,
                   "% Passing Reading": reading_passed_percentage,
                   "% Overall Passing": overall_passed_percentage}])

# formatting total budget to currency standard (the column becomes text)
lga["Total Budget"] = lga["Total Budget"].map("${:,.2f}".format)

lga

Unnamed: 0,Total schools,Total Students,Total Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",70.338192,69.980138,86.078632,84.426857,72.808272


## **SCHOOL SUMMARY**

In [3]:
# creating school summary DataFrame from the schools table loaded in the first
# cell (no need to read the .csv file a second time), then renaming columns and
# setting index to school name
school_updated = (
    school
    .sort_values("school_name")
    .drop("School ID", axis=1)
    .rename(columns={"school_name": "School Name", "type": "School Type", "size": "Total Students", "budget": "Total School Budget"})
    .set_index("School Name")
)

# adding new datapoints
# per student budget
school_updated["Per Student Budget"] = school_updated["Total School Budget"] / school_updated["Total Students"]

# average maths and reading scores; the grouped results are indexed by school
# name, so assignment aligns them with the summary's index
scores_by_school = combined.groupby("school_name")
school_updated["Average Maths Score"] = scores_by_school["maths_score"].mean()
school_updated["Average Reading Score"] = scores_by_school["reading_score"].mean()

# number of passing students per school; .size() counts rows, matching the
# len()-based counts used for the LGA summary
school_maths_passed = maths_passed.groupby("school_name").size()
school_reading_passed = reading_passed.groupby("school_name").size()
school_overall_passed = overall_passed.groupby("school_name").size()

# a school with no passing students is missing from the counts above;
# reindexing with fill_value=0 reports it as 0% instead of NaN
school_names = school_updated.index

# % passing maths
school_updated["% Passing Maths"] = school_maths_passed.reindex(school_names, fill_value=0) / school_updated["Total Students"] * 100

# % passing reading
school_updated["% Passing Reading"] = school_reading_passed.reindex(school_names, fill_value=0) / school_updated["Total Students"] * 100

# % overall passing
school_updated["% Overall Passing"] = school_overall_passed.reindex(school_names, fill_value=0) / school_updated["Total Students"] * 100

# formatting school budget and per student budget to currency standard
# (both columns become text after this step)
school_updated["Total School Budget"] = school_updated["Total School Budget"].map("${:,.2f}".format)
school_updated["Per Student Budget"] = school_updated["Per Student Budget"].map("${:,.2f}".format)

school_updated

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791
Figueroa High School,Government,2949,"$1,884,411.00",$639.00,68.698542,69.077993,81.654798,82.807731,67.650051
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617
Holden High School,Independent,427,"$248,087.00",$581.00,72.583138,71.660422,89.929742,88.52459,78.922717
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766
Pena High School,Independent,962,"$585,858.00",$609.00,72.088358,71.613306,91.683992,86.590437,79.209979


## TOP PERFORMING SCHOOLS (BY % OVERALL PASSING)

In [5]:
# school_updated ordered from highest to lowest '% Overall Passing'
top_performing = school_updated.sort_values(by="% Overall Passing", ascending=False)

# the five best-performing schools
top_performing.iloc[:5]

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Griffin High School,Independent,1468,"$917,500.00",$625.00,71.788147,71.245232,91.212534,88.487738,81.33515
Cabrera High School,Independent,1858,"$1,081,356.00",$582.00,71.657158,71.359526,90.850377,89.074273,80.785791
Bailey High School,Government,4976,"$3,124,928.00",$628.00,72.352894,71.008842,91.639871,87.379421,80.084405
Wright High School,Independent,1800,"$1,049,400.00",$583.00,72.047222,70.969444,91.777778,86.666667,79.722222
Rodriguez High School,Government,3999,"$2,547,363.00",$637.00,72.047762,70.935984,90.797699,87.396849,79.419855


## BOTTOM PERFORMING SCHOOLS (BY % OVERALL PASSING)

In [6]:
# school_updated ordered from lowest to highest '% Overall Passing'
bottom_performing = school_updated.sort_values(by="% Overall Passing", ascending=True)

# the five worst-performing schools
bottom_performing.iloc[:5]

Unnamed: 0_level_0,School Type,Total Students,Total School Budget,Per Student Budget,Average Maths Score,Average Reading Score,% Passing Maths,% Passing Reading,% Overall Passing
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Hernandez High School,Government,4635,"$3,022,020.00",$652.00,68.874865,69.186408,80.949299,81.877023,66.364617
Huang High School,Government,2917,"$1,910,635.00",$655.00,68.935207,68.910525,81.693521,81.453548,66.712376
Johnson High School,Government,4761,"$3,094,650.00",$650.00,68.8431,69.039277,82.062592,81.978576,67.191766
Wilson High School,Independent,2283,"$1,319,574.00",$578.00,69.170828,68.876916,82.785808,81.29654,67.455103
Ford High School,Government,2739,"$1,763,916.00",$644.00,69.091274,69.572472,82.438846,82.219788,67.46988


## MATHS SCORES BY YEAR