In [1]:
# importing necessary libraries
import pandas as pd
from pathlib import Path

# Relative locations of the two input CSV files.
school_csv = Path("Resources/schools_complete.csv")
student_csv = Path("Resources/students_complete.csv")

# Load each CSV into its own DataFrame.
school = pd.read_csv(school_csv)
student = pd.read_csv(student_csv)

# Join the student rows onto their school's row. "school_name" is the shared
# column, so it is named once as the join key (listing it twice is redundant).
# how="left" keeps every row of `school`, repeated once per matching student.
combined = pd.merge(school, student, how="left", on="school_name").reset_index(drop=True)

# Display the combined DataFrame (pandas truncates the rendered output).
combined

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,year,reading_score,maths_score
0,0,Huang High School,Government,2917,1910635,0,Paul Bradley,M,9,96,94
1,0,Huang High School,Government,2917,1910635,1,Victor Smith,M,12,90,43
2,0,Huang High School,Government,2917,1910635,2,Kevin Rodriguez,M,12,41,76
3,0,Huang High School,Government,2917,1910635,3,Richard Scott,M,12,89,86
4,0,Huang High School,Government,2917,1910635,4,Bonnie Ray,F,9,87,69
...,...,...,...,...,...,...,...,...,...,...,...
39165,14,Thomas High School,Independent,1635,1043130,39165,Donna Howard,F,12,51,48
39166,14,Thomas High School,Independent,1635,1043130,39166,Dawn Bell,F,10,81,89
39167,14,Thomas High School,Independent,1635,1043130,39167,Rebecca Tanner,F,9,99,99
39168,14,Thomas High School,Independent,1635,1043130,39168,Desiree Kidd,F,10,72,77


**LGA SUMMARY**

In [None]:
# Builds a one-row summary table (`lga`) for the whole merged data set:
# school/student counts, total budget, mean scores and pass percentages.

# Minimum score (inclusive) that counts as a pass in either subject.
PASS_MARK = 50

# total count of schools
t_schools = combined["school_name"].nunique()

# total count of students (each row of the merged frame is treated as one student)
t_students = len(combined)

# total budget: take each school's budget exactly once, keyed on the school
# name. Summing the unique budget *values* instead would silently drop a
# school whenever two schools share the same budget figure.
t_budget = combined.drop_duplicates(subset="school_name")["budget"].sum()

# avg maths score for entire DataFrame
avg_maths = combined["maths_score"].mean()

# avg reading score for entire DataFrame
avg_reading = combined["reading_score"].mean()

# percentage of students with passing maths score (PASS_MARK or higher)
maths_passed = combined[combined["maths_score"] >= PASS_MARK].reset_index(drop=True)
maths_passed_count = len(maths_passed)

maths_passed_percentage = maths_passed_count / t_students * 100

# percentage of students with passing reading score (PASS_MARK or higher)
reading_passed = combined[combined["reading_score"] >= PASS_MARK].reset_index(drop=True)
reading_passed_count = len(reading_passed)

reading_passed_percentage = reading_passed_count / t_students * 100

# percentage of students who passed both maths and reading
overall_passed = combined[
    (combined["maths_score"] >= PASS_MARK) & (combined["reading_score"] >= PASS_MARK)
].reset_index(drop=True)
overall_passed_count = len(overall_passed)

overall_passed_percentage = overall_passed_count / t_students * 100

# creating LGA summary DataFrame (a single row, one column per metric)
lga = pd.DataFrame([{"Total schools": t_schools,
                   "Total Students": t_students,
                   "Total Budget": t_budget,
                   "Average Maths Score": avg_maths,
                   "Average Reading Score": avg_reading,
                   "% Passing Maths": maths_passed_percentage,
                   "% Passing Reading": reading_passed_percentage,
                   "% Overall Passing": overall_passed_percentage}])

# Render the budget as currency with thousands separators and two decimals.
# The format spec needs the "." before the precision (",.2f"), and the mapped
# Series must be assigned back, otherwise the formatting is discarded.
lga["Total Budget"] = lga["Total Budget"].map("${:,.2f}".format)

lga