In [None]:
import pandas as pd

# Locations of the raw CSV inputs
school_data = "Resources/schools_complete.csv"
student_data = "Resources/students_complete.csv"

# Read each CSV into its own DataFrame
school_df = pd.read_csv(school_data)
student_df = pd.read_csv(student_data)

school_df.head(2)

student_df.head(2)

# Combine the student and school DataFrames into a single DataFrame.
# A left merge keeps every student row and attaches the columns of the
# matching school; the join key is named once (the original listed
# "school_name" twice, which is redundant).
school_complete_df = pd.merge(student_df, school_df, how="left", on="school_name")
school_complete_df.head(3)


def convert_boolean_to_numeric(bool_value):
    """Map a flag to an integer: 1 when it compares equal to True, else 0.

    The comparison is an equality test rather than a truthiness test, so
    truthy values that are not equal to True (for example 2 or a non-empty
    string) map to 0.
    """
    return 1 if bool_value == True else 0
    

# Boolean mask: True where the reading score is at least 70
pass_reading_flag = school_complete_df["reading_score"] >= 70
pass_reading_flag

# One-column frame of the reading mask; the column still carries the
# Series name "reading_score" at this point
reading_pass_flag_df = pd.DataFrame(pass_reading_flag)
reading_pass_flag_df

# Renamed copy that is joined onto school_complete_df below. The original
# renamed an undefined `reading_flag_df`, which raised a NameError.
reading_flag_df = reading_pass_flag_df.rename(columns={"reading_score": "reading_pass_flag"})
reading_flag_df.head(3)

# Boolean mask: True where the math score is at least 70
pass_math_flag = school_complete_df["math_score"] >= 70
pass_math_flag.head()

# Create a math flag data frame to append to the school_complete_df
math_flag_df = pass_math_flag.to_frame(name="math_pass_flag")
math_flag_df.head(3)

# A student passes overall only when both individual flags are True
pass_overall = (pass_reading_flag & pass_math_flag)
pass_overall

# to_frame names the column explicitly. pd.DataFrame(series, columns=[...])
# only works while the combined Series happens to have no name.
pass_overall_df = pass_overall.to_frame(name="pass_overall_flag")
pass_overall_df.head(3)

# Append the three flag columns; join aligns on the shared row index
new_complete_school_df = school_complete_df.join(reading_flag_df)

flagged_complete_school_df = new_complete_school_df.join(math_flag_df)

flagged_complete_school_df = flagged_complete_school_df.join(pass_overall_df)
flagged_complete_school_df

# Convert the boolean pass flags to 0/1 integers so they can be summed per
# school. A vectorised cast replaces the per-row Python function call.
flag_columns = ["reading_pass_flag", "math_pass_flag", "pass_overall_flag"]
flagged_complete_school_df[flag_columns] = flagged_complete_school_df[flag_columns].astype(int)

# Create a grouping based on the school name. This is done after the cast so
# the grouping does not depend on later in-place changes to the frame.
schools = flagged_complete_school_df.groupby("school_name")

flagged_complete_school_df.head(3)

# Total number of schools: one group per distinct school name
total_schools = len(schools)

# Total budget: first() keeps a single row per school, so each school's
# budget is counted once in the sum
budget_df = schools.first()
total_budget = budget_df["budget"].sum()

# Total number of students: non-null "Student ID" entries
total_students = school_complete_df["Student ID"].count()

# District-wide average scores
avg_reading_score = school_complete_df["reading_score"].mean()
avg_math_score = school_complete_df["math_score"].mean()

# Passing means a score of 70 or more
read_filter = school_complete_df["reading_score"] >= 70
math_filter = school_complete_df["math_score"] >= 70

# Count the True entries of each mask, then express them as a fraction of
# all students
num_passed_reading = int(read_filter.sum())
percent_passing_reading = num_passed_reading / total_students

num_passed_math = int(math_filter.sum())
percent_passing_math = num_passed_math / total_students

# Despite its name, overall_filter holds the COUNT of students who passed
# both subjects
overall_filter = int((read_filter & math_filter).sum())
percent_passing_overall = overall_filter / total_students


## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

# Single-row summary of the district. Every value except the school count
# is pre-formatted as a string for display.
district_summary_df = pd.DataFrame(
    {
        "Total Schools": [total_schools],
        "Total Students": [f"{total_students:,}"],
        "Total Budget": [f"${total_budget:,}"],
        # Average scores are shown as plain two-decimal numbers. The original
        # divided by 100 and used a percent format, which presented a mean
        # score as if it were a percentage.
        "Avg Math Score": [f"{avg_math_score:.2f}"],
        "Avg Reading Score": [f"{avg_reading_score:.2f}"],
        # The pass rates are fractions of the student total, so the percent
        # format is the right one here
        "% Passing Math": [f"{percent_passing_math:.2%}"],
        "% Passing Reading": [f"{percent_passing_reading:.2%}"],
        "% Overall Passing": [f"{percent_passing_overall:.2%}"],
    }
)

district_summary_df

flagged_complete_school_df.head(3)

# First "type" and "budget" value seen for each school
type_budget_df = schools[["type", "budget"]].first()
type_budget_df.head(3)

# Aggregate per school and label the output columns in one step using
# named aggregation: output column -> (source column, aggregation)
school_summary_spec = {
    "School Type": ("type", "first"),
    "Total Students": ("Student ID", "count"),
    "Total School Budget": ("budget", "first"),
    "Average Math Score": ("math_score", "mean"),
    "Average Reading Score": ("reading_score", "mean"),
    "Total Students Passing Reading": ("reading_pass_flag", "sum"),
    "Total Students Passing Math": ("math_pass_flag", "sum"),
    "Total Students Passing Overall": ("pass_overall_flag", "sum"),
}
school_summary1_df = schools.agg(**school_summary_spec)

school_summary1_df.head(3)

# Per-school student head count, the denominator for every ratio below
student_counts = school_summary1_df["Total Students"]

# Budget available per student at each school
budget_student = school_summary1_df["Total School Budget"].div(student_counts)

# Fraction of each school's students passing math, reading, and both.
# NOTE: these names were previously bound to the district-wide scalars and
# now hold one value per school.
percent_passing_math = school_summary1_df["Total Students Passing Math"].div(student_counts)
percent_passing_reading = school_summary1_df["Total Students Passing Reading"].div(student_counts)
percent_passing_overall = school_summary1_df["Total Students Passing Overall"].div(student_counts)

# Collect the derived per-school metrics into one frame
derived_metrics = {
    "Per Student Budget": budget_student,
    "% Passing Math": percent_passing_math,
    "% Passing Reading": percent_passing_reading,
    "% Overall Passing": percent_passing_overall,
}
budget_student_df = pd.DataFrame(derived_metrics)
budget_student_df.head(3)

# Attach the derived metrics to the aggregated summary; both frames are
# indexed by school name
new_school_summary_df = school_summary1_df.join(budget_student_df)
new_school_summary_df.head(3)


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

# Display order of the summary columns; the raw pass counts are left out
column_order = [
    "School Type",
    "Total Students",
    "Total School Budget",
    "Per Student Budget",
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing",
]

# Build the final summary under a new name, keeping only the listed
# columns in the listed order
school_summary_with_budget_df = new_school_summary_df.reindex(columns=column_order)
school_summary_with_budget_df



## Top Performing Schools (By % Overall Passing)

# Five schools with the highest overall passing rate, best first
school_summary_with_budget_df.sort_values(by="% Overall Passing", ascending=False).iloc[:5]



## Bottom Performing Schools (By % Overall Passing)

# Five schools with the lowest overall passing rate, worst first
school_summary_with_budget_df.sort_values(by="% Overall Passing", ascending=True).iloc[:5]



## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting





# Index the student rows by school so per-school grouping can use the index
flagged_school_df = flagged_complete_school_df.set_index("school_name")
flagged_school_df.head(3)


def _mean_score_by_school(scores, grade_label):
    """Average a school-indexed score Series per school.

    Parameters:
        scores: Series of scores whose index level is named "school_name".
        grade_label: name given to the resulting Series (e.g. "9th").

    Returns:
        Series of mean scores, one entry per school, named grade_label.
    """
    return scores.groupby(level="school_name").mean().rename(grade_label)


# Ninth grade: row mask, math scores, per-school mean, one-column frame
nine = flagged_school_df["grade"] == "9th"
ninth_grade = flagged_school_df.loc[nine, "math_score"]
ninth = _mean_score_by_school(ninth_grade, "9th")
ninth_grade_df = ninth.to_frame()

# Tenth grade
ten = flagged_school_df["grade"] == "10th"
tenth_grade = flagged_school_df.loc[ten, "math_score"]
tenth = _mean_score_by_school(tenth_grade, "10th")
tenth_grade_df = tenth.to_frame()

# Eleventh grade
eleven = flagged_school_df["grade"] == "11th"
eleventh_grade = flagged_school_df.loc[eleven, "math_score"]
eleventh = _mean_score_by_school(eleventh_grade, "11th")
eleventh_grade_df = eleventh.to_frame()

# Twelfth grade
twelve = flagged_school_df["grade"] == "12th"
twelfth_grade = flagged_school_df.loc[twelve, "math_score"]
twelfth = _mean_score_by_school(twelfth_grade, "12th")
twelfth_grade_df = twelfth.to_frame()

# Combine the four grade columns into one table indexed by school.
# Outer joins keep a school even if it has no rows for some grade (the
# missing cell becomes NaN); the previous left joins would have dropped any
# school without ninth-grade rows.
math_scores_by_grade_df = (
    ninth_grade_df
    .join(tenth_grade_df, how="outer")
    .join(eleventh_grade_df, how="outer")
    .join(twelfth_grade_df, how="outer")
)

math_scores_by_grade_df