### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import os

In [2]:
# Input CSV locations, built relative to the notebook (edit these if the data lives elsewhere)
school_data_to_load, student_data_to_load = (
    os.path.join("Resources", csv_file)
    for csv_file in ("schools_complete.csv", "students_complete.csv")
)

In [3]:
# Load the school file and the student file from disk, each into its own DataFrame
school_data, student_data = map(pd.read_csv, (school_data_to_load, student_data_to_load))

In [4]:
# Combine the two data sets into a single dataset.
# Every student row is kept (left join) and picks up the columns of its school.
# "school_name" is the single join key; validate="many_to_one" makes the merge fail
# loudly if a school name appears more than once in school_data, instead of
# silently duplicating student rows.
school_data_complete = pd.merge(student_data, school_data, how="left", on="school_name", validate="many_to_one")

## District Summary

* Calculate the total number of schools

* Calculate the total number of students

* Calculate the total budget

* Calculate the average math score 

* Calculate the average reading score

* Calculate the percentage of students with a passing math score (70 or greater)

* Calculate the percentage of students with a passing reading score (70 or greater)

* Calculate the percentage of students who passed math **and** reading (% Overall Passing)

* Create a dataframe to hold the above results

* Optional: give the displayed data cleaner formatting

In [5]:
# Sanity check on the merged data: tally the non-missing entries in each column.
school_data_complete.notna().sum()

Student ID       39170
student_name     39170
gender           39170
grade            39170
school_name      39170
reading_score    39170
math_score       39170
School ID        39170
type             39170
size             39170
budget           39170
dtype: int64

In [6]:
# District-wide headline numbers, each stored as a plain Python float

# Total number of schools = number of rows in the school table
school_count_a = float(school_data.shape[0])

# Total number of students and total budget are summed from the per-school figures
student_count_a, total_budget = (
    float(school_data[school_column].sum()) for school_column in ("size", "budget")
)

# Average math and reading scores are taken over every student record
avg_math, avg_reading = (
    float(student_data[score_column].mean()) for score_column in ("math_score", "reading_score")
)

In [7]:
# Calculate the percentage with a passing math score (70 or higher)
# Calculate the percentage with a passing reading score (70 or higher)
# Bin edges are used left-closed (right=False below): [0, 70) is a fail and
# [70, inf) is a pass, so the cut-off sits exactly at 70 even for a score
# that is not a whole number.
bins = [0, 70, float("inf")]

# Create the names for the two bins
group_names = ["Fail", "Pass"]

# Cut the DataFrame into the groups identified and place in a new column
student_data["math_score_summary"] = pd.cut(student_data["math_score"], bins, labels=group_names, right=False)
student_data["reading_score_summary"] = pd.cut(student_data["reading_score"], bins, labels=group_names, right=False)

# Create a grouping based off the summary of each score.
# observed=False keeps both labels in the result even if one of them has no students.
student_data_math = student_data.groupby("math_score_summary", observed=False)
student_data_reading = student_data.groupby("reading_score_summary", observed=False)

# Count the students in each group and convert the counts to a % of all students
student_math_summary = student_data_math["math_score_summary"].count()
student_reading_summary = student_data_reading["reading_score_summary"].count()
student_math_summary_df = pd.DataFrame((student_math_summary/student_count_a)*100)
student_reading_summary_df = pd.DataFrame((student_reading_summary/student_count_a)*100)

# Store the passing percentages
student_math_summary_pass = float(student_math_summary_df.loc["Pass", "math_score_summary"])
student_reading_summary_pass = float(student_reading_summary_df.loc["Pass", "reading_score_summary"])

In [8]:
# Calculate the percentage of students who passed both the math and reading

# Define a procedure to compare the math and reading scores and output a Pass or Fail for the total score
def compare(row):
    """Return "Pass" when the row is marked "Pass" in both math and reading, otherwise "Fail".

    row: a mapping-like record (for example a DataFrame row) that provides the
    "math_score_summary" and "reading_score_summary" entries.
    """
    subject_columns = ("math_score_summary", "reading_score_summary")
    # all() stops at the first subject that is not a pass, like a chained `and`
    return "Pass" if all(row[subject] == "Pass" for subject in subject_columns) else "Fail"

# Label every student with an overall Pass/Fail by running compare() on each row
student_data["total_score_summary"] = student_data.apply(compare, axis=1)

# Create a grouping based on the overall result
student_data_total = student_data.groupby("total_score_summary")

# Count the students in each group and express the counts as a % of all students
student_total_summary = student_data_total["total_score_summary"].count()
student_total_summary_df = (student_total_summary / student_count_a * 100).to_frame()

# Store the overall passing percentage as a float
student_total_summary_pass = float(student_total_summary_df.at["Pass", "total_score_summary"])

In [9]:
# Collect the district metrics under their display labels, then build a one-row summary table
district_metrics = {
    "Total # of Schools": school_count_a,
    "Total # of Students": student_count_a,
    "Total Budget": total_budget,
    "Average Math Score": avg_math,
    "Average Reading Score": avg_reading,
    "% of Students Passing Math": student_math_summary_pass,
    "% of Students Passing Reading": student_reading_summary_pass,
    "% of Students Passing Both": student_total_summary_pass,
}
district_summary_df = pd.DataFrame({label: [value] for label, value in district_metrics.items()})

In [10]:
# Turn every metric into display text: whole-number counts, dollars, 2-decimal scores and percentages
display_formats = {
    "Total # of Schools": "{:,.0f}",
    "Total # of Students": "{:,.0f}",
    "Total Budget": "${:,.2f}",
    "Average Math Score": "{:.2f}",
    "Average Reading Score": "{:.2f}",
    "% of Students Passing Math": "{:.2f}%",
    "% of Students Passing Reading": "{:.2f}%",
    "% of Students Passing Both": "{:.2f}%",
}
for column_label, template in display_formats.items():
    district_summary_df[column_label] = district_summary_df[column_label].astype(float).map(template.format)

# Show the formatted summary
district_summary_df

Unnamed: 0,Total # of Schools,Total # of Students,Total Budget,Average Math Score,Average Reading Score,% of Students Passing Math,% of Students Passing Reading,% of Students Passing Both
0,15,39170,"$24,649,428.00",78.99,81.88,74.98%,85.81%,65.17%


## School Summary

* Create an overview table that summarizes key metrics about each school, including:
  * School Name
  * School Type
  * Total Students
  * Total School Budget
  * Per Student Budget
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)
  
* Create a dataframe to hold the above results

In [11]:
# Count the students at each school (one row per student in the merged data)
school_list = school_data_complete["school_name"].value_counts()

# Name the index and the counts explicitly rather than renaming whatever labels
# value_counts() produced: those default labels differ between pandas 1.x and 2.x,
# so renaming them by their old names does nothing on newer versions and the
# merge on "School Name" below would then fail.
school_list_df = school_list.rename_axis("School Name").reset_index(name="Total Students")

# Merge on the school name to pull in each school's type and budget
school_list_summary_df = school_list_df.merge(school_data, left_on="School Name", right_on="school_name")

# Delete the old School ID, the duplicate school name, and the size column
school_list_summary_df = school_list_summary_df.drop(columns=["School ID", "school_name", "size"])

# Calculate the Per Student Budget
school_list_summary_df["Per Student Budget"] = school_list_summary_df["budget"]/school_list_summary_df["Total Students"]

In [12]:
# Build 1/0 indicator columns (1 = "Pass", 0 = anything else) so the students who passed
# each subject can be summed per school.
# Only the three summary columns are converted; a frame-wide replace would also rewrite any
# other cell that happens to equal "Pass" or "Fail", and it depends on replacing values
# inside categorical columns, which newer pandas versions deprecate.
# assign() returns a new DataFrame, so student_data itself keeps its Pass/Fail labels.
summary_columns = ["math_score_summary", "reading_score_summary", "total_score_summary"]
student_data_adj = student_data.assign(
    **{column: (student_data[column] == "Pass").astype(int) for column in summary_columns}
)

In [13]:
# Group the per-student records by school name
school_grade_summary = student_data_adj.groupby(["school_name"])

# Average math and reading score per school, stored as one-column DataFrames under their display names
school_math_avg = school_grade_summary["math_score"].mean().to_frame(name="Avg Math Score")
school_reading_avg = school_grade_summary["reading_score"].mean().to_frame(name="Avg Reading Score")

# Summing the summary columns gives the number of students who passed math, reading, and both
school_math_summary = school_grade_summary["math_score_summary"].sum().to_frame()
school_reading_summary = school_grade_summary["reading_score_summary"].sum().to_frame()
school_total_summary = school_grade_summary["total_score_summary"].sum().to_frame()

# Fold the per-school tables into one DataFrame, one merge on the school name at a time
school_grade_sumamry_df = school_math_avg
for per_school_table in (school_reading_avg, school_math_summary, school_reading_summary, school_total_summary):
    school_grade_sumamry_df = school_grade_sumamry_df.merge(per_school_table, on="school_name")

# Move the school name out of the index into a column called "School Name"
school_grade_sumamry_df = school_grade_sumamry_df.reset_index().rename(columns={"school_name": "School Name"})

# Attach the test summary to the basic school list (students, type, budget)
school_list_complete = school_list_summary_df.merge(school_grade_sumamry_df, on="School Name")

# Convert each pass count into a percentage of the school's total students
for percent_label, passed_column in (
    ("% Passing Math", "math_score_summary"),
    ("% Passing Reading", "reading_score_summary"),
    ("% Passing Both", "total_score_summary"),
):
    school_list_complete[percent_label] = school_list_complete[passed_column] / school_list_complete["Total Students"] * 100

# Rename the columns for final formatting
school_list_complete = school_list_complete.rename(columns={"type": "School Type", "budget": "Total School Budget"})

# Keep a handle on the unformatted table (pass counts included) for the later analysis cells
analysis_df = school_list_complete

# Drop the columns that were used for calculations but are not needed in the summary;
# drop() returns a new DataFrame, so analysis_df still has them
school_list_complete = school_list_complete.drop(columns=["math_score_summary", "reading_score_summary", "total_score_summary"])

In [14]:
# Final formatting of the summary table values.
# school_list_complete_format is the same DataFrame object as school_list_complete,
# so the text formatting below is applied to school_list_complete as well.
school_list_complete_format = school_list_complete

# (column, numeric type to cast to, text template) for every column that gets formatted
column_formats = (
    ("Total Students", int, "{:,.0f}"),
    ("Total School Budget", float, "${:,.2f}"),
    ("Per Student Budget", float, "${:,.2f}"),
    ("Avg Math Score", float, "{:.2f}"),
    ("Avg Reading Score", float, "{:.2f}"),
    ("% Passing Math", float, "{:.2f}%"),
    ("% Passing Reading", float, "{:.2f}%"),
    ("% Passing Both", float, "{:.2f}%"),
)
for column_label, cast_to, template in column_formats:
    school_list_complete_format[column_label] = school_list_complete_format[column_label].astype(cast_to).map(template.format)

# Use the School Name as the DataFrame index
school_list_complete_format = school_list_complete_format.set_index("School Name")

# Display the final summary table.
school_list_complete_format

Unnamed: 0_level_0,Total Students,School Type,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,4976,District,"$3,124,928.00",$628.00,77.05,81.03,66.68%,81.93%,54.64%
Johnson High School,4761,District,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%
Hernandez High School,4635,District,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Rodriguez High School,3999,District,"$2,547,363.00",$637.00,76.84,80.74,66.37%,80.22%,52.99%
Figueroa High School,2949,District,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Huang High School,2917,District,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Ford High School,2739,District,"$1,763,916.00",$644.00,77.1,80.75,68.31%,79.30%,54.29%
Wilson High School,2283,Charter,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,90.58%
Cabrera High School,1858,Charter,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Wright High School,1800,Charter,"$1,049,400.00",$583.00,83.68,83.95,93.33%,96.61%,90.33%


## Top Performing Schools (By % Overall Passing)

* Sort and display the top five performing schools by % overall passing.

In [15]:
# "% Passing Both" holds display text such as "91.33%". Sorting text compares character by
# character (so "100.00%" would sort before "53.54%"), so rank the schools on the numeric
# value instead and reorder the formatted table to match, highest first.
pct_passing_both = (
    school_list_complete_format["% Passing Both"]
    .astype(str)
    .str.rstrip("%")
    .astype(float)
    .reset_index(drop=True)  # positional labels, so the sorted index is a row order
)
row_order = pct_passing_both.sort_values(ascending=False).index.tolist()
school_list_complete_format = school_list_complete_format.iloc[row_order]
school_list_complete_format.head(5)

Unnamed: 0_level_0,Total Students,School Type,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,1858,Charter,"$1,081,356.00",$582.00,83.06,83.98,94.13%,97.04%,91.33%
Thomas High School,1635,Charter,"$1,043,130.00",$638.00,83.42,83.85,93.27%,97.31%,90.95%
Griffin High School,1468,Charter,"$917,500.00",$625.00,83.35,83.82,93.39%,97.14%,90.60%
Wilson High School,2283,Charter,"$1,319,574.00",$578.00,83.27,83.99,93.87%,96.54%,90.58%
Pena High School,962,Charter,"$585,858.00",$609.00,83.84,84.04,94.59%,95.95%,90.54%


## Bottom Performing Schools (By % Overall Passing)

* Sort and display the five worst-performing schools by % overall passing.

In [16]:
# "% Passing Both" holds display text such as "52.99%". Sorting text compares character by
# character (so "9.50%" would sort after "53.54%"), so rank the schools on the numeric
# value instead and reorder the formatted table to match, lowest first.
pct_passing_both = (
    school_list_complete_format["% Passing Both"]
    .astype(str)
    .str.rstrip("%")
    .astype(float)
    .reset_index(drop=True)  # positional labels, so the sorted index is a row order
)
row_order = pct_passing_both.sort_values(ascending=True).index.tolist()
school_list_complete_format = school_list_complete_format.iloc[row_order]
school_list_complete_format.head(5)

Unnamed: 0_level_0,Total Students,School Type,Total School Budget,Per Student Budget,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,3999,District,"$2,547,363.00",$637.00,76.84,80.74,66.37%,80.22%,52.99%
Figueroa High School,2949,District,"$1,884,411.00",$639.00,76.71,81.16,65.99%,80.74%,53.20%
Huang High School,2917,District,"$1,910,635.00",$655.00,76.63,81.18,65.68%,81.32%,53.51%
Hernandez High School,4635,District,"$3,022,020.00",$652.00,77.29,80.93,66.75%,80.86%,53.53%
Johnson High School,4761,District,"$3,094,650.00",$650.00,77.07,80.97,66.06%,81.22%,53.54%


## Math Scores by Grade

* Create a table that lists the average Math Score for students of each grade level (9th, 10th, 11th, 12th) at each school.

  * Create a pandas series for each grade. Hint: use a conditional statement.
  
  * Group each series by school
  
  * Combine the series into a dataframe
  
  * Optional: give the displayed data cleaner formatting

In [17]:
# Work from the student table (math_df is another name for student_data, not a separate copy)
math_df = student_data

# One DataFrame per grade level
grade9_df, grade10_df, grade11_df, grade12_df = (
    math_df[math_df["grade"] == grade_label] for grade_label in ("9th", "10th", "11th", "12th")
)

# Group each grade's students by school name
grade9_group, grade10_group, grade11_group, grade12_group = (
    grade_frame.groupby("school_name") for grade_frame in (grade9_df, grade10_df, grade11_df, grade12_df)
)

# Average math score per school within each grade
grade9_math_avg, grade10_math_avg, grade11_math_avg, grade12_math_avg = (
    grade_group["math_score"].mean()
    for grade_group in (grade9_group, grade10_group, grade11_group, grade12_group)
)

# Pair each grade's averages with its column label and combine them into a DataFrame
dict_math_by_grade = dict(zip(
    ("9th Grade", "10th Grade", "11th Grade", "12th Grade"),
    (grade9_math_avg, grade10_math_avg, grade11_math_avg, grade12_math_avg),
))
math_by_grade_df = pd.DataFrame(dict_math_by_grade)

# Format every grade column as text with two decimals
for grade_column in dict_math_by_grade:
    math_by_grade_df[grade_column] = math_by_grade_df[grade_column].map("{:.2f}".format)

# Display the final analysis
math_by_grade_df

Unnamed: 0_level_0,9th Grade,10th Grade,11th Grade,12th Grade
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


## Reading Score by Grade 

* Perform the same operations as above for reading scores

In [18]:
# Work from the student table (reading_df is another name for student_data, not a separate copy)
reading_df = student_data

# Split the students into one DataFrame per grade level
grade_levels = ["9th", "10th", "11th", "12th"]
grade9_df, grade10_df, grade11_df, grade12_df = [
    reading_df[reading_df["grade"] == level] for level in grade_levels
]

# Group each grade's students by school name
grade9_group, grade10_group, grade11_group, grade12_group = [
    per_grade.groupby("school_name") for per_grade in (grade9_df, grade10_df, grade11_df, grade12_df)
]

# Average reading score per school within each grade
grade9_reading_avg, grade10_reading_avg, grade11_reading_avg, grade12_reading_avg = [
    grouped["reading_score"].mean()
    for grouped in (grade9_group, grade10_group, grade11_group, grade12_group)
]

# Pair each grade's averages with its column label and combine them into a DataFrame
grade_column_labels = ["9th Grade", "10th Grade", "11th Grade", "12th Grade"]
dict_reading_by_grade = dict(zip(
    grade_column_labels,
    [grade9_reading_avg, grade10_reading_avg, grade11_reading_avg, grade12_reading_avg],
))
reading_by_grade_df = pd.DataFrame(dict_reading_by_grade)

# Format every grade column as text with two decimals
for column_label in grade_column_labels:
    reading_by_grade_df[column_label] = reading_by_grade_df[column_label].map("{:.2f}".format)

# Display the final analysis
reading_by_grade_df

Unnamed: 0_level_0,9th Grade,10th Grade,11th Grade,12th Grade
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23
Pena High School,83.81,83.61,84.34,84.59


## Scores by School Spending

* Create a table that breaks down school performances based on average Spending Ranges (Per Student). Use 4 reasonable bins to group school spending. Include in the table each of the following:
  * Average Math Score
  * Average Reading Score
  * % Passing Math
  * % Passing Reading
  * % Overall Passing (The percentage of students that passed math **and** reading.)

In [19]:
# analysis_spend is the same DataFrame object as analysis_df, so the column added below shows up on analysis_df too
analysis_spend = analysis_df

# Bin edges for the per-student budget, and the label for each of the four bins
bins = [0, 584.99, 629.99, 644.99, 680]
group_names = ["<$585", "$585-$630", "$630-$645", "$645-$680"]

# Tag every school with its spending range
analysis_spend["Spending Ranges (Per Student)"] = pd.cut(
    analysis_spend["Per Student Budget"], bins, labels=group_names, include_lowest=True
)

# Group the schools by spending range
analysis_spend_g = analysis_spend.groupby("Spending Ranges (Per Student)")

# Mean of each school-level metric within every spending range
score_columns = ["Avg Math Score", "Avg Reading Score"]
percent_columns = ["% Passing Math", "% Passing Reading", "% Passing Both"]
(
    analysis_group_avg_math,
    analysis_group_avg_read,
    analysis_group_avg_pass_math,
    analysis_group_avg_pass_read,
    analysis_group_avg_pass_total,
) = (analysis_spend_g[metric].mean() for metric in score_columns + percent_columns)

# Collect the per-range means under their display labels and build the results table
dict_spend_per_student = dict(zip(
    score_columns + percent_columns,
    (
        analysis_group_avg_math,
        analysis_group_avg_read,
        analysis_group_avg_pass_math,
        analysis_group_avg_pass_read,
        analysis_group_avg_pass_total,
    ),
))
spend_per_student = pd.DataFrame(dict_spend_per_student)

# Format scores with two decimals and percentages with two decimals plus a % sign
for metric in score_columns:
    spend_per_student[metric] = spend_per_student[metric].astype(float).map("{:.2f}".format)
for metric in percent_columns:
    spend_per_student[metric] = spend_per_student[metric].astype(float).map("{:.2f}%".format)

# Display the table
spend_per_student

Unnamed: 0_level_0,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$585,83.46,83.93,93.46%,96.61%,90.37%
$585-$630,81.9,83.16,87.13%,92.72%,81.42%
$630-$645,78.52,81.62,73.48%,84.39%,62.86%
$645-$680,77.0,81.03,66.16%,81.13%,53.53%


## Scores by School Size

* Perform the same operations as above, based on school size.

In [20]:
# analysis_size is the same DataFrame object as analysis_df, so the column added below shows up on analysis_df too
analysis_size = analysis_df

# Create the bins in which Data will be held
bins = [0, 999, 1999, 5000]

# Create the names for the bins (the "Large" label previously lacked its closing parenthesis)
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]

# Cut the DataFrame into the groups identified and place in a new column
analysis_size["School Size"] = pd.cut(analysis_size["Total Students"], bins, labels=group_names, include_lowest=True)

# Group the data based on the bins created.
# observed=False keeps every size label in the result even if no school falls into it.
analysis_size_g = analysis_size.groupby("School Size", observed=False)

# Perform the calculations to determine the mean of each category
analysis_group_avg_math = analysis_size_g["Avg Math Score"].mean()
analysis_group_avg_read = analysis_size_g["Avg Reading Score"].mean()
analysis_group_avg_pass_math = analysis_size_g["% Passing Math"].mean()
analysis_group_avg_pass_read = analysis_size_g["% Passing Reading"].mean()
analysis_group_avg_pass_total = analysis_size_g["% Passing Both"].mean()

# Develop a dictionary of the data sets, keyed by display label
dict_school_size = {"Avg Math Score": analysis_group_avg_math,
                    "Avg Reading Score": analysis_group_avg_read,
                    "% Passing Math": analysis_group_avg_pass_math,
                    "% Passing Reading": analysis_group_avg_pass_read,
                    "% Passing Both": analysis_group_avg_pass_total
                   }

# Create a DataFrame of the dictionary
school_size_results = pd.DataFrame(dict_school_size)

# Formatting of the data in the table: two decimals for scores, two decimals plus % for pass rates
school_size_results["Avg Math Score"] = school_size_results["Avg Math Score"].astype(float).map("{:.2f}".format)
school_size_results["Avg Reading Score"] = school_size_results["Avg Reading Score"].astype(float).map("{:.2f}".format)
school_size_results["% Passing Math"] = school_size_results["% Passing Math"].astype(float).map("{:.2f}%".format)
school_size_results["% Passing Reading"] = school_size_results["% Passing Reading"].astype(float).map("{:.2f}%".format)
school_size_results["% Passing Both"] = school_size_results["% Passing Both"].astype(float).map("{:.2f}%".format)

# Display the table
school_size_results

Unnamed: 0_level_0,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.82,83.93,93.55%,96.10%,89.88%
Medium (1000-2000),83.37,83.86,93.60%,96.79%,90.62%
Large (2000-5000,77.75,81.34,69.96%,82.77%,58.29%


## Scores by School Type

* Perform the same operations as above, based on school type

In [21]:
# analysis_type is another name for analysis_df (same DataFrame object)
analysis_type = analysis_df

# Group the schools by their type
analysis_type_g = analysis_type.groupby("School Type")

# Mean of each school-level metric within every school type
score_columns = ["Avg Math Score", "Avg Reading Score"]
percent_columns = ["% Passing Math", "% Passing Reading", "% Passing Both"]
(
    analysis_group_avg_math,
    analysis_group_avg_read,
    analysis_group_avg_pass_math,
    analysis_group_avg_pass_read,
    analysis_group_avg_pass_total,
) = [analysis_type_g[metric].mean() for metric in score_columns + percent_columns]

# Collect the per-type means under their display labels and build the results table
dict_school_type = dict(zip(
    score_columns + percent_columns,
    [
        analysis_group_avg_math,
        analysis_group_avg_read,
        analysis_group_avg_pass_math,
        analysis_group_avg_pass_read,
        analysis_group_avg_pass_total,
    ],
))
school_type_results = pd.DataFrame(dict_school_type)

# Format scores with two decimals and percentages with two decimals plus a % sign
for metric in score_columns:
    school_type_results[metric] = school_type_results[metric].astype(float).map("{:.2f}".format)
for metric in percent_columns:
    school_type_results[metric] = school_type_results[metric].astype(float).map("{:.2f}%".format)

# Display the table
school_type_results

Unnamed: 0_level_0,Avg Math Score,Avg Reading Score,% Passing Math,% Passing Reading,% Passing Both
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.47,83.9,93.62%,96.59%,90.43%
District,76.96,80.97,66.55%,80.80%,53.67%
