In [1]:
# ES TO DO:
# FIX FORMATTING ISSUE WITH naN (list as challenge).
# MERGE ALL CELLS WHERE APPROPRIATE BASED NECESSARY OUTPUTS DONE.
    # RETAIN ALL CELLS WRT THOMAS HIGH AS INDIVIDUAL TO FACILITATE REMOVAL
# REFACTOR FLOW HERE TO INCLUDE PYCITYSCHOOLS DATA BEFORE AND AFTER INLINE

In [2]:
# Add dependencies
import pandas as pd
import os
import numpy as np

school_data_csv = os.path.join("Resources","schools_complete.csv")
student_data_csv = os.path.join("Resources","students_complete.csv")

#load school and student data into pandas dataframe.
school_df = pd.read_csv(school_data_csv)
student_df = pd.read_csv(student_data_csv)

## DATA CLEANUP

The following block of code conducts two necessary cleanups of the data:
* A trend of students tampering with data by way of using honorifics (Mr., Mrs., etc) and academic credentials (DDS, MD, etc) during name entry
* Eliminating Thomas High School 9th graders from data set due to tampering scandal.

In [3]:
# generate list of identified prefixes and suffixes for cleanup
prefixes_suffixes = ["Mr. ", "Mrs. ", "Miss ", "Ms. ", "Dr. ", " DDS", " DVM", " MD", " PhD"]
# print(prefixes_suffixes)

# Iterate through prefixes and suffixes and remove all usages from data set
for word in prefixes_suffixes:
    student_df["student_name"] = student_df["student_name"].str.replace(word,"")

# Nullify 9th grade scores from Thomas High due to cheating
student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] == "9th"),"reading_score"] = np.nan
student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] == "9th"),"math_score"] = np.nan

# Confirm 9th graders at Thomas High have grades set to NaN
# student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] == "9th")]

# student_df

  import sys


## DATA ANALYSIS

This is where the analysis component of this script begins.

### District-level Analysis

This first step calculates aggregations for the district as a whole, including:
* Total number of schools, students, and budget
* Average reading and math test scores
* Number of students passing in reading, math, and overall

In [4]:
# Combine school and student dataframes into single data set
school_data_complete_df = pd.merge(student_df, school_df, on=["school_name", "school_name"])

# identify total number of students and decrement count by number of Thomas High 9th graders for purposes of grade tallies.
student_count = school_data_complete_df["Student ID"].count()
students_to_remove = student_df["student_name"].loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] == "9th")].count()
amended_student_count = student_count - students_to_remove

# Calculate aggregate district-wide metrics based on complete school data
school_count = school_df["School ID"].count()
total_budget = school_df["budget"].sum()
average_reading_score = school_data_complete_df["reading_score"].mean()
average_math_score = school_data_complete_df["math_score"].mean()

# Calculate number and percentage of students passing math or reading
passing_math = school_data_complete_df[school_data_complete_df["math_score"] >= 70]
passing_reading = school_data_complete_df[school_data_complete_df["reading_score"] >= 70]

passing_math_count = passing_math["Student ID"].count()
passing_reading_count = passing_reading["Student ID"].count()

passing_math_pct = (passing_math_count / float(amended_student_count)) * 100
passing_reading_pct = (passing_reading_count / float(amended_student_count)) * 100                                     

# get count of students passing *both* math and reading
passing_math_reading = school_data_complete_df[(school_data_complete_df["math_score"] >= 70) 
                                               & (school_data_complete_df["reading_score"] >= 70)]
passing_math_reading_count = passing_math_reading["Student ID"].count()
passing_math_reading_pct = (passing_math_reading_count / float(amended_student_count)) * 100

# Assemble data frame for district summary statistics.
# Use dictionary to store each kvp.
district_summary_df = pd.DataFrame(
        [
            {"Total Schools": school_count,
            "Total Students": student_count,
            "Total Budget": total_budget,
            "Average Reading Score": average_reading_score,
            "Average Math Score": average_math_score,
            "% Passing Reading": passing_reading_pct,
            "% Passing Math": passing_math_pct,
            "% Overall Passing": passing_math_reading_pct}
        ]
)

# Apply formatting to new cells
district_summary_df["Total Students"] = district_summary_df["Total Students"].map("{:,}".format)
district_summary_df["Total Budget"] = district_summary_df["Total Budget"].map("${:,.2f}".format)
district_summary_df["Average Reading Score"] = district_summary_df["Average Reading Score"].map("{:.1f}".format)
district_summary_df["Average Math Score"] = district_summary_df["Average Math Score"].map("{:.1f}".format)
district_summary_df["% Passing Reading"] = district_summary_df["% Passing Reading"].map("{:.1f}".format)
district_summary_df["% Passing Math"] = district_summary_df["% Passing Math"].map("{:.1f}".format)
district_summary_df["% Overall Passing"] = district_summary_df["% Overall Passing"].map("{:.1f}".format)

# reassign column orders
new_column_order = ["Total Schools", "Total Students", "Total Budget", "Average Math Score", "Average Reading Score",
                     "% Passing Math", "% Passing Reading", "% Overall Passing"]
                    
district_summary_df = district_summary_df[new_column_order]
district_summary_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing
0,15,39170,"$24,649,428.00",78.9,81.9,74.8,85.7,64.9


### School-level averages

In [5]:
per_school_types = school_df.set_index(["school_name"])["type"]

# Extracting budget and student body size into Series, calculating additional series to identify per/capita budgetary spend
per_school_counts = school_df.set_index("school_name")["size"]
per_school_budget = school_df.set_index("school_name")["budget"]
per_school_capita = per_school_budget / per_school_counts

# Break up into more granular dataframes for each grade type
per_school_math_averages = school_data_complete_df.groupby(["school_name"]).mean()["math_score"]
per_school_reading_averages = school_data_complete_df.groupby(["school_name"]).mean()["reading_score"]

# Filter data frames to identify all students passing math and reading
per_school_passing_math = school_data_complete_df[(school_data_complete_df["math_score"] >= 70)]
per_school_passing_reading = school_data_complete_df[(school_data_complete_df["reading_score"] >= 70)]

#Calculate total number of students passing math
school_passing_math = per_school_passing_math.groupby(["school_name"]).count()["student_name"]
school_passing_reading = per_school_passing_reading.groupby(["school_name"]).count()["student_name"]

# overwrite raw num with percentages.  Is this acceptable best practice?
school_passing_math = school_passing_math / per_school_counts * 100
school_passing_reading = school_passing_reading / per_school_counts * 100

# aggregate overall passing count by school
students_passing_math_reading = school_data_complete_df[(school_data_complete_df["math_score"] >= 70) 
                                                           & (school_data_complete_df["reading_score"] >= 70)]
per_school_passing_math_reading = students_passing_math_reading.groupby("school_name").count()["student_name"]
                                                  
#Modify data frame to display percentages
per_school_passing_math_reading = per_school_passing_math_reading / per_school_counts * 100

#Generate and display DataFrame.                                                  
per_school_results_df = pd.DataFrame({"School Type": per_school_types,
                                  "Student Count": per_school_counts,
                                   "Total School Budget": per_school_budget,
                                   "Per Student Budget": per_school_capita,
                                   "Average Math Score": per_school_math_averages,
                                   "Average Reading Score": per_school_reading_averages,
                                   "% Passing Math": school_passing_math,
                                   "% Passing Reading": school_passing_reading,
                                   "% Passing Overall": per_school_passing_math_reading
                                  })


# per_school_results_df

### Additional Cleanup for Thomas High score tampering

This section runs secondary calculations in service of amending Thomas High overall statistics by removing all ninth graders from the data set, and returns the amended data set.


In [6]:
# temporary home to per_school_results_df tweaks
eligible_thomas_students = student_df["student_name"].loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] != "9th")].count()
eligible_thomas_students_passing_math = pd.DataFrame(
    student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] != "9th") & (student_df["math_score"] >= 70)]
)

eligible_thomas_students_passing_reading = pd.DataFrame(
    student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] != "9th") & (student_df["reading_score"] >= 70)]
)

eligible_thomas_students_passing_overall = pd.DataFrame(
    student_df.loc[(student_df["school_name"] == "Thomas High School") & (student_df["grade"] != "9th") & (student_df["math_score"] >= 70) & (student_df["reading_score"] >= 70)]
)

pct_eligible_thomas_math_passing = (eligible_thomas_students_passing_math["student_name"].count() / eligible_thomas_students) * 100
pct_eligible_thomas_reading_passing = (eligible_thomas_students_passing_reading["student_name"].count() / eligible_thomas_students) * 100
pct_eligible_thomas_overall_passing = (eligible_thomas_students_passing_overall["student_name"].count() / eligible_thomas_students) * 100

per_school_results_df.loc[(per_school_results_df.index == "Thomas High School"),"% Passing Math"] = pct_eligible_thomas_math_passing
per_school_results_df.loc[(per_school_results_df.index == "Thomas High School"),"% Passing Reading"] = pct_eligible_thomas_reading_passing
per_school_results_df.loc[(per_school_results_df.index == "Thomas High School"),"% Passing Overall"] = pct_eligible_thomas_overall_passing
per_school_results_df

Unnamed: 0_level_0,School Type,Student Count,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bailey High School,District,4976,3124928,628.0,77.048432,81.033963,66.680064,81.93328,54.642283
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Ford High School,District,2739,1763916,644.0,77.102592,80.746258,68.309602,79.299014,54.289887
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Holden High School,Charter,427,248087,581.0,83.803279,83.814988,92.505855,96.252927,89.227166
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


### Best and Worst Performing Schools

The following two data frames show the best and worst performing schools according to their overall passing rate.

In [7]:
# sort and show top 5 schools
top_schools = per_school_results_df.sort_values(["% Passing Overall"], ascending=False)
top_schools.head(5)

Unnamed: 0_level_0,School Type,Student Count,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,Charter,1858,1081356,582.0,83.061895,83.97578,94.133477,97.039828,91.334769
Thomas High School,Charter,1635,1043130,638.0,83.350937,83.896082,93.18569,97.018739,90.630324
Griffin High School,Charter,1468,917500,625.0,83.351499,83.816757,93.392371,97.138965,90.599455
Wilson High School,Charter,2283,1319574,578.0,83.274201,83.989488,93.867718,96.539641,90.582567
Pena High School,Charter,962,585858,609.0,83.839917,84.044699,94.594595,95.945946,90.540541


In [8]:
# sort and show bottom 5 schools
top_schools = per_school_results_df.sort_values(["% Passing Overall"], ascending=True)
top_schools.head(5)

Unnamed: 0_level_0,School Type,Student Count,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,District,3999,2547363,637.0,76.842711,80.744686,66.366592,80.220055,52.988247
Figueroa High School,District,2949,1884411,639.0,76.711767,81.15802,65.988471,80.739234,53.204476
Huang High School,District,2917,1910635,655.0,76.629414,81.182722,65.683922,81.316421,53.513884
Hernandez High School,District,4635,3022020,652.0,77.289752,80.934412,66.752967,80.862999,53.527508
Johnson High School,District,4761,3094650,650.0,77.072464,80.966394,66.057551,81.222432,53.539172


### Grade- and school-level analysis

These dataframes display performance for math, reading, and overall passing rates by school and grade.

In [9]:
ninth_graders = school_data_complete_df[(school_data_complete_df["grade"] == "9th")]
tenth_graders = school_data_complete_df[(school_data_complete_df["grade"] == "10th")]
eleventh_graders = school_data_complete_df[(school_data_complete_df["grade"] == "11th")]
twelfth_graders = school_data_complete_df[(school_data_complete_df["grade"] == "12th")]

ninth_grade_math_scores = ninth_graders.groupby("school_name").mean()["math_score"]
tenth_grade_math_scores = tenth_graders.groupby("school_name").mean()["math_score"]
eleventh_grade_math_scores = eleventh_graders.groupby("school_name").mean()["math_score"]
twelfth_grade_math_scores = twelfth_graders.groupby("school_name").mean()["math_score"]

ninth_grade_reading_scores = ninth_graders.groupby("school_name").mean()["reading_score"]
tenth_grade_reading_scores = tenth_graders.groupby("school_name").mean()["reading_score"]
eleventh_grade_reading_scores = eleventh_graders.groupby("school_name").mean()["reading_score"]
twelfth_grade_reading_scores = twelfth_graders.groupby("school_name").mean()["reading_score"]

math_scores_by_grade = pd.DataFrame({
    "9th": ninth_grade_math_scores,
    "10th": tenth_grade_math_scores,
    "11th": eleventh_grade_math_scores,
    "12th": twelfth_grade_math_scores})

reading_scores_by_grade = pd.DataFrame({
    "9th": ninth_grade_reading_scores,
    "10th": tenth_grade_reading_scores,
    "11th": eleventh_grade_reading_scores,
    "12th": twelfth_grade_reading_scores})

In [10]:
math_scores_by_grade["9th"] = math_scores_by_grade["9th"].map("{:.1f}".format)
math_scores_by_grade["10th"] = math_scores_by_grade["10th"].map("{:.1f}".format)
math_scores_by_grade["11th"] = math_scores_by_grade["11th"].map("{:.1f}".format)
math_scores_by_grade["12th"] = math_scores_by_grade["12th"].map("{:.1f}".format)

#make sure cols are in correct order
math_scores_by_grade = math_scores_by_grade[["9th", "10th", "11th", "12th"]]

#remove index name
math_scores_by_grade.index.name = None

math_scores_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.1,77.0,77.5,76.5
Cabrera High School,83.1,83.2,82.8,83.3
Figueroa High School,76.4,76.5,76.9,77.2
Ford High School,77.4,77.7,76.9,76.2
Griffin High School,82.0,84.2,83.8,83.4
Hernandez High School,77.4,77.3,77.1,77.2
Holden High School,83.8,83.4,85.0,82.9
Huang High School,77.0,75.9,76.4,77.2
Johnson High School,77.2,76.7,77.5,76.9
Pena High School,83.6,83.4,84.3,84.1


In [11]:
reading_scores_by_grade["9th"] = reading_scores_by_grade["9th"].map("{:.1f}".format)
reading_scores_by_grade["10th"] = reading_scores_by_grade["10th"].map("{:.1f}".format)
reading_scores_by_grade["11th"] = reading_scores_by_grade["11th"].map("{:.1f}".format)
reading_scores_by_grade["12th"] = reading_scores_by_grade["12th"].map("{:.1f}".format)

#make sure cols are in correct order
reading_scores_by_grade = math_scores_by_grade[["9th", "10th", "11th", "12th"]]

#remove index name
reading_scores_by_grade.index.name = None

reading_scores_by_grade

Unnamed: 0,9th,10th,11th,12th
Bailey High School,77.1,77.0,77.5,76.5
Cabrera High School,83.1,83.2,82.8,83.3
Figueroa High School,76.4,76.5,76.9,77.2
Ford High School,77.4,77.7,76.9,76.2
Griffin High School,82.0,84.2,83.8,83.4
Hernandez High School,77.4,77.3,77.1,77.2
Holden High School,83.8,83.4,85.0,82.9
Huang High School,77.0,75.9,76.4,77.2
Johnson High School,77.2,76.7,77.5,76.9
Pena High School,83.6,83.4,84.3,84.1


### Performance by Budget Size

This DataFrame segregates schools into 4 bins of roughly equal composition to determine performance based on spending amount per student. 

In [12]:
# Per school statistics budgetary
# per_school_capita.describe()

# create bins for segmenting schools based on per capita budget
spending_bins = [0, 585, 630, 645, 675]
group_names = ["<$584", "$585-629", "$630-644", "$645-675"]
# create segment data using cut() and then identify schools in buckets using groupby() and count()
per_school_capita.groupby(pd.cut(per_school_capita, spending_bins)).count()

per_school_results_df["Spending Ranges (Per Student)"] = pd.cut(per_school_capita, spending_bins, labels=group_names)

# Originally had error with using mean() due to prior formatting issues.  map(format) converts float64 to object.
# Resolved here:
# https://towardsdatascience.com/a-quick-way-to-reformat-columns-in-a-pandas-dataframe-80d0b70de026
spending_math_scores = per_school_results_df.groupby(['Spending Ranges (Per Student)']).mean()['Average Math Score'].astype(float)
spending_reading_scores = per_school_results_df.groupby(['Spending Ranges (Per Student)']).mean()['Average Reading Score'].astype(float)
spending_passing_math = per_school_results_df.groupby(['Spending Ranges (Per Student)']).mean()['% Passing Math'].astype(float)
spending_passing_reading = per_school_results_df.groupby(['Spending Ranges (Per Student)']).mean()['% Passing Reading'].astype(float)
overall_passing_spending = per_school_results_df.groupby(['Spending Ranges (Per Student)']).mean()['% Passing Overall'].astype(float)

spending_summary_df = pd.DataFrame(
    {"Average Math Score": spending_math_scores,
     "Average Reading Score": spending_reading_scores,
     "% Passing Math": spending_passing_math,
     "% Passing Reading": spending_passing_reading,
     "% Passing Overall": overall_passing_spending
    }
)

spending_summary_df["Average Math Score"] = spending_summary_df["Average Math Score"].map("{:.1f}".format)
spending_summary_df["Average Reading Score"] = spending_summary_df["Average Reading Score"].map("{:.1f}".format)
spending_summary_df["% Passing Math"] = spending_summary_df["% Passing Math"].map("{:.0f}".format)
spending_summary_df["% Passing Reading"] = spending_summary_df["% Passing Reading"].map("{:.0f}".format)
spending_summary_df["% Passing Overall"] = spending_summary_df["% Passing Overall"].map("{:.0f}".format)

spending_summary_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
Spending Ranges (Per Student),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
<$584,83.5,83.9,93,97,90
$585-629,81.9,83.2,87,93,81
$630-644,78.5,81.6,73,84,63
$645-675,77.0,81.0,66,81,54


### Performance by School Size

This DataFrame segregates schools into 4 bins of roughly equal composition to determine performance per student body size. 

In [13]:
# Initial analysis for bucketing schools by size
# per_school_counts.describe()

# create bins for segmenting schools based on size
size_bins = [0, 1000, 2000, 5000]
group_names = ["Small (<1000)", "Medium (1000-2000)", "Large (2000-5000)"]
per_school_results_df["School Size"] = pd.cut(per_school_results_df["Student Count"], size_bins, labels=group_names)

# Create series based on passing rates
school_size_math_scores = per_school_results_df.groupby(["School Size"]).mean()["Average Math Score"]
school_size_reading_scores = per_school_results_df.groupby(["School Size"]).mean()["Average Reading Score"]
school_size_passing_math = per_school_results_df.groupby(["School Size"]).mean()["% Passing Math"]
school_size_passing_reading = per_school_results_df.groupby(["School Size"]).mean()["% Passing Reading"]
school_size_passing_overall = per_school_results_df.groupby(["School Size"]).mean()["% Passing Overall"]

# Aggregate series into DataFrame.
school_size_summary_df = pd.DataFrame(
    {"Average Math Score": school_size_math_scores,
     "Average Reading Score": school_size_reading_scores,
     "% Passing Math": school_size_passing_math,
     "% Passing Reading": school_size_passing_reading,
     "% Passing Overall": school_size_passing_overall
    }
)


# Apply Formatting.
school_size_summary_df["Average Math Score"] = school_size_summary_df["Average Math Score"].map("{:.1f}".format)
school_size_summary_df["Average Reading Score"] = school_size_summary_df["Average Reading Score"].map("{:.1f}".format)
school_size_summary_df["% Passing Math"] = school_size_summary_df["% Passing Math"].map("{:.0f}".format)
school_size_summary_df["% Passing Reading"] = school_size_summary_df["% Passing Reading"].map("{:.0f}".format)
school_size_summary_df["% Passing Overall"] = school_size_summary_df["% Passing Overall"].map("{:.0f}".format)

school_size_summary_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.8,83.9,94,96,90
Medium (1000-2000),83.4,83.9,94,97,91
Large (2000-5000),77.7,81.3,70,83,58


### Performance by School Type

Here we segregate schools based on whether they are district or charter schools and compare performance.

In [14]:
school_type_math_scores = per_school_results_df.groupby("School Type").mean()["Average Math Score"]
school_type_reading_scores = per_school_results_df.groupby("School Type").mean()["Average Reading Score"]
school_type_passing_math = per_school_results_df.groupby("School Type").mean()["% Passing Math"]
school_type_passing_reading = per_school_results_df.groupby("School Type").mean()["% Passing Reading"]
school_type_passing_overall = per_school_results_df.groupby("School Type").mean()["% Passing Overall"]

school_type_summary_df = pd.DataFrame(
    {"Average Math Score": school_type_math_scores,
    "Average Reading Score": school_type_reading_scores,
    "% Passing Math": school_type_passing_math,
     "% Passing Reading": school_type_passing_reading,
     "% Passing Overall": school_type_passing_overall
    }
)


school_type_summary_df["Average Math Score"] = school_type_summary_df["Average Math Score"].map("{:.1f}".format)
school_type_summary_df["Average Reading Score"] = school_type_summary_df["Average Reading Score"].map("{:.1f}".format)
school_type_summary_df["% Passing Math"] = school_type_summary_df["% Passing Math"].map("{:.0f}".format)
school_type_summary_df["% Passing Reading"] = school_type_summary_df["% Passing Reading"].map("{:.0f}".format)
school_type_summary_df["% Passing Overall"] = school_type_summary_df["% Passing Overall"].map("{:.0f}".format)

school_type_summary_df

Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Passing Overall
School Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.5,83.9,94,97,90
District,77.0,81.0,67,81,54


# Contained here at the bottom due to data casting issues.

In [15]:
per_school_results_df["Student Count"] = per_school_results_df["Student Count"].map("{:,}".format)
per_school_results_df["Total School Budget"] = per_school_results_df["Total School Budget"].map("${:,.2f}".format)
per_school_results_df["Per Student Budget"] = per_school_results_df["Per Student Budget"].map("${:,.2f}".format)
# per_school_results_df["Average Math Score"] = per_school_results_df["Average Math Score"].map("{:.1f}".format)
per_school_results_df["Average Reading Score"] = per_school_results_df["Average Reading Score"].map("{:.1f}".format)
per_school_results_df["% Passing Math"] = per_school_results_df["% Passing Math"].map("{:.1f}".format)
per_school_results_df["% Passing Reading"] = per_school_results_df["% Passing Reading"].map("{:.1f}".format)
per_school_results_df["% Passing Overall"] = per_school_results_df["% Passing Overall"].map("{:.1f}".format)

per_school_results_df.dtypes

School Type                        object
Student Count                      object
Total School Budget                object
Per Student Budget                 object
Average Math Score                float64
Average Reading Score              object
% Passing Math                     object
% Passing Reading                  object
% Passing Overall                  object
Spending Ranges (Per Student)    category
School Size                      category
dtype: object