In [1]:
# import pandas library
import pandas as pd
import numpy as np
import os

# Set paths for file
file_schools = "schools_complete.csv"
file_students = "students_complete.csv"

# Read Data file with the pandas library
file_schools_df = pd.read_csv(file_schools)
file_students_df = pd.read_csv(file_students)   

# Show just the header for schools
file_schools_df.head()

Unnamed: 0,School ID,name,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [2]:
#Inspect Columns 
file_schools_df.columns

Index(['School ID', 'name', 'type', 'size', 'budget'], dtype='object')

In [3]:
#Rename column name "name" in file_schools_df to "school" since student file also has a name column.
file_schools_df = file_schools_df.rename(columns={"name": "school"})
file_schools_df.head()

Unnamed: 0,School ID,school,type,size,budget
0,0,Huang High School,District,2917,1910635
1,1,Figueroa High School,District,2949,1884411
2,2,Shelton High School,Charter,1761,1056600
3,3,Hernandez High School,District,4635,3022020
4,4,Griffin High School,Charter,1468,917500


In [4]:
#Dataframe of the Students file
file_students_df.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score
0,0,Paul Bradley,M,9th,Huang High School,66,79
1,1,Victor Smith,M,12th,Huang High School,94,61
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58
4,4,Bonnie Ray,F,9th,Huang High School,97,84


In [5]:
total_students_per_school = file_students_df.groupby('school')
total_students_per_school[['name']].count()

Unnamed: 0_level_0,name
school,Unnamed: 1_level_1
Bailey High School,4976
Cabrera High School,1858
Figueroa High School,2949
Ford High School,2739
Griffin High School,1468
Hernandez High School,4635
Holden High School,427
Huang High School,2917
Johnson High School,4761
Pena High School,962


In [7]:
# DISTRICT SUMMARY



# Total number of Schools in the schools_complete.csv
total_schools = file_schools_df["school"].count()

# Total Students in the students_complete.csv
total_students = file_students_df["school"].count()

# Total Budget formatted to include $
total_budget = "${:,.2f}".format(file_schools_df["budget"].sum())

# Average Math Score
ave_math_score = round((file_students_df["math_score"].mean()), 2)

# Average Reading Score
ave_reading_score = round((file_students_df["reading_score"].mean()), 2)

# % Passing Math - assuming the scores are 60 or greater
math_passed_count = file_students_df.loc[file_students_df["math_score"] >= 60].count()["name"]
per_passing_math = round(((math_passed_count / total_students) * 100), 2)

# % Passing Reading - assuming the scores are 60 or greater
reading_passed_count = file_students_df.loc[file_students_df["reading_score"] >= 60].count()["name"]
per_passing_reading = round(((reading_passed_count / total_students) * 100), 2)

# % overall passing rate - assuming students who passed both reading and math

per_overall_passing_rate = round(((per_passing_math + per_passing_reading) / 2), 2)


# Create data dictionary and dataframe
District_Record = {'Total Schools':[total_schools],'Total Students': [total_students],
                    'Total Budget':[total_budget], 'Average Math Score': [ave_math_score],
                    'Average Reading Score':[ave_reading_score],
                    '% Passing Math':[per_passing_math],
                    '% Passing Reading':[per_passing_reading],
                    '% Overall Passing Rate': [per_overall_passing_rate]
                    }

pd_Summary = pd.DataFrame(District_Record)
pd_Summary = pd_Summary[["Total Schools", "Total Students", "Total Budget", 
                         "Average Math Score", "Average Reading Score",
                         "% Passing Math", "% Passing Reading", "% Overall Passing Rate"]
                     ]
#Print Header for District SUmmmary
print("DISTRICT SUMMARY\n")
print("---------------------------------------------------------------------\n")
pd_Summary


Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
0,15,39170,"$24,649,428.00",78.99,81.88,92.45,100.0,96.22


In [8]:
# SCHOOL SUMMARY

# Merge School and Student file on the school name
df_merged = file_students_df.merge(file_schools_df, how = 'left', on = 'school')
df_merged.head()

Unnamed: 0,Student ID,name,gender,grade,school,reading_score,math_score,School ID,type,size,budget
0,0,Paul Bradley,M,9th,Huang High School,66,79,0,District,2917,1910635
1,1,Victor Smith,M,12th,Huang High School,94,61,0,District,2917,1910635
2,2,Kevin Rodriguez,M,12th,Huang High School,90,60,0,District,2917,1910635
3,3,Dr. Richard Scott,M,12th,Huang High School,67,58,0,District,2917,1910635
4,4,Bonnie Ray,F,9th,Huang High School,97,84,0,District,2917,1910635


In [9]:
# Group the merged data by school name
group_df_merged = df_merged.set_index('school').groupby(['school'])
group_df_merged.head()

Unnamed: 0_level_0,Student ID,name,gender,grade,reading_score,math_score,School ID,type,size,budget
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Huang High School,0,Paul Bradley,M,9th,66,79,0,District,2917,1910635
Huang High School,1,Victor Smith,M,12th,94,61,0,District,2917,1910635
Huang High School,2,Kevin Rodriguez,M,12th,90,60,0,District,2917,1910635
Huang High School,3,Dr. Richard Scott,M,12th,67,58,0,District,2917,1910635
Huang High School,4,Bonnie Ray,F,9th,97,84,0,District,2917,1910635
Figueroa High School,2917,Amy Jacobs,F,10th,85,87,1,District,2949,1884411
Figueroa High School,2918,Nathan Campbell,M,12th,97,84,1,District,2949,1884411
Figueroa High School,2919,Randall Stewart,M,12th,67,77,1,District,2949,1884411
Figueroa High School,2920,Jennifer Brown,F,9th,97,64,1,District,2949,1884411
Figueroa High School,2921,Denise Lopez,F,10th,79,64,1,District,2949,1884411


In [10]:
# Get the type of school, District or Charter

school_type=file_schools_df.set_index('school')['type']
school_type

school
Huang High School        District
Figueroa High School     District
Shelton High School       Charter
Hernandez High School    District
Griffin High School       Charter
Wilson High School        Charter
Cabrera High School       Charter
Bailey High School       District
Holden High School        Charter
Pena High School          Charter
Wright High School        Charter
Rodriguez High School    District
Johnson High School      District
Ford High School         District
Thomas High School        Charter
Name: type, dtype: object

In [11]:
# Get Total Students per School

total_students = group_df_merged['name'].count()
total_students

school
Bailey High School       4976
Cabrera High School      1858
Figueroa High School     2949
Ford High School         2739
Griffin High School      1468
Hernandez High School    4635
Holden High School        427
Huang High School        2917
Johnson High School      4761
Pena High School          962
Rodriguez High School    3999
Shelton High School      1761
Thomas High School       1635
Wilson High School       2283
Wright High School       1800
Name: name, dtype: int64

In [12]:
# Get Total Budget per shool
total_school_budget = file_schools_df.set_index('school')['budget']
total_school_budget

school
Huang High School        1910635
Figueroa High School     1884411
Shelton High School      1056600
Hernandez High School    3022020
Griffin High School       917500
Wilson High School       1319574
Cabrera High School      1081356
Bailey High School       3124928
Holden High School        248087
Pena High School          585858
Wright High School       1049400
Rodriguez High School    2547363
Johnson High School      3094650
Ford High School         1763916
Thomas High School       1043130
Name: budget, dtype: int64

In [13]:
# Calculate Per Student Budget
per_student_budget=file_schools_df.set_index('school')['budget']/file_schools_df.set_index('school')['size']
per_student_budget

school
Huang High School        655.0
Figueroa High School     639.0
Shelton High School      600.0
Hernandez High School    652.0
Griffin High School      625.0
Wilson High School       578.0
Cabrera High School      582.0
Bailey High School       628.0
Holden High School       581.0
Pena High School         609.0
Wright High School       583.0
Rodriguez High School    637.0
Johnson High School      650.0
Ford High School         644.0
Thomas High School       638.0
dtype: float64

In [14]:
# Calculate Average Math score by school
average_math_score = round((group_df_merged['math_score'].mean()),2)
average_math_score


school
Bailey High School       77.05
Cabrera High School      83.06
Figueroa High School     76.71
Ford High School         77.10
Griffin High School      83.35
Hernandez High School    77.29
Holden High School       83.80
Huang High School        76.63
Johnson High School      77.07
Pena High School         83.84
Rodriguez High School    76.84
Shelton High School      83.36
Thomas High School       83.42
Wilson High School       83.27
Wright High School       83.68
Name: math_score, dtype: float64

In [15]:
# Calculate Average Reading Score by School
average_reading_score = round((group_df_merged['reading_score'].mean()),2)
average_reading_score


school
Bailey High School       81.03
Cabrera High School      83.98
Figueroa High School     81.16
Ford High School         80.75
Griffin High School      83.82
Hernandez High School    80.93
Holden High School       83.81
Huang High School        81.18
Johnson High School      80.97
Pena High School         84.04
Rodriguez High School    80.74
Shelton High School      83.73
Thomas High School       83.85
Wilson High School       83.99
Wright High School       83.96
Name: reading_score, dtype: float64

In [16]:
# Calculate % Passing Math 
per_passing_math = round((df_merged[df_merged['math_score'] >= 60].groupby('school')['name'].count()/total_students*100),2)
per_passing_math



school
Bailey High School        89.53
Cabrera High School      100.00
Figueroa High School      88.44
Ford High School          89.30
Griffin High School      100.00
Hernandez High School     89.08
Holden High School       100.00
Huang High School         88.86
Johnson High School       89.18
Pena High School         100.00
Rodriguez High School     88.55
Shelton High School      100.00
Thomas High School       100.00
Wilson High School       100.00
Wright High School       100.00
Name: name, dtype: float64

In [17]:
# Calculate % Passing Reading
per_passing_reading = round((df_merged[df_merged['reading_score'] >= 60].groupby('school')['name'].count()/total_students*100), 2)
per_passing_reading


school
Bailey High School       100.0
Cabrera High School      100.0
Figueroa High School     100.0
Ford High School         100.0
Griffin High School      100.0
Hernandez High School    100.0
Holden High School       100.0
Huang High School        100.0
Johnson High School      100.0
Pena High School         100.0
Rodriguez High School    100.0
Shelton High School      100.0
Thomas High School       100.0
Wilson High School       100.0
Wright High School       100.0
Name: name, dtype: float64

In [18]:
# Calculate % Overall Passing Rate
per_overall_passing_rate = round((df_merged[(df_merged['reading_score'] >= 60) & (df_merged['math_score'] >= 60)].groupby('school')['name'].count()/total_students*100),2)
per_overall_passing_rate


school
Bailey High School        89.53
Cabrera High School      100.00
Figueroa High School      88.44
Ford High School          89.30
Griffin High School      100.00
Hernandez High School     89.08
Holden High School       100.00
Huang High School         88.86
Johnson High School       89.18
Pena High School         100.00
Rodriguez High School     88.55
Shelton High School      100.00
Thomas High School       100.00
Wilson High School       100.00
Wright High School       100.00
Name: name, dtype: float64

In [19]:
# Create School Summary

school_summary = pd.DataFrame({
    "School Type": school_type,
    "Total Students": total_students,
    "Total School Budget": total_school_budget,
    "Per Student Budget": per_student_budget,
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    '% Passing Math': per_passing_math,
    '% Passing Reading': per_passing_reading,
    "% Overall Passing Rate": per_overall_passing_rate
})

school_summary = school_summary[['School Type', 
                          'Total Students', 
                          'Total School Budget', 
                          'Per Student Budget', 
                          'Average Math Score', 
                          'Average Reading Score',
                          '% Passing Math',
                          '% Passing Reading',
                          '% Overall Passing Rate']]

school_summary.style.format({
                          'Total Students': "{:,}", 
                          "Total School Budget": "${:,}", 
                           "Per Student Budget": "${:.0f}", 
                           "% Passing Math": "{:.1%}", 
                           "% Passing Reading": "{:.1%}", 
                           "Overall Passing Rate": "{:.1%}"})


Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Bailey High School,District,4976,3124928,628.0,77.05,81.03,89.53,100.0,89.53
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,100.0,100.0,100.0
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,88.44,100.0,88.44
Ford High School,District,2739,1763916,644.0,77.1,80.75,89.3,100.0,89.3
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,100.0,100.0,100.0
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,89.08,100.0,89.08
Holden High School,Charter,427,248087,581.0,83.8,83.81,100.0,100.0,100.0
Huang High School,District,2917,1910635,655.0,76.63,81.18,88.86,100.0,88.86
Johnson High School,District,4761,3094650,650.0,77.07,80.97,89.18,100.0,89.18
Pena High School,Charter,962,585858,609.0,83.84,84.04,100.0,100.0,100.0


In [21]:
#Print Header for School SUmmmary
print("SCHOOL SUMMARY\n")
print("---------------------------------------------------------------------\n")
school_summary

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Bailey High School,District,4976,3124928,628.0,77.05,81.03,89.53,100.0,89.53
Cabrera High School,Charter,1858,1081356,582.0,83.06,83.98,100.0,100.0,100.0
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,88.44,100.0,88.44
Ford High School,District,2739,1763916,644.0,77.1,80.75,89.3,100.0,89.3
Griffin High School,Charter,1468,917500,625.0,83.35,83.82,100.0,100.0,100.0
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,89.08,100.0,89.08
Holden High School,Charter,427,248087,581.0,83.8,83.81,100.0,100.0,100.0
Huang High School,District,2917,1910635,655.0,76.63,81.18,88.86,100.0,88.86
Johnson High School,District,4761,3094650,650.0,77.07,80.97,89.18,100.0,89.18
Pena High School,Charter,962,585858,609.0,83.84,84.04,100.0,100.0,100.0


In [22]:
# Calculate Top Performing Schools (By Passing Rate)
# Create a table that highlights the top 5 performing schools based on Overall Passing Rate. 
# sort values by passing rate and then only print top 5 
top_performing_5 = school_summary.sort_values("% Overall Passing Rate", ascending = False)

#Print Header for Top 5 performing schools
print("TOP 5 PERFORMING SCHOOLS BY PASSING RATE\n")
print("---------------------------------------------------------------------\n")
top_performing_5.head(5).style.format({'Total Students': '{:,}',
                           "Total School Budget": "${:,}", 
                           "Per Student Budget": "${:.0f}", 
                           "% Passing Math": "{:.1%}", 
                           "% Passing Reading": "{:.1%}", 
                           "Overall Passing Rate": "{:.1%}"})



Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Cabrera High School,Charter,1858,"$1,081,356",$582,83.06,83.98,10000.0%,10000.0%,100
Griffin High School,Charter,1468,"$917,500",$625,83.35,83.82,10000.0%,10000.0%,100
Holden High School,Charter,427,"$248,087",$581,83.8,83.81,10000.0%,10000.0%,100
Pena High School,Charter,962,"$585,858",$609,83.84,84.04,10000.0%,10000.0%,100
Shelton High School,Charter,1761,"$1,056,600",$600,83.36,83.73,10000.0%,10000.0%,100


In [23]:
# Calculate Top Performing Schools (By Passing Rate)
#Create a table that highlights the bottom 5 performing schools based on Overall Passing Rate.
# sort values by passing rate and then only print top 5 
bottom_performing_5 = top_performing_5.tail(5)

#Print Header for Bottom 5 Performing Schools
print("BOTTOM 5 PERFORMERS\n")
print("---------------------------------------------------------------------\n")
bottom_performing_5

Unnamed: 0,School Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Johnson High School,District,4761,3094650,650.0,77.07,80.97,89.18,100.0,89.18
Hernandez High School,District,4635,3022020,652.0,77.29,80.93,89.08,100.0,89.08
Huang High School,District,2917,1910635,655.0,76.63,81.18,88.86,100.0,88.86
Rodriguez High School,District,3999,2547363,637.0,76.84,80.74,88.55,100.0,88.55
Figueroa High School,District,2949,1884411,639.0,76.71,81.16,88.44,100.0,88.44


In [24]:
# Calculate Math Scores by Grade

#9th grade average math scores 
math9 = round((file_students_df.loc[file_students_df['grade'] == '9th'].groupby('school')["math_score"].mean()),2)
math9



school
Bailey High School       77.08
Cabrera High School      83.09
Figueroa High School     76.40
Ford High School         77.36
Griffin High School      82.04
Hernandez High School    77.44
Holden High School       83.79
Huang High School        77.03
Johnson High School      77.19
Pena High School         83.63
Rodriguez High School    76.86
Shelton High School      83.42
Thomas High School       83.59
Wilson High School       83.09
Wright High School       83.26
Name: math_score, dtype: float64

In [25]:
#10th grade average math scores 
math10 = round((file_students_df.loc[file_students_df['grade'] == '10th'].groupby('school')["math_score"].mean()),2)
math10




school
Bailey High School       77.00
Cabrera High School      83.15
Figueroa High School     76.54
Ford High School         77.67
Griffin High School      84.23
Hernandez High School    77.34
Holden High School       83.43
Huang High School        75.91
Johnson High School      76.69
Pena High School         83.37
Rodriguez High School    76.61
Shelton High School      82.92
Thomas High School       83.09
Wilson High School       83.72
Wright High School       84.01
Name: math_score, dtype: float64

In [26]:
#11th grade average math scores 
math11 = round((file_students_df.loc[file_students_df['grade'] == '11th'].groupby('school')["math_score"].mean()),2)
math11



school
Bailey High School       77.52
Cabrera High School      82.77
Figueroa High School     76.88
Ford High School         76.92
Griffin High School      83.84
Hernandez High School    77.14
Holden High School       85.00
Huang High School        76.45
Johnson High School      77.49
Pena High School         84.33
Rodriguez High School    76.40
Shelton High School      83.38
Thomas High School       83.50
Wilson High School       83.20
Wright High School       83.84
Name: math_score, dtype: float64

In [27]:
#12th grade average math scores 
math12= round((file_students_df.loc[file_students_df['grade'] == '12th'].groupby('school')["math_score"].mean()),2)
math12



school
Bailey High School       76.49
Cabrera High School      83.28
Figueroa High School     77.15
Ford High School         76.18
Griffin High School      83.36
Hernandez High School    77.19
Holden High School       82.86
Huang High School        77.23
Johnson High School      76.86
Pena High School         84.12
Rodriguez High School    77.69
Shelton High School      83.78
Thomas High School       83.50
Wilson High School       83.04
Wright High School       83.64
Name: math_score, dtype: float64

In [28]:
# Create a Dataframe for the Math Scores by Grade

math_scores_by_grade = pd.DataFrame({
        "9th": math9,
        "10th": math10,
        "11th": math11,
        "12th": math12
})
math_scores_by_grade = math_scores_by_grade[['9th', '10th', '11th', '12th']]
#Print Header for Math Scores by grade
print("MATH SCORES BY GRDE\n")
print("---------------------------------------------------------------------\n")
math_scores_by_grade

Unnamed: 0_level_0,9th,10th,11th,12th
school,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bailey High School,77.08,77.0,77.52,76.49
Cabrera High School,83.09,83.15,82.77,83.28
Figueroa High School,76.4,76.54,76.88,77.15
Ford High School,77.36,77.67,76.92,76.18
Griffin High School,82.04,84.23,83.84,83.36
Hernandez High School,77.44,77.34,77.14,77.19
Holden High School,83.79,83.43,85.0,82.86
Huang High School,77.03,75.91,76.45,77.23
Johnson High School,77.19,76.69,77.49,76.86
Pena High School,83.63,83.37,84.33,84.12


In [29]:
#9th grade average reading scores 
reading9 = round((file_students_df.loc[file_students_df['grade'] == '9th'].groupby('school')["reading_score"].mean()),2)
reading9

#10th grade average reading scores 
reading10 = round((file_students_df.loc[file_students_df['grade'] == '10th'].groupby('school')["reading_score"].mean()),2)
reading10

#11th grade average reading scores 
reading11 = round((file_students_df.loc[file_students_df['grade'] == '11th'].groupby('school')["reading_score"].mean()),2)
reading11

#12th grade average reading scores 
reading12 = round((file_students_df.loc[file_students_df['grade'] == '12th'].groupby('school')["reading_score"].mean()),2)
reading12

school
Bailey High School       80.91
Cabrera High School      84.29
Figueroa High School     81.38
Ford High School         80.66
Griffin High School      84.01
Hernandez High School    80.86
Holden High School       84.70
Huang High School        80.31
Johnson High School      81.23
Pena High School         84.59
Rodriguez High School    80.38
Shelton High School      82.78
Thomas High School       83.83
Wilson High School       84.32
Wright High School       84.07
Name: reading_score, dtype: float64

In [30]:
# Create a Dataframe for the Reading Scores by Grade

reading_scores_by_grade = pd.DataFrame({
        "9th": reading9,
        "10th": reading10,
        "11th": reading11,
        "12th": reading12
})
reading_scores_by_grade = reading_scores_by_grade[['9th', '10th', '11th', '12th']]
reading_scores_by_grade.index.name = " "

#Print Header for Reading Scores by grade
print("READING SCORES BY GRDE\n")
print("---------------------------------------------------------------------\n")

reading_scores_by_grade


Unnamed: 0,9th,10th,11th,12th
,,,,
Bailey High School,81.3,80.91,80.95,80.91
Cabrera High School,83.68,84.25,83.79,84.29
Figueroa High School,81.2,81.41,80.64,81.38
Ford High School,80.63,81.26,80.4,80.66
Griffin High School,83.37,83.71,84.29,84.01
Hernandez High School,80.87,80.66,81.4,80.86
Holden High School,83.68,83.32,83.82,84.7
Huang High School,81.29,81.51,81.42,80.31
Johnson High School,81.26,80.77,80.62,81.23


In [35]:
# Calculate Score by School Spending
# create spending bins
spending_bins = [0, 584.99, 614.99, 644.99, 675]
spending_group = ['< $585', "$585 - 615", "$615 - 645","$645 - 675"]
# Add each school spending in the bin
df_merged['spending_bins'] = pd.cut(df_merged['budget']/df_merged['size'], spending_bins, labels = spending_group)
df_merged['spending_bins']

#group by school spending
group_by_spending = df_merged.groupby('spending_bins')

# average math score by spending 
average_math_score = round((group_by_spending['math_score'].mean()),2)
average_math_score

# average math score by spending 
average_reading_score = round((group_by_spending['reading_score'].mean()),2)
average_reading_score

per_passing_math = round(((df_merged[df_merged['math_score'] >= 60].groupby('spending_bins')['name'].count()/group_by_spending['name'].count())*100),2)
per_passing_reading = (df_merged[df_merged['reading_score'] >= 60].groupby('spending_bins')['name'].count()/group_by_spending['name'].count())*100
per_overall_passing_rate = round(((df_merged[(df_merged['reading_score'] >= 60) & (df_merged['math_score'] >= 60)].groupby('spending_bins')['name'].count()/group_by_spending['name'].count())*100), 2)

per_passing_math 
per_passing_reading
per_overall_passing_rate


spending_bins
< $585        100.00
$585 - 615    100.00
$615 - 645     90.92
$645 - 675     89.07
Name: name, dtype: float64

In [36]:
            
# Build Dataframe for schools Score by School Spending            
scores_by_school_spend = pd.DataFrame({
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math":per_passing_math,
    "% Passing Reading": per_passing_reading,
    "% Overall Passing Rate": per_overall_passing_rate
            
})
            

scores_by_school_spend = scores_by_school_spend[[
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing Rate"
]]

scores_by_school_spend.index.name = " Spending Ranges Per Student"
#Print Scores by School Spending
print("SCORES BY SCHOOL SPENDING\n")
print("---------------------------------------------------------------------\n")
scores_by_school_spend

SCORES BY SCHOOL SPENDING

---------------------------------------------------------------------



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
Spending Ranges Per Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
< $585,83.36,83.96,100.0,100.0,100.0
$585 - 615,83.53,83.84,100.0,100.0,100.0
$615 - 645,78.06,81.43,90.92,100.0,90.92
$645 - 675,77.05,81.01,89.07,100.0,89.07


In [37]:
# Calculate Score by School Size
# create bins
size_bins = [0, 999, 1999, 5000]
size_group = ["Small (<1000)", "Medium (1000-2000)" , "Large (2000-5000)"]
df_merged['size_bins'] = pd.cut(df_merged['size'], size_bins, labels = size_group)

#group by size
group_by_size = df_merged.groupby('size_bins')

# average math score by spending 
average_math_score = round((group_by_size['math_score'].mean()),2)
average_math_score

# average math score by spending 
average_reading_score = round((group_by_size['reading_score'].mean()),2)
average_reading_score

per_passing_math = round(((df_merged[df_merged['math_score'] >= 60].groupby('size_bins')['name'].count()/group_by_size['name'].count())*100),2)
per_passing_reading = (df_merged[df_merged['reading_score'] >= 60].groupby('size_bins')['name'].count()/group_by_size['name'].count())*100
per_overall_passing_rate = round(((df_merged[(df_merged['reading_score'] >= 60) & (df_merged['math_score'] >= 60)].groupby('size_bins')['name'].count()/group_by_size['name'].count())*100), 2)

per_passing_math 
per_passing_reading
per_overall_passing_rate




size_bins
Small (<1000)         100.00
Medium (1000-2000)    100.00
Large (2000-5000)      89.89
Name: name, dtype: float64

In [38]:
           
# Build Dataframe for schools Score by School Ssize            
scores_by_school_size = pd.DataFrame({
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math":per_passing_math,
    "% Passing Reading": per_passing_reading,
    "% Overall Passing Rate": per_overall_passing_rate
            
})
            

scores_by_school_size = scores_by_school_size[[
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing Rate"
]]

scores_by_school_size.index.name = " School Size"
#Print Scores by School Spending
print("SCORES BY SCHOOL SIZE\n")
print("---------------------------------------------------------------------\n")
scores_by_school_size

SCORES BY SCHOOL SIZE

---------------------------------------------------------------------



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Small (<1000),83.83,83.97,100.0,100.0,100.0
Medium (1000-2000),83.37,83.87,100.0,100.0,100.0
Large (2000-5000),77.48,81.2,89.89,100.0,89.89


In [40]:
# Calculate Score by School Type# Get the type of school, District or Charter

school_type=df_merged.groupby("type")
# average math score by spending 
average_math_score = round((school_type['math_score'].mean()),2)
average_math_score

# average math score by spending 
average_reading_score = round((school_type['reading_score'].mean()),2)
average_reading_score

per_passing_math = round(((df_merged[df_merged['math_score'] >= 60].groupby('type')['name'].count()/school_type['name'].count())*100),2)
per_passing_reading = (df_merged[df_merged['reading_score'] >= 60].groupby('type')['name'].count()/school_type['name'].count())*100
per_overall_passing_rate = round(((df_merged[(df_merged['reading_score'] >= 60) & (df_merged['math_score'] >= 60)].groupby('type')['name'].count()/school_type['name'].count())*100), 2)

per_passing_math 
per_passing_reading
per_overall_passing_rate

type
Charter     100.00
District     89.03
Name: name, dtype: float64

In [41]:
# Build Dataframe for schools Score by School Type            
scores_by_school_type = pd.DataFrame({
    "Average Math Score": average_math_score,
    "Average Reading Score": average_reading_score,
    "% Passing Math":per_passing_math,
    "% Passing Reading": per_passing_reading,
    "% Overall Passing Rate": per_overall_passing_rate
            
})
            

scores_by_school_type = scores_by_school_type[[
    "Average Math Score",
    "Average Reading Score",
    "% Passing Math",
    "% Passing Reading",
    "% Overall Passing Rate"
]]

scores_by_school_type.index.name = " School type"
#Print Scores by School Type
print("SCORES BY SCHOOL TYPE\n")
print("---------------------------------------------------------------------\n")
scores_by_school_type

SCORES BY SCHOOL TYPE

---------------------------------------------------------------------



Unnamed: 0_level_0,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Charter,83.41,83.9,100.0,100.0,100.0
District,76.99,80.96,89.03,100.0,89.03
