In [54]:
# Import pandas for data handling and matplotlib for plotting
import pandas as pd
import matplotlib.pyplot as plt

# Load the schools dataset from the CSV file in the working directory
schools = pd.read_csv(filepath_or_buffer="schools.csv")

# Show the first five rows as a quick look at the columns and values
schools.head(n=5)


Unnamed: 0,school_name,borough,building_code,average_math,average_reading,average_writing,percent_tested
0,"New Explorations into Science, Technology and ...",Manhattan,M022,657,601,601,
1,Essex Street Academy,Manhattan,M445,395,411,387,78.9
2,Lower Manhattan Arts Academy,Manhattan,M445,418,428,415,65.1
3,High School for Dual Language and Asian Studies,Manhattan,M445,613,453,463,95.9
4,Henry Street School for International Studies,Manhattan,M056,410,406,381,59.7


In [55]:
# Schools whose average math score is at least 640 (the minimum needed for an
# 80% score), listed with name and score from highest to lowest.
best_math_school = (
    schools
    .loc[schools["average_math"] >= 640, ['school_name', "average_math"]]
    .sort_values(by="average_math", ascending=False)
)

In [56]:
# Combined SAT score per school (math + reading + writing), stored in a new column
schools['total_SAT'] = (
    schools['average_math']
    + schools['average_reading']
    + schools['average_writing']
)

# Ten best-performing schools: mean combined score per school name, highest first
top_10_schools = (
    schools
    .groupby('school_name', as_index=False)['total_SAT']
    .mean()
    .sort_values('total_SAT', ascending=False)
    .head(10)
)

top_10_schools

Unnamed: 0,school_name,total_SAT
325,Stuyvesant High School,2144.0
324,Staten Island Technical High School,2041.0
55,Bronx High School of Science,2041.0
188,High School of American Studies at Lehman College,2013.0
334,Townsend Harris High School,1981.0
293,Queens High School for the Sciences at York Co...,1947.0
30,Bard High School Early College,1914.0
83,Brooklyn Technical High School,1896.0
121,Eleanor Roosevelt High School,1889.0
180,"High School for Mathematics, Science, and Engi...",1889.0


In [57]:
# NYC borough with the highest standard deviation for total_SAT.
# Per-borough summary of total_SAT: non-null count, mean and standard deviation.
# Kept unrounded so the maximum is chosen on exact values.
borough_stats = schools.groupby('borough')['total_SAT'].agg(['count', 'mean', 'std'])

# Rounded copy (2 decimals) used for presentation
boroughs = borough_stats.round(2)

# Select the borough(s) whose *unrounded* std equals the maximum; comparing after
# rounding could report a spurious tie between boroughs that differ only beyond
# two decimals. The index is not reset, so borough remains the row index.
has_largest_std = borough_stats['std'] == borough_stats['std'].max()
largest_std_dev = boroughs[has_largest_std]

# Rename the columns to clearer, more descriptive names
largest_std_dev = largest_std_dev.rename(
    columns={'count': 'num_schools', 'mean': 'average_SAT', 'std': 'std_SAT'}
)

largest_std_dev
                                      

Unnamed: 0_level_0,num_schools,average_SAT,std_SAT
borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Manhattan,89,1340.13,230.29
