In [1]:
# Title: Analysis of School Data

# Summary: This Python code loads a dataset of school information and performs several data analysis tasks. It begins by identifying schools whose average math score is at least 80% of the maximum possible score (640 out of 800). It then calculates the total SAT score for each school and identifies the 10 schools with the highest total SAT scores. Finally, the code groups schools by borough and determines the borough with the largest standard deviation in total SAT scores, providing an overview of school performance and regional disparities.

# Standard library
import os

# Import Pandas
import pandas as pd

# Location of the schools dataset. The default is the original author's local
# path; set the SCHOOLS_CSV_PATH environment variable to point at the file on
# another machine without editing the notebook.
SCHOOLS_CSV_PATH = os.environ.get(
    "SCHOOLS_CSV_PATH",
    "D:/Projects Python Portfolio/Exploring NYC Public School Test Result Scores/schools.csv",
)

# Read in the data
schools = pd.read_csv(SCHOOLS_CSV_PATH)

# Preview the data. Only the last expression of a cell is rendered automatically,
# so the preview is printed explicitly; a bare `schools.head()` followed by other
# statements would be evaluated and silently discarded.
print(schools.head())

# Get the list of column names in the DataFrame
column_names = schools.columns.tolist()
print(column_names)


['school_name', 'borough', 'building_code', 'average_math', 'average_reading', 'average_writing', 'percent_tested']


In [2]:
# Maximum score used as the reference, and the fraction of it a school must reach
max_score = 800
percent_threshold = 0.8

# The cut-off is 80% of the maximum score
score_80_threshold = percent_threshold * max_score
print(score_80_threshold)

# Select the name and math average of every school at or above the cut-off
# (rows and columns picked in a single .loc call), highest math average first
best_math_schools = (
    schools
    .loc[schools['average_math'] >= score_80_threshold, ['school_name', 'average_math']]
    .sort_values(by='average_math', ascending=False)
)



640.0


In [3]:
# Add a per-school total: math + reading + writing averages
schools['total_SAT'] = (
    schools['average_math']
    .add(schools['average_reading'])
    .add(schools['average_writing'])
)

# Refresh the list of column names now that total_SAT has been added
column_names = list(schools.columns)
print(column_names)

# Order schools by total SAT score, highest first, and keep the first ten rows
top_10_schools = (
    schools
    .loc[:, ['school_name', 'total_SAT']]
    .sort_values(by='total_SAT', ascending=False)
    .head(10)
)
print(top_10_schools)



['school_name', 'borough', 'building_code', 'average_math', 'average_reading', 'average_writing', 'percent_tested', 'total_SAT']
                                           school_name  total_SAT
88                              Stuyvesant High School       2144
170                       Bronx High School of Science       2041
93                 Staten Island Technical High School       2041
174  High School of American Studies at Lehman College       2013
333                        Townsend Harris High School       1981
365  Queens High School for the Sciences at York Co...       1947
5                       Bard High School Early College       1914
280                     Brooklyn Technical High School       1896
45                       Eleanor Roosevelt High School       1889
68   High School for Mathematics, Science, and Engi...       1889


In [4]:
# Per-borough summary of total SAT scores: number of non-missing values, mean and
# standard deviation, each rounded to two decimals
borough_groups = (
    schools
    .groupby('borough')['total_SAT']
    .agg(['count', 'mean', 'std'])
    .round(2)
)

# Keep the row(s) whose rounded standard deviation equals the largest one
largest_std_dev = borough_groups.loc[
    borough_groups['std'].eq(borough_groups['std'].max())
]

# Map the aggregate names to more descriptive column labels
column_names = dict(count='num_schools', mean='average_SAT', std='std_SAT')
largest_std_dev = largest_std_dev.rename(columns=column_names)
print(largest_std_dev)


           num_schools  average_SAT  std_SAT
borough                                     
Manhattan           89      1340.13   230.29
