In [27]:
import pandas as pd
import os

In [28]:
# Set paths for csv files
csv_path_sc = os.path.join('.', 'Resources', 'schools_complete.csv')
csv_path_st = os.path.join('.', 'Resources', 'students_complete.csv')

In [29]:
# Read in csv files and change column names
schools_df = pd.read_csv(csv_path_sc)
schools_df = schools_df.rename(columns= {'school_name': 'School Name', 'type': 'Type', 'size': 'Total Students', 
                                         'budget': 'Total School Budget'})

students_df = pd.read_csv(csv_path_st)
students_df = students_df.rename(columns= {'student_name': 'Student Name', 'gender': 'Gender', 'grade': 'Grade', 
                                           'school_name': 'School Name', 'reading_score': 'Reading Score', 'math_score': 'Math Score'})

In [30]:
# Merge school and student dataframes
school_data_full = pd.merge(schools_df, students_df, how= 'left', on=['School Name', 'School Name'])

In [31]:
# District Summary Analysis
# Calculate total number of schools
dist_school_count = len(school_data_full['School Name'].unique())
#print(dist_school_count)

# Calculate total number of students (sum of student name value count)
dist_student_count = sum(school_data_full['Student Name'].value_counts())
#print(dist_student_count)

# Calculate total budget (same as above)
dist_total_budget = sum(school_data_full['Total School Budget'].unique())
#print(dist_total_budget)

# Calculate avg math and reading (sum of math/student count etc.)
dist_avg_math = school_data_full['Math Score'].sum() / dist_student_count
#print(dist_avg_math)

dist_avg_reading = school_data_full['Reading Score'].sum() / dist_student_count
#print(dist_avg_reading)

# Calculate % passing math with score of 70 or higher
# Find all scores >=70 using loc, find length or that and divide by student count, muptiply by 100
passing_math = (len(school_data_full.loc[school_data_full['Math Score'] >= 70]) / dist_student_count) * 100
passing_math = round(passing_math, 6)
#print(passing_math)

# Calulate % passing reading with score of 70 or higher (see above for logic)
passing_reading = len(school_data_full.loc[school_data_full['Reading Score'] >= 70])/ dist_student_count * 100
passing_reading = round(passing_reading, 6)
#print(passing_reading)

# Calculate Overall passing rate - (passing_math + passing_reading)/2
overall_passing = (passing_math + passing_reading) / 2
overall_passing = round(overall_passing, 6)
#print(overall_passing)

In [32]:
# Create Summary Table for District Info
dist_summ_df = pd.DataFrame({'Total Schools': [dist_school_count], 'Total Students': dist_student_count, 'Total Budget': dist_total_budget,
                           'Average Math Score': dist_avg_math, 'Average Reading Score': dist_avg_reading, '% Passing Math': passing_math,
                            '% Passing Reading': passing_reading, 'Overall Passing Rate': overall_passing})
dist_summ_df['Total Budget'] = dist_summ_df['Total Budget'].map('${:,.2f}'.format)
dist_summ_df['% Passing Math'] = dist_summ_df['% Passing Math'].map('{:}%'.format)
dist_summ_df['% Passing Reading'] = dist_summ_df['% Passing Reading'].map('{:}%'.format)
dist_summ_df['Overall Passing Rate'] = dist_summ_df['Overall Passing Rate'].map('{:}%'.format)
dist_summ_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853%,85.805463%,80.393158%


In [33]:
# School Summary Analysis
# Group main dataframe by school name
school_groups = school_data_full.groupby(['School Name'])

In [34]:
# Make a new dataframe getting averages of size, budget, and scores
avg_schools = school_groups.mean()
avg_schools = avg_schools[['Total Students', 'Total School Budget', 'Reading Score', 'Math Score']]

In [35]:
# Add column for Per Student Budget (total budget/ total students)
avg_schools['Per Student Budget'] = avg_schools['Total School Budget'] / avg_schools['Total Students']

In [36]:
# Add column of type ************* find a better way for this
avg_schools['Type'] = school_groups['Type'].unique()

In [37]:
# Create new dataframe with sums of columns from groupby
sum_schools = school_groups.sum()
#sum_schools

In [38]:
# Calculate average math score
avg_schools['Average Math Score'] = sum_schools['Math Score'] / avg_schools['Total Students']
avg_schools['Average Reading Score'] = sum_schools['Reading Score'] / avg_schools['Total Students']

In [39]:
# Use merged dataframe to find students passing math and reading (scores over 70)
passing_math_df = school_data_full[school_data_full['Math Score']>= 70]
passing_read_df = school_data_full[school_data_full['Reading Score']>= 70]
# Group new dataframes by school name
group_pass_math = passing_math_df.groupby('School Name')
group_pass_read = passing_read_df.groupby('School Name')


In [40]:
# Add columns to avg_schools dataframe for % passing math and reading (count of math scores in group_pass_math/ total students)
avg_schools['% Passing Math'] = group_pass_math['Math Score'].count()/ avg_schools['Total Students'] * 100
avg_schools['% Passing Reading'] = group_pass_read['Reading Score'].count()/ avg_schools['Total Students'] * 100

In [41]:
# Add column for overall passing (%PassMath + % PassRead)/ 2
avg_schools['% Overall Passing Rate'] = (avg_schools['% Passing Math'] + avg_schools['% Passing Reading']) / 2


In [42]:
# Reorganize column names
org_avg_schools = avg_schools[['Type', 'Total Students', 'Total School Budget', 'Per Student Budget', 'Average Math Score', 
              'Average Reading Score', '% Passing Math', '% Passing Reading', '% Overall Passing Rate']]


In [43]:
# Sort values on % Overall to find top 5 and bottom 5
overall_sort_top = org_avg_schools.sort_values('% Overall Passing Rate', ascending = False)
overall_sort_top.head(5)

Unnamed: 0_level_0,Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Cabrera High School,[Charter],1858.0,1081356.0,582.0,83.061895,83.97578,94.133477,97.039828,95.586652
Thomas High School,[Charter],1635.0,1043130.0,638.0,83.418349,83.84893,93.272171,97.308869,95.29052
Pena High School,[Charter],962.0,585858.0,609.0,83.839917,84.044699,94.594595,95.945946,95.27027
Griffin High School,[Charter],1468.0,917500.0,625.0,83.351499,83.816757,93.392371,97.138965,95.265668
Wilson High School,[Charter],2283.0,1319574.0,578.0,83.274201,83.989488,93.867718,96.539641,95.203679


In [44]:
overall_sort_bottom = org_avg_schools.sort_values('% Overall Passing Rate')
overall_sort_bottom.head()

Unnamed: 0_level_0,Type,Total Students,Total School Budget,Per Student Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,% Overall Passing Rate
School Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Rodriguez High School,[District],3999.0,2547363.0,637.0,76.842711,80.744686,66.366592,80.220055,73.293323
Figueroa High School,[District],2949.0,1884411.0,639.0,76.711767,81.15802,65.988471,80.739234,73.363852
Huang High School,[District],2917.0,1910635.0,655.0,76.629414,81.182722,65.683922,81.316421,73.500171
Johnson High School,[District],4761.0,3094650.0,650.0,77.072464,80.966394,66.057551,81.222432,73.639992
Ford High School,[District],2739.0,1763916.0,644.0,77.102592,80.746258,68.309602,79.299014,73.804308
