In [2]:
import pandas as pd
import os

In [3]:
# Build both input CSV paths under ./Resources in a single pass
csv_path_sc, csv_path_st = (
    os.path.join('.', 'Resources', csv_name)
    for csv_name in ('schools_complete.csv', 'students_complete.csv')
)

In [4]:
# Load the schools CSV into a DataFrame
schools_df = pd.read_csv(filepath_or_buffer=csv_path_sc)

In [5]:
# Load the students CSV into a DataFrame
students_df = pd.read_csv(filepath_or_buffer=csv_path_st)

In [6]:
# Merge dataframes
# Left-join the student rows onto the school rows using the shared
# 'school_name' column. The key is given once; listing it twice (as the
# previous version did) is redundant and only obscures the intent.
school_data_full = pd.merge(schools_df, students_df, how='left', on='school_name')
school_data_full.head()

Unnamed: 0,School ID,school_name,type,size,budget,Student ID,student_name,gender,grade,reading_score,math_score
0,0,Huang High School,District,2917,1910635,0,Paul Bradley,M,9th,66,79
1,0,Huang High School,District,2917,1910635,1,Victor Smith,M,12th,94,61
2,0,Huang High School,District,2917,1910635,2,Kevin Rodriguez,M,12th,90,60
3,0,Huang High School,District,2917,1910635,3,Dr. Richard Scott,M,12th,67,58
4,0,Huang High School,District,2917,1910635,4,Bonnie Ray,F,9th,97,84


In [7]:
# District Summary Analysis
# Minimum score that counts as passing, for both math and reading
PASSING_SCORE = 70

# Calculate total number of schools (distinct school names)
dist_school_count = school_data_full['school_name'].nunique()

# Calculate total number of students (rows with a non-null student name)
dist_student_count = school_data_full['student_name'].count()

# Calculate total budget: take one budget value per school, then add them up.
# Summing the *unique budget values* would silently drop a school whenever
# two schools happen to share the same budget.
dist_total_budget = (
    school_data_full.drop_duplicates(subset='school_name')['budget'].sum()
)

# Calculate avg math and reading (sum of scores / student count)
dist_avg_math = school_data_full['math_score'].sum() / dist_student_count
dist_avg_reading = school_data_full['reading_score'].sum() / dist_student_count

# Calculate % passing math: count scores >= PASSING_SCORE, divide by the
# student count, multiply by 100
passing_math = ((school_data_full['math_score'] >= PASSING_SCORE).sum() / dist_student_count) * 100
passing_math = round(passing_math, 6)

# Calculate % passing reading (same logic as math)
passing_reading = ((school_data_full['reading_score'] >= PASSING_SCORE).sum() / dist_student_count) * 100
passing_reading = round(passing_reading, 6)

# Calculate Overall passing rate - (passing_math + passing_reading) / 2
# NOTE: this is the mean of the two subject pass rates, not the share of
# students who passed both subjects.
overall_passing = (passing_math + passing_reading) / 2
overall_passing = round(overall_passing, 6)

In [8]:
# Assemble the one-row district summary table from the metrics computed above
dist_summ_df = pd.DataFrame({
    'Total Schools': [dist_school_count],
    'Total Students': [dist_student_count],
    'Total Budget': [dist_total_budget],
    'Average Math Score': [dist_avg_math],
    'Average Reading Score': [dist_avg_reading],
    '% Passing Math': [passing_math],
    '% Passing Reading': [passing_reading],
    'Overall Passing Rate': [overall_passing],
})

# Render the budget as currency and the three rates with a trailing '%';
# overwriting existing columns via assign keeps the column order unchanged.
dist_summ_df = dist_summ_df.assign(**{
    'Total Budget': dist_summ_df['Total Budget'].map('${:,.2f}'.format),
    '% Passing Math': dist_summ_df['% Passing Math'].map('{:}%'.format),
    '% Passing Reading': dist_summ_df['% Passing Reading'].map('{:}%'.format),
    'Overall Passing Rate': dist_summ_df['Overall Passing Rate'].map('{:}%'.format),
})
dist_summ_df

Unnamed: 0,Total Schools,Total Students,Total Budget,Average Math Score,Average Reading Score,% Passing Math,% Passing Reading,Overall Passing Rate
0,15,39170,"$24,649,428.00",78.985371,81.87784,74.980853%,85.805463%,80.393158%


In [14]:
# School Summary Analysis
# Group the merged student rows by school. A scalar key (rather than a
# one-element list) keeps the group labels as plain school-name strings on
# every pandas version.
school_groups = school_data_full.groupby('school_name')

# Per-school mean of the numeric columns only. The merged frame also holds
# text columns (type, student_name, gender, grade); without numeric_only=True,
# pandas >= 2.0 raises a TypeError when it tries to average them.
avg_schools = school_groups.mean(numeric_only=True)
avg_schools

Unnamed: 0_level_0,School ID,size,budget,Student ID,reading_score,math_score
school_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bailey High School,7.0,4976.0,3124928.0,20358.5,81.033963,77.048432
Cabrera High School,6.0,1858.0,1081356.0,16941.5,83.97578,83.061895
Figueroa High School,1.0,2949.0,1884411.0,4391.0,81.15802,76.711767
Ford High School,13.0,2739.0,1763916.0,36165.0,80.746258,77.102592
Griffin High School,4.0,1468.0,917500.0,12995.5,83.816757,83.351499
Hernandez High School,3.0,4635.0,3022020.0,9944.0,80.934412,77.289752
Holden High School,8.0,427.0,248087.0,23060.0,83.814988,83.803279
Huang High School,0.0,2917.0,1910635.0,1458.0,81.182722,76.629414
Johnson High School,12.0,4761.0,3094650.0,32415.0,80.966394,77.072464
Pena High School,9.0,962.0,585858.0,23754.5,84.044699,83.839917
