In [5]:
import pandas as pd
from pathlib import Path

# Load the two source tables under separate names. Rebinding a single
# variable for both reads would throw the first table away.
# NOTE(review): assumes the school-level columns used below (type, size,
# budget) live in schools_complete.csv and the per-student columns
# (grade, math_score, reading_score) live in students_complete.csv, with
# `school_name` present in both -- confirm against the actual files.
schools_df = pd.read_csv('schools_complete.csv')
students_df = pd.read_csv('students_complete.csv')

# One row per student with that student's school attributes attached.
# A left join keeps every student row even when its school has no match.
df = pd.merge(students_df, schools_df, how='left', on='school_name')

# Calculate the required metrics for the district summary
total_schools = df['school_name'].nunique()

# Enrollment and budget are per-school figures. In the merged frame they are
# repeated on every student row, so they are totalled from the schools table
# to avoid counting each school once per student.
total_students = schools_df['size'].sum()
total_budget = schools_df['budget'].sum()

average_math_score = df['math_score'].mean()
average_reading_score = df['reading_score'].mean()

# A score of 70 or above counts as passing; the mean of a boolean mask is
# the fraction of rows that are True.
passed_math = df['math_score'] >= 70
passed_reading = df['reading_score'] >= 70
passing_math_percentage = passed_math.mean() * 100
passing_reading_percentage = passed_reading.mean() * 100
overall_passing_percentage = (passed_math & passed_reading).mean() * 100

# Create the district summary DataFrame (a single row of district-wide values)
district_summary = pd.DataFrame({
    'Total Schools': [total_schools],
    'Total Students': [total_students],
    'Total Budget': [total_budget],
    'Average Math Score': [average_math_score],
    'Average Reading Score': [average_reading_score],
    '% Passing Math': [passing_math_percentage],
    '% Passing Reading': [passing_reading_percentage],
    '% Overall Passing': [overall_passing_percentage],
})

# Group the data by school. `type` is part of the key so it is carried into
# the result index alongside the school name.
school_keys = ['school_name', 'type']
school_groups = df.groupby(school_keys)

# Calculate the required metrics for the school summary.
# `size` and `budget` are read once per group with first().
total_students_per_school = school_groups['size'].first()
total_budget_per_school = school_groups['budget'].first()
per_student_budget = total_budget_per_school / total_students_per_school
average_math_score_per_school = school_groups['math_score'].mean()
average_reading_score_per_school = school_groups['reading_score'].mean()


def _percent_of_school(passed):
    """Return, per school, the percentage of enrolled students flagged in *passed*.

    *passed* is a boolean Series aligned with ``df``. The flags are summed
    within each (school_name, type) group, so a school where nobody passed
    gets a count of 0 (and therefore 0%) rather than being absent from the
    result. The denominator is the school's ``size`` value.
    """
    passed_count = passed.groupby([df[key] for key in school_keys]).sum()
    return passed_count / total_students_per_school * 100


math_pass_mask = df['math_score'] >= 70
reading_pass_mask = df['reading_score'] >= 70
passing_math_percentage_per_school = _percent_of_school(math_pass_mask)
passing_reading_percentage_per_school = _percent_of_school(reading_pass_mask)
overall_passing_percentage_per_school = _percent_of_school(math_pass_mask & reading_pass_mask)

# Create the school summary DataFrame. Every series above is indexed by the
# same (school_name, type) groups, so their .values line up row for row.
school_summary = pd.DataFrame({
    'School Name': total_students_per_school.index.get_level_values('school_name'),
    'School Type': total_students_per_school.index.get_level_values('type'),
    'Total Students': total_students_per_school.values,
    'Total School Budget': total_budget_per_school.values,
    'Per Student Budget': per_student_budget.values,
    'Average Math Score': average_math_score_per_school.values,
    'Average Reading Score': average_reading_score_per_school.values,
    '% Passing Math': passing_math_percentage_per_school.values,
    '% Passing Reading': passing_reading_percentage_per_school.values,
    '% Overall Passing': overall_passing_percentage_per_school.values,
})

# Rank schools on their overall passing percentage and keep five from each end.
ranking_column = '% Overall Passing'

# Highest-Performing Schools: best overall passing rate first
top_schools = school_summary.sort_values(by=ranking_column, ascending=False).iloc[:5]

# Lowest-Performing Schools: worst overall passing rate first
bottom_schools = school_summary.sort_values(by=ranking_column, ascending=True).iloc[:5]

# Column order for the per-grade tables below.
grade_order = ['9th', '10th', '11th', '12th']

# Math Scores by Grade: mean math score per school (rows) and grade (columns)
math_scores_by_grade = df.pivot_table(
    values='math_score',
    index='school_name',
    columns='grade',
    aggfunc='mean',
).loc[:, grade_order]

# Reading Scores by Grade: same layout, built from the reading scores
reading_scores_by_grade = df.pivot_table(
    values='reading_score',
    index='school_name',
    columns='grade',
    aggfunc='mean',
).loc[:, grade_order]

# Scores by School Spending
# Bin edges on the per-student budget, with one label per interval. pd.cut
# closes intervals on the right by default, and values outside the outer
# edges become NaN, which groupby then leaves out.
spending_bins = [0, 585, 630, 645, 680]
labels = ["<$585", "$585-630", "$630-645", "$645-680"]
school_summary['Spending Ranges (Per Student)'] = pd.cut(
    school_summary['Per Student Budget'],
    bins=spending_bins,
    labels=labels,
)

# Average every numeric column within each spending range.
# numeric_only=True is required because school_summary also holds the text
# columns 'School Name' and 'School Type'; recent pandas raises on those
# instead of silently skipping them. observed=False keeps a row for every
# spending range, including ranges that contain no schools.
school_spending_df = school_summary.groupby(
    'Spending Ranges (Per Student)',
    observed=False,
).mean(numeric_only=True)

spending_math_scores = school_spending_df['Average Math Score']
spending_reading_scores = school_spending_df['Average Reading Score']
spending_passing_math = school_spending_df['% Passing Math']
spending_passing_reading = school_spending_df['% Passing Reading']
overall_passing_spending = school_spending_df['% Overall Passing']

# Create the spending_summary DataFrame (indexed by spending range)
spending_summary = pd.DataFrame({
    'Average Math Score': spending_math_scores,
    'Average Reading Score': spending_reading_scores,
    '% Passing Math': spending_passing_math,
    '% Passing Reading': spending_passing_reading,
    '% Overall Passing': overall_passing_spending,
})

# Display the DataFrames: the cell's final expression is a tuple of every
# result table, in the order they were built.
(
    district_summary,
    school_summary,
    top_schools,
    bottom_schools,
    math_scores_by_grade,
    reading_scores_by_grade,
    spending_summary,
)


ModuleNotFoundError: No module named 'pandas'