In [1]:

import os

import matplotlib.pyplot as plt
import pandas as pd
from rag_schools_comparison_graph import School, make_comparison_graph


def save_graph(file_path, figure):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)
    figure.savefig(file_path, dpi=100)
    plt.close(figure)


In [2]:
rag_df = pd.read_csv(
    "../data/real/standard_area_aggregate_scores_rag.csv",
)

rag_df


Unnamed: 0,variable,mean,count,school_lab,year_group_lab,fsm_lab,sen_lab,gender_lab,total_pupils,group_n,group_wt_mean,group_wt_std,lower,upper,rag,variable_lab,description
0,autonomy_score,20.021661,277.0,BRAUNTON SCHOOL AND C.C.,All,All,All,All,1189.0,5.0,19.206056,0.757852,18.448203,19.963908,above,Autonomy,How 'in control' young people feel of their life
1,life_satisfaction_score,6.906810,279.0,BRAUNTON SCHOOL AND C.C.,All,All,All,All,1216.0,5.0,6.401316,0.396911,6.004405,6.798226,above,Life satisfaction,How satisfied young people feel with their life
2,optimism_score,11.409594,271.0,BRAUNTON SCHOOL AND C.C.,All,All,All,All,1179.0,5.0,11.087362,0.393545,10.693817,11.480907,average,Optimism,Young people's hopefulness and confidence for ...
3,wellbeing_score,22.876812,276.0,BRAUNTON SCHOOL AND C.C.,All,All,All,All,1197.0,5.0,21.841270,0.784775,21.056495,22.626044,above,Psychological wellbeing,How positive and generally happy young people ...
4,esteem_score,14.666667,273.0,BRAUNTON SCHOOL AND C.C.,All,All,All,All,1188.0,5.0,13.978956,0.462649,13.516307,14.441605,above,Self-esteem,How much young people value themselves
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,wealth_score,0.540373,161.0,THE ILFRACOMBE ACADEMY,All,All,Non-SEN,All,819.0,5.0,0.573871,0.033225,0.540646,0.607096,below,Relative wealth,Whether young people feel their family is rich...
1436,future_score,7.579861,144.0,THE ILFRACOMBE ACADEMY,All,All,Non-SEN,All,721.0,5.0,8.233010,0.410375,7.822635,8.643385,below,Future opportunities,How young people feel regarding the future opt...
1437,climate_score,2.661616,198.0,THE ILFRACOMBE ACADEMY,All,All,Non-SEN,All,1011.0,5.0,2.639960,0.138283,2.501677,2.778244,average,Climate change,Worries regarding climate change
1438,social_score,14.101010,198.0,THE ILFRACOMBE ACADEMY,All,All,Non-SEN,All,999.0,5.0,14.862863,0.578002,14.284861,15.440865,below,Support from friends,The support young people receive from their peers


In [3]:
schools = rag_df["school_lab"].unique()
topics = rag_df["variable"].unique()

# Base subgroup filters
subgroup_filter_functions = {
    "Year 8": lambda df: df[df["year_group_lab"] == "Year 8"],
    "Year 10": lambda df: df[df["year_group_lab"] == "Year 10"],
    "SEN": lambda df: df[df["sen_lab"] == "SEN"],
    "Non-SEN": lambda df: df[df["sen_lab"] == "Non-SEN"],
    "FSM": lambda df: df[df["fsm_lab"] == "FSM"],
    "Non-FSM": lambda df: df[df["fsm_lab"] == "Non-FSM"],
    "Boy": lambda df: df[df["gender_lab"] == "Boy"],
    "Girl": lambda df: df[df["gender_lab"] == "Girl"],
    "All": lambda df: df[
        (df["year_group_lab"] == "All") &
        (df["fsm_lab"] == "All") &
        (df["sen_lab"] == "All") &
        (df["gender_lab"] == "All")
    ],  # Returns rows where all filters match "All"
}


In [4]:
# Loop over each school
for school in schools:
    # Filter DataFrame for the current school
    school_df = rag_df[rag_df["school_lab"] == school]

    # Loop over each subgroup filter
    for subgroup_name, subgroup_filter in subgroup_filter_functions.items():
        # Apply the subgroup filter to the school's DataFrame
        subgroup_df_for_school = subgroup_filter(school_df)

        # Loop over each topic
        for topic in topics:
            print(f"Processing {school}...")  # noqa: T201
            print(f"Processing {subgroup_name} for {school}...")  # noqa: T201
            print(f"Processing topic '{topic}' for {school} under {subgroup_name}...")  # noqa: T201
            print("\n")  # noqa: T201
            # Filter the DataFrame for the current topic
            filtered_df = subgroup_df_for_school[subgroup_df_for_school["variable"] == topic]

            # Check if there is data to process
            if filtered_df.empty:
                continue  # Skip if no data for this combination

            # Compute mean score for the current school, subgroup, and topic
            current_school_mean = filtered_df["mean"].mean()
            current_school_count = filtered_df["count"].sum()

            # Get data for all schools (including the current school)
            all_schools_df = rag_df[rag_df["variable"] == topic]
            all_schools_df = subgroup_filter(all_schools_df)

            # Check if there is data for all schools
            if all_schools_df.empty:
                continue  # Skip if no data

            # Compute mean scores for all schools and sort them
            all_schools_grouped = all_schools_df.groupby("school_lab")
            all_schools_means = all_schools_grouped["mean"].mean()
            all_schools_means = all_schools_means.sort_values()  # Sort from smallest to largest

            # Create School objects for all schools in sorted order
            all_schools = [School(mean_topic_score=mean) for mean in all_schools_means]

            # Identify the index of the current school in the sorted list
            if school in all_schools_means.index:
                current_school_index = all_schools_means.index.tolist().index(school)
                current_school = all_schools[current_school_index]
            else:
                continue  # Skip if current school is not in the data

            # Determine y-axis range
            range_low = all_schools_means.min() - 1  # Adjust as needed
            range_high = all_schools_means.max() + 1

            # Labels for the graph
            y_label = "Mean Score"
            x_label = "Northern Devon schools (ordered by mean score)"

            # Calculate overall statistics
            overall_mean = all_schools_df["mean"].mean()
            below_avg_amount = all_schools_df[all_schools_df["mean"] < overall_mean]["mean"].mean()
            above_avg_amount = all_schools_df[all_schools_df["mean"] > overall_mean]["mean"].mean()
            average_amount = overall_mean

            # Generate the graph and get the figure
            figure = make_comparison_graph(
                schools=all_schools,
                range_low=range_low,
                range_high=range_high,
                y_label=y_label,
                x_label=x_label,
                below_avg_amount=below_avg_amount,
                average_amount=average_amount,
                current_school=current_school,
            )

            # Save the figure using your save_graph function
            filename = (
                f"outputs/rag-comparison/{school}/{subgroup_name}/{topic}.png"
            )
            save_graph(filename, figure)


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'autonomy_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'life_satisfaction_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'optimism_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'wellbeing_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'esteem_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'stress_score' for BRAUNTON SCHOOL A