In [1]:

import os

import matplotlib.pyplot as plt
import pandas as pd
from rag_schools_comparison_graph import School, make_comparison_graph


def save_graph(file_path, figure) -> None:  # noqa: ANN001
    """Save the graph."""
    directory = os.path.dirname(file_path)  # noqa: PTH120
    if not os.path.exists(directory):  # noqa: PTH110
        os.makedirs(directory)  # noqa: PTH103
    figure.savefig(file_path, dpi=120, pad_inches=0.4, bbox_inches="tight")
    plt.close(figure)


In [2]:
rag_df = pd.read_csv(
    "../data/real/standard_area_aggregate_scores_rag.csv",
)


# Replace specific values in 'variable_lab' for clarity
rag_df["variable_lab"] = rag_df["variable_lab"].replace(
    {
        "Taking to staff about feelings": "Talking to staff about feelings",
        "Taking at home about feelings": "Talking at home about feelings",
        "Taking to peers about feelings": "Talking to peers about feelings",
        "Talking about feelings": "General talking about feelings",  # Keep temporarily for confirmation
    }
)

# Update 'group' column and relevant 'variable_lab' and 'description' values
rag_df.loc[rag_df["variable"] == "staff_talk_score", ["variable_lab", "description", "group"]] = [
    "Talking to staff about feelings",
    "How positively/negatively young people feel about talking with staff about feeling down",
    "staff_talk",
]

rag_df.loc[rag_df["variable"] == "peer_talk_score", ["variable_lab", "description", "group"]] = [
    "Talking to peers about feelings",
    "How positively/negatively young people feel about talking with peers about feeling down",
    "peer_talk",
]

rag_df.loc[rag_df["variable"] == "home_talk_score", ["variable_lab", "description", "group"]] = [
    "Talking at home about feelings",
    "How positively/negatively young people feel about talking at home about feeling down",
    "home_talk",
]

# Remove rows where 'variable' is 'talk_score' to exclude redundant entries
rag_df = rag_df[rag_df["variable"] != "talk_score"]





In [3]:
schools = rag_df["school_lab"].unique()
topics = rag_df["variable"].unique()

# Base subgroup filters
subgroup_filter_functions = {
    "Year 8": lambda df: df[df["year_group_lab"] == "Year 8"],
    "Year 10": lambda df: df[df["year_group_lab"] == "Year 10"],
    "SEN": lambda df: df[df["sen_lab"] == "SEN"],
    "Non-SEN": lambda df: df[df["sen_lab"] == "Non-SEN"],
    "FSM": lambda df: df[df["fsm_lab"] == "FSM"],
    "Non-FSM": lambda df: df[df["fsm_lab"] == "Non-FSM"],
    "Boy": lambda df: df[df["gender_lab"] == "Boy"],
    "Girl": lambda df: df[df["gender_lab"] == "Girl"],
    "All": lambda df: df[
        (df["year_group_lab"] == "All") &
        (df["fsm_lab"] == "All") &
        (df["sen_lab"] == "All") &
        (df["gender_lab"] == "All")
    ],  # Returns rows where all filters match "All"
}


In [4]:
# Loop over each school
for school in schools:
    # Filter DataFrame for the current school
    school_df = rag_df[rag_df["school_lab"] == school]

    # Loop over each subgroup filter
    for subgroup_name, subgroup_filter in subgroup_filter_functions.items():
        # Apply the subgroup filter to the school's DataFrame
        subgroup_df_for_school = subgroup_filter(school_df)

        # Loop over each topic
        for topic in topics:
            print(f"Processing {school}...")  # noqa: T201
            print(f"Processing {subgroup_name} for {school}...")  # noqa: T201
            print(f"Processing topic '{topic}' for {school} under {subgroup_name}...")  # noqa: T201
            print("\n")  # noqa: T201
            # Filter the DataFrame for the current topic
            filtered_df = subgroup_df_for_school[subgroup_df_for_school["variable"] == topic]

            # Check if there is data to process
            if filtered_df.empty:
                continue  # Skip if no data for this combination

            # Compute mean score for the current school, subgroup, and topic
            current_school_mean = filtered_df["mean"].values[0]

            # Get data for all schools (including the current school)
            all_schools_df: pd.DataFrame = rag_df[rag_df["variable"] == topic]
            all_schools_df = subgroup_filter(all_schools_df)

            # Check if there is data for all schools
            if all_schools_df.empty:
                continue  # Skip if no data

            # Compute mean scores for all schools and sort them
            all_schools_means = all_schools_df.set_index("school_lab")["mean"].sort_values()

            # Create School objects for all schools in sorted order
            all_schools = [
                School(school_name=school_name, mean_topic_score=mean)
                for school_name, mean in all_schools_means.items()
            ]

            # Identify the index of the current school in the sorted list, if it exists
            if school in all_schools_means.index:
                current_school_mean = all_schools_means.loc[school]
                current_school = School(
                    mean_topic_score=current_school_mean,
                    school_name=school,
                )
            else:
                # Handle missing school appropriately here
                current_school = None

            # Determine y-axis range
            range_low = all_schools_means.min() - (all_schools_means.min()/10)
            range_high = all_schools_means.max() + (all_schools_means.max()/10)

            # Labels for the graph
            y_label = "Mean score"
            x_label = "Northern Devon schools (ordered by mean score)"

            # Calculate overall statistics
            overall_mean = all_schools_df["group_wt_mean"].values[0]
            below_avg_amount = all_schools_df["lower"].values[0]
            above_avg_amount = all_schools_df["upper"].values[0]
            average_amount = overall_mean
            print("current_school: ", current_school)
            # Generate the graph and get the figure
            figure = make_comparison_graph(
                schools=all_schools,
                range_low=range_low,
                range_high=range_high,
                y_label=y_label,
                x_label=x_label,
                below_avg_amount=below_avg_amount,
                above_avg_amount=above_avg_amount,
                current_school=current_school,
            )



            # Save the figure using your save_graph function
            filename = f"outputs/{school}/rag_comparison/{subgroup_name}/{topic}.png"
            save_graph(filename, figure)


Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'autonomy_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


current_school:  School(mean_topic_score=np.float64(19.859060402684563), school_name='BRAUNTON SCHOOL AND C.C.')
Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'life_satisfaction_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


current_school:  School(mean_topic_score=np.float64(6.9072847682119205), school_name='BRAUNTON SCHOOL AND C.C.')
Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'optimism_score' for BRAUNTON SCHOOL AND C.C. under Year 8...


current_school:  School(mean_topic_score=np.float64(11.61111111111111), school_name='BRAUNTON SCHOOL AND C.C.')
Processing BRAUNTON SCHOOL AND C.C....
Processing Year 8 for BRAUNTON SCHOOL AND C.C....
Processing topic 'wellbeing_score' for BRAUNTON SCHOOL