In [2]:
import os
from typing import Literal

import matplotlib.pyplot as plt
import pandas as pd

from school_graphs.comparison_graph import make_comparison_graph
from school_graphs.mocks.fake_school_comparison import School


ModuleNotFoundError: No module named 'school_graphs'

In [None]:
# Load the RAG dataset from the synthetic standard survey export
rag_df = pd.read_csv(
    filepath_or_buffer="../data/synthetic/standard/synthetic_standard_survey.standard_school_aggregate_scores_rag.0000000010000.csv",
)
# Preview the first few rows as the cell output
rag_df.head()


In [None]:
# Keep only the columns the comparison graphs need
comparison_columns = [
    # Topic name, score and RAG rating
    "variable", "mean", "rag",
    # Labels: school plus the four subgroup breakdowns
    "school_lab", "year_group_lab", "gender_lab", "fsm_lab", "sen_lab",
    # Group-level statistics and bounds
    "group_wt_mean", "group_wt_std", "lower", "upper",
]
filtered_comparison_df = rag_df.loc[:, comparison_columns]

# Names of the subgroup breakdowns the comparisons are produced for
SubGroups = Literal["all_pupils", "year_group", "gender", "fsm", "sen"]

# Split the filtered data by topic

# Maps "<topic>" -> every row for that topic, and
# "<topic>_all_pupils" -> only the rows with no subgroup breakdown applied
topic_comparisons_df: dict[str, pd.DataFrame] = {}

# Distinct topics found in the "variable" column
topics = filtered_comparison_df["variable"].unique()

# A row describes "All pupils" when every one of these labels reads "All"
subgroup_label_columns = ["year_group_lab", "gender_lab", "fsm_lab", "sen_lab"]

for topic in topics:
    # Every row for this topic, whatever the subgroup
    topic_df = filtered_comparison_df.loc[filtered_comparison_df["variable"].eq(topic)]
    topic_comparisons_df[topic] = topic_df

    # Row-wise check that all four subgroup labels are "All"
    is_all_pupils = topic_df[subgroup_label_columns].eq("All").all(axis=1)
    all_pupils_df = topic_df.loc[is_all_pupils]

    # Store the "All pupils" rows under their own key
    topic_comparisons_df[f"{topic}_all_pupils"] = all_pupils_df

# Draw the comparison graph for 'Autonomy', "All pupils", all schools

# Index directly instead of using .get(): a missing key then fails here with a
# clear KeyError rather than later as an AttributeError on None.
autonomy_all_pupils = topic_comparisons_df["autonomy_score_all_pupils"]
if autonomy_all_pupils.empty:
    raise ValueError("No 'All pupils' rows found for topic 'autonomy_score'")

# Wrap each row's mean score in a School object for the graph helper
autonomy_mean_scores = autonomy_all_pupils["mean"].to_list()
autonomy_schools = [School(mean_topic_score=score) for score in autonomy_mean_scores]
# Original (misspelled) name kept so any other cell that refers to it still works
list_of_automony_mean_scores = autonomy_schools

# The previous code subscripted the bound method `topic_comparisons_df.get[...]`,
# which raises TypeError. The statistics live in columns of the DataFrame.
# NOTE(review): assumes "lower", "upper" and "group_wt_mean" hold the same value
# on every row of this topic/subgroup, so the first row is representative, and
# that make_comparison_graph expects scalars here - TODO confirm.
reference_row = autonomy_all_pupils.iloc[0]

# TODO: replace with the real "your school" selection once it exists. The first
# school is only a placeholder so the cell runs; the previous expression
# (`School.topic_comparisons_df...`) referenced an attribute that does not exist.
current_school = autonomy_schools[0]

figure = make_comparison_graph(
    schools=autonomy_schools,
    range_low=reference_row["lower"],
    range_high=reference_row["upper"],
    y_label="Mean Autonomy Score",
    x_label="School",
    below_avg_amount=reference_row["group_wt_mean"],  # School mean?
    average_amount=reference_row["group_wt_mean"],  # School mean?
    current_school=current_school,
)

plt.show()


In [None]:
# By year group

# NOTE: topic_comparisons_df is deliberately not re-created in this cell.
# Re-initialising it here discarded the per-topic and "All pupils" DataFrames
# stored by the previous cell, while the gender / FSM / SEN cells all add to
# the same dictionary.

# Year groups that get their own comparison DataFrame
year_groups_to_compare = ["Year 8", "Year 10"]

for topic in topics:
    # Rows for the current topic
    topic_df = filtered_comparison_df[filtered_comparison_df["variable"] == topic]

    # Rows broken down by one of the year groups of interest
    year_group_comparison = topic_df[
        topic_df["year_group_lab"].isin(year_groups_to_compare)
    ]

    # One DataFrame per year group present, keyed "<topic>_<year group>".
    # sort=False keeps the groups in order of first appearance.
    for year_group, year_group_df in year_group_comparison.groupby(
        "year_group_lab", sort=False
    ):
        topic_comparisons_df[f"{topic}_{year_group}"] = year_group_df

# The DataFrame for 'Autonomy' for "Year 8"; direct indexing raises a clear
# KeyError if it was never stored, instead of failing on None below.
autonomy_year_8 = topic_comparisons_df["autonomy_score_Year 8"]

# Show the first few rows as the cell output (rich display rather than print)
autonomy_year_8.head()




In [None]:
# By gender

for topic in topics:
    # Rows for the current topic
    topic_df = filtered_comparison_df.loc[filtered_comparison_df["variable"].eq(topic)]

    # Rows broken down by gender ("Boy" or "Girl")
    is_gender_row = topic_df["gender_lab"].isin(["Boy", "Girl"])
    gender_comparison = topic_df.loc[is_gender_row]

    # One DataFrame per gender present in the data
    for gender in gender_comparison["gender_lab"].unique():
        gender_df = gender_comparison.loc[gender_comparison["gender_lab"].eq(gender)]

        # Keyed by topic and gender, e.g. "<topic>_Girl"
        topic_comparisons_df[f"{topic}_{gender}"] = gender_df

# The DataFrame for 'Optimism' for "Girl" (None if that key was never stored)
optimism_girl = topic_comparisons_df.get("optimism_score_Girl")



In [None]:
# By FSM

for topic in topics:
    # Rows for the current topic
    topic_df = filtered_comparison_df.loc[filtered_comparison_df["variable"].eq(topic)]

    # Rows broken down by FSM status ("FSM" or "Non-FSM")
    is_fsm_row = topic_df["fsm_lab"].isin(["FSM", "Non-FSM"])
    fsm_comparison = topic_df.loc[is_fsm_row]

    # One DataFrame per FSM status present in the data
    for fsm in fsm_comparison["fsm_lab"].unique():
        fsm_df = fsm_comparison.loc[fsm_comparison["fsm_lab"].eq(fsm)]

        # Keyed by topic and FSM status, e.g. "<topic>_FSM"
        topic_comparisons_df[f"{topic}_{fsm}"] = fsm_df

# The DataFrame for 'Wellbeing' for "FSM" (None if that key was never stored)
wellbeing_fsm = topic_comparisons_df.get("wellbeing_score_FSM")



In [None]:
# By SEN status

for topic in topics:
    # Rows for the current topic
    topic_df = filtered_comparison_df.loc[filtered_comparison_df["variable"].eq(topic)]

    # Rows broken down by SEN status ("SEN" or "Non-SEN")
    is_sen_row = topic_df["sen_lab"].isin(["SEN", "Non-SEN"])
    sen_comparison = topic_df.loc[is_sen_row]

    # One DataFrame per SEN status present in the data
    for sen_status in sen_comparison["sen_lab"].unique():
        sen_df = sen_comparison.loc[sen_comparison["sen_lab"].eq(sen_status)]

        # Keyed by topic and SEN status, e.g. "<topic>_SEN"
        topic_comparisons_df[f"{topic}_{sen_status}"] = sen_df

# The DataFrame for 'Wellbeing' for "SEN" (None if that key was never stored)
wellbeing_sen = topic_comparisons_df.get("wellbeing_score_SEN")


# Select "your school"

In [122]:
# Save the dfs (for gender/fsm/sen/year group, save one df for each category)

In [23]:
# Graph code


# Save graphs in outputs / school / comparisons / [all_pupils, fsm, gender, sen, year_group] / [topic] / [topic]_[category].png

# Save each graph to the relevant folder
def save_graph(file_path, dpi=100):
    """Save the current pyplot figure to ``file_path`` and then close it.

    Missing parent directories of ``file_path`` are created first.

    Args:
        file_path: Destination path of the image file. May be a bare filename,
            in which case the file is written relative to the working directory.
        dpi: Resolution passed to ``plt.savefig``. Defaults to 100.
    """
    directory = os.path.dirname(file_path)
    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one to create.
    if directory:
        # exist_ok=True replaces the separate exists() check and tolerates the
        # directory being created between the check and the call.
        os.makedirs(directory, exist_ok=True)
    plt.savefig(file_path, dpi=dpi)
    plt.close()

# Subgroup folders that each topic's graph is written into
subgroups = ("all_pupils", "fsm", "gender", "sen", "year_group")

# NOTE(review): nothing is drawn between iterations, so each call saves whatever
# the current pyplot figure is at that point - graph creation still needs adding.
for topic in topics:
    for subgroup in subgroups:
        filename = f"{topic}_{subgroup}.png"
        # Full path of the image file (not just its directory)
        output_path = f"../outputs/school/comparisons/{subgroup}/{topic}/{filename}"
        save_graph(output_path)

# Save the text below in each of the folders
# "# Print text: Your school had [n] complete responses. Across Northern Devon, there were [n] complete responses from [n] schools. The average score for the pupils at your school, compared to other schools in Northern Devon, was: [RAG]"