In [26]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [27]:
# The notebook is expected to run from a sub-folder of the project, so the
# project root is the parent of the current working directory.
current_dir = os.getcwd()
project_dir = os.path.dirname(current_dir)

# Workbook holding the scoring key.
# (Despite the `_dir` suffix, every name below holds a *file* path.)
data_dir = os.path.join(project_dir, 'data', 'raw_data_2.xlsx')

# Rule listings exported per matrix design, all stored under rule_output/.
rule_output_dir = os.path.join(project_dir, 'rule_output')
single_dir = os.path.join(rule_output_dir, 'in_out_single.xlsx')
four_dir = os.path.join(rule_output_dir, 'in_out_four.xlsx')
left_right_dir = os.path.join(rule_output_dir, 'left_right_single.xlsx')
up_down_dir = os.path.join(rule_output_dir, 'up_down.xlsx')

In [28]:
# Scoring key: read from the named sheet of the raw-data workbook.
data_spec = pd.read_excel(data_dir, sheet_name='Scoring Key')

# Rule listings: one workbook per design, each read with pandas' default sheet.
single_df, four_df, left_right_df, up_down_df = (
    pd.read_excel(workbook_path)
    for workbook_path in (single_dir, four_dir, left_right_dir, up_down_dir)
)

In [29]:
# Step 2: Standardize terminology in data_spec to match the matrix files
def standardize_term(x):
    """Normalise a single scoring-key cell to the spelling used by the rule files.

    Non-string values are returned unchanged. Strings have surrounding
    whitespace stripped; the variants "Distributed Three" and
    "Distribute Three" are rewritten to "Distribute_Three", and every other
    string is returned in its stripped form.
    """
    if not isinstance(x, str):
        return x

    term = x.strip()
    # Both human-written spellings collapse onto the underscore form.
    if term in ("Distributed Three", "Distribute Three"):
        return "Distribute_Three"
    return term

# Normalise the four rule columns so their values use the same spelling
# that standardize_term produces.
rule_columns = ('Type_0', 'Size_0', 'Type_1', 'Size_1')
for col in rule_columns:
    normalised = data_spec[col].apply(standardize_term)
    data_spec[col] = normalised

In [30]:
# Step 3: Map each design label to the dataframe holding that design's rule listing
design_to_df = dict(
    In_Out_Center_Single=single_df,
    In_Out_Distribute_Four=four_df,
    Left_Right_Single=left_right_df,
    Up_Down_Single=up_down_df,
)

In [31]:
# Attributes that must follow the 'Constant' rule in every eligible source matrix.
constant_columns = ['Number_0', 'Position_0', 'Number_1', 'Position_1', 'Color_0', 'Color_1']
# Attributes whose rule must equal the value given in the scoring-key row.
matched_columns = ['Type_0', 'Size_0', 'Type_1', 'Size_1']

# design -> rows of that design's rule listing that pass the 'Constant' filter
# (or None when the design has no rule listing). The filter does not depend on
# the scoring-key row, so it is computed once per design, on first use, instead
# of once per row.
constant_rows_by_design = {}

# Store eligible results: one list of source names per scoring-key row.
eligible_sources = []

for _, row in data_spec.iterrows():
    design = row['Design']

    if design not in constant_rows_by_design:
        df = design_to_df.get(design)
        if df is None:
            constant_rows_by_design[design] = None
        else:
            is_constant = (df[constant_columns] == 'Constant').all(axis=1)
            constant_rows_by_design[design] = df[is_constant]

    candidates = constant_rows_by_design[design]
    if candidates is None:
        # Unknown design: keep the output aligned with data_spec rows.
        eligible_sources.append([])
        continue

    # Narrow the candidates one rule column at a time; equivalent to AND-ing
    # the four equality masks, and it preserves the original row order.
    matches = candidates
    for rule_col in matched_columns:
        matches = matches[matches[rule_col] == row[rule_col]]

    eligible_sources.append(matches['Name'].tolist())

# Attach the matches to a copy so data_spec itself stays unchanged.
result_df = data_spec.copy()
result_df['Source_file'] = eligible_sources

In [32]:
# Spot-check: show the first scoring-key row together with its 'Source_file' list.
result_df.head(1)

Unnamed: 0,ID,Design,Numer/Position_0,Type_0,Size_0,Color_0,Number/Position_1,Type_1,Size_1,Color_1,Source_file,File_Name,Correct Option,Reece's Comments,New_Item,New Correct Choice
0,17,In_Out_Distribute_Four,Constant,Distribute_Three,Distribute_Three,Constant,Constant,Distribute_Three,Arithmetic,Constant,"[RAVEN_11284_train, RAVEN_11558_test, RAVEN_12...",IOF_Dt_Dt_Dt_A,choice_6,Easy,,


In [33]:
# === Plotting functions ===
def save_problem_grid(image, filename):
    """Draw the 3x3 problem matrix and write it to ``filename``.

    Panels ``image[0]`` .. ``image[7]`` fill the first eight cells in
    row-major order (grayscale colormap); the ninth cell shows a '?'
    placeholder. Every cell gets a thin gray border. The figure is closed
    after saving.
    """
    fig = plt.figure(figsize=(10, 10))

    def _hide_axes_and_frame(cell):
        # Drop ticks/spines and draw a border spanning the whole axes area.
        cell.axis('off')
        border = Rectangle((0, 0), 1, 1, transform=cell.transAxes, color='gray', fill=False, lw=1.8)
        cell.add_patch(border)

    for position in range(1, 9):
        cell = plt.subplot(3, 3, position)
        cell.imshow(image[position - 1], cmap='gray')
        _hide_axes_and_frame(cell)

    # Bottom-right cell: the panel to be solved.
    answer_cell = plt.subplot(3, 3, 9)
    answer_cell.text(0.5, 0.5, '?', fontsize=40, ha='center', va='center')
    _hide_axes_and_frame(answer_cell)

    plt.subplots_adjust(wspace=0.1, hspace=0.1)
    plt.savefig(filename)
    plt.close(fig)

def save_choices_plot(image, target, filename):
    """Draw the eight answer candidates in a 2x4 grid and write it to ``filename``.

    Panels ``image[8]`` .. ``image[15]`` are shown in row-major order with
    titles "Choice 1" .. "Choice 8" and a thin gray border. The figure is
    closed after saving.

    ``target`` is accepted but not used by this function.
    NOTE(review): the titles are 1-based; if callers treat ``target`` as a
    0-based index into the choices, the printed labels are offset by one from
    it — confirm the intended convention.
    """
    fig = plt.figure(figsize=(12, 6))

    for choice_no in range(1, 9):
        panel = plt.subplot(2, 4, choice_no)
        panel.set_title(f"Choice {choice_no}")
        # The candidates follow the eight context panels in the image stack.
        panel.imshow(image[choice_no + 7], cmap='gray')
        panel.axis('off')
        border = Rectangle((0, 0), 1, 1, transform=panel.transAxes, color='gray', fill=False, lw=1.8)
        panel.add_patch(border)

    plt.subplots_adjust(wspace=0.1, hspace=0.3)
    plt.savefig(filename)
    plt.close(fig)

In [34]:
output_folder = 'image'
os.makedirs(output_folder, exist_ok=True)

# Scratch files used to pass the two matplotlib renders to PIL; they are
# overwritten for every rendered source file.
temp_folder = "temp_img"
problem_grid_path = os.path.join(temp_folder, "problem_grid.png")
choices_plot_path = os.path.join(temp_folder, "choices_plot.png")

# Loop-invariant lookup data: project root and the folder of .npz files per design.
project_dir = os.path.dirname(os.getcwd())
design_paths = {
    'In_Out_Center_Single': 'output/in_center_single_out_center_single',
    'In_Out_Distribute_Four': 'output/in_distribute_four_out_center_single',
    'Left_Right_Single': 'output/left_center_single_right_center_single',
    'Up_Down_Single': 'output/up_center_single_down_center_single',
}


def combine_vertically(top_path, bottom_path, save_path):
    """Stack two image files (top over bottom) on a white canvas and save the result.

    The canvas is as wide as the wider input, and each input is centred
    horizontally. Both inputs are opened with context managers so their file
    handles are released before the scratch files are overwritten again.
    """
    with Image.open(top_path) as top, Image.open(bottom_path) as bottom:
        width = max(top.width, bottom.width)
        height = top.height + bottom.height
        canvas = Image.new("RGB", (width, height), (255, 255, 255))
        canvas.paste(top, ((width - top.width) // 2, 0))
        canvas.paste(bottom, ((width - bottom.width) // 2, top.height))
    canvas.save(save_path)


# Make a copy of result_df to update
updated_result_df = result_df.copy()

for idx, row in updated_result_df.iterrows():
    file_base = row['File_Name']
    design = row['Design']
    source_files = row['Source_file']

    if not isinstance(source_files, list) or len(source_files) == 0:
        continue

    # The design is fixed for the whole row, so validate it once rather than
    # once per source file.
    folder = design_paths.get(design)
    if not folder:
        print(f"Design {design} not recognized. Skipping row {idx}.")
        continue

    correct_options = []

    for i, file_name in enumerate(source_files):
        if not file_name.endswith('.npz'):
            file_name += '.npz'

        npz_path = os.path.join(project_dir, folder, file_name)
        if not os.path.exists(npz_path):
            print(f"File not found: {npz_path}")
            continue

        # === Load and extract target ===
        # The context manager closes the archive once both arrays are read.
        with np.load(npz_path) as data:
            image = data['image']
            target = int(data['target'])  # Ensure it's a scalar int
        correct_options.append(target)

        # === Render and save ===
        # `i` is the position in source_files, so numbering keeps gaps for
        # files that were skipped above.
        if len(source_files) == 1:
            output_name = f"{file_base}_val.png"
        else:
            output_name = f"{file_base}_val_{i}.png"
        save_path = os.path.join(output_folder, output_name)

        os.makedirs(temp_folder, exist_ok=True)
        save_problem_grid(image, problem_grid_path)
        save_choices_plot(image, target, choices_plot_path)
        combine_vertically(problem_grid_path, choices_plot_path, save_path)

    # === Update the correct option column ===
    if not correct_options:
        # No source file could be loaded: keep the existing 'Correct Option'
        # value instead of overwriting it with an empty list.
        continue
    if len(correct_options) == 1:
        updated_result_df.at[idx, 'Correct Option'] = correct_options[0]
    else:
        updated_result_df.at[idx, 'Correct Option'] = correct_options

In [35]:
# Display the table after 'Correct Option' has been filled from the loaded .npz targets.
updated_result_df

Unnamed: 0,ID,Design,Numer/Position_0,Type_0,Size_0,Color_0,Number/Position_1,Type_1,Size_1,Color_1,Source_file,File_Name,Correct Option,Reece's Comments,New_Item,New Correct Choice
0,17,In_Out_Distribute_Four,Constant,Distribute_Three,Distribute_Three,Constant,Constant,Distribute_Three,Arithmetic,Constant,"[RAVEN_11284_train, RAVEN_11558_test, RAVEN_12...",IOF_Dt_Dt_Dt_A,"[6, 2, 6, 2, 2, 0, 2, 0, 7, 0]",Easy,,
1,12,In_Out_Distribute_Four,Constant,Distribute_Three,Distribute_Three,Constant,Constant,Progression,Arithmetic,Constant,"[RAVEN_1190_train, RAVEN_12633_train, RAVEN_14...",IOF_Dt_Dt_P_A,"[2, 4, 2, 7, 7, 0, 4, 5, 1]",Moderate,,
2,10,In_Out_Distribute_Four,Constant,Distribute_Three,Progression,Constant,Constant,Distribute_Three,Distribute_Three,Constant,"[RAVEN_10906_val, RAVEN_11461_train, RAVEN_114...",IOF_Dt_P_Dt_Dt,"[2, 1, 6, 2, 5, 6]",Hard,,
3,21,In_Out_Distribute_Four,Constant,Distribute_Three,Progression,Constant,Constant,Distribute_Three,Progression,Constant,"[RAVEN_10887_val, RAVEN_6945_train]",IOF_Dt_P_Dt_P,"[4, 5]",Moderate,,
4,15,In_Out_Distribute_Four,Constant,Distribute_Three,Progression,Constant,Constant,Progression,Distribute_Three,Constant,"[RAVEN_11643_train, RAVEN_14524_train, RAVEN_2...",IOF_Dt_P_P_Dt,"[4, 6, 0, 4, 6]",Moderate,,
5,20,In_Out_Distribute_Four,Constant,Distribute_Three,Progression,Constant,Constant,Progression,Progression,Constant,"[RAVEN_11088_test, RAVEN_3900_train, RAVEN_462...",IOF_Dt_P_P_P,"[2, 4, 6, 4, 7]",Easy,,
6,9,In_Out_Distribute_Four,Constant,Progression,Distribute_Three,Constant,Constant,Progression,Arithmetic,Constant,"[RAVEN_10390_train, RAVEN_2909_test, RAVEN_342...",IOF_P_Dt_P_A,"[0, 1, 6, 4, 1, 6, 2, 2, 7, 1, 0]",Moderate,,
7,19,In_Out_Distribute_Four,Constant,Progression,Progression,Constant,Constant,Progression,Arithmetic,Constant,"[RAVEN_14264_train, RAVEN_3053_train, RAVEN_45...",IOF_P_P_P_A,"[4, 0, 2, 2, 5, 0, 3, 4]",Easy,,
8,16,In_Out_Distribute_Four,Constant,Progression,Progression,Constant,Constant,Progression,Distribute_Three,Constant,"[RAVEN_1646_val, RAVEN_2170_train, RAVEN_8771_...",IOF_P_P_P_Dt,"[1, 7, 7]",Moderate,,
9,11,In_Out_Distribute_Four,Constant,Progression,Progression,Constant,Constant,Progression,Progression,Constant,"[RAVEN_14670_train, RAVEN_5786_val, RAVEN_8115...",IOF_P_P_P_P,"[7, 1, 0]",Hard,,


In [36]:
# Persist the final table; the path is relative to the notebook's working directory.
# NOTE(review): to_excel writes the row index as the first column by default —
# pass index=False if that extra column is not wanted in result.xlsx.
updated_result_df.to_excel('result.xlsx')