# Smooth results of notebook 203

## Default Values for Papermill Parameters

In [None]:
# Directory holding the input CSV files (one file per entry of PARAM_IMAGE_FILENAMES).
PARAM_IMAGES_IN_DIR = "../outputs"

# Base names (without the ".csv" extension) of the result files to smooth.
# Every combination of varied factor, score metric and variant suffix is
# listed, grouped first by factor, then by metric, then by suffix.
PARAM_IMAGE_FILENAMES = [
    f"result_factors_analytical_{factor}_{metric}_maximal_{variant}"
    for factor in ("size", "class_balance")
    for metric in ("average_ranking_loss", "roc_auc_score", "prc_auc_score")
    for variant in ("s0_cb0", "s1_cb0", "s0_cb1")
]

# Number of replicated border pixels added on every side before blurring.
PARAM_BORDER_SIZE = 10
# Width and height of the Gaussian kernel, in pixels.
PARAM_KERNEL_WIDTH = 11
PARAM_KERNEL_HEIGHT = 11
# Standard deviation of the Gaussian kernel; 0 lets it be derived from the kernel size.
PARAM_KERNEL_STD = 0

## Imports, constants, and helper functions used throughout the notebook

In [None]:
from subroc import util

import os
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# Route the configured input directory through the project helper before it
# is used to locate the CSV files.
PARAM_IMAGES_IN_DIR = util.prepend_experiment_output_path(PARAM_IMAGES_IN_DIR)
# Directory the smoothed CSV files are written to; the STAGE_OUTPUT_PATH
# environment variable overrides the "../outputs" default.
STAGE_OUTPUT_PATH = os.getenv("STAGE_OUTPUT_PATH", "../outputs")


def scale_values(img, min_val, max_val):
    """Linearly rescale the pixel values of ``img`` in place to ``[min_val, max_val]``.

    The smallest pixel of the image is mapped to ``min_val`` and the largest
    to ``max_val``; everything in between is interpolated linearly.

    Args:
        img: Rectangular two-dimensional image, either a NumPy array or a
            list of row lists. It is modified in place. Writing into an
            integer-typed array truncates the scaled values.
        min_val: Value the smallest pixel is mapped to.
        max_val: Value the largest pixel is mapped to.

    Returns:
        Tuple ``(img_min_val, img_max_val)`` with the smallest and largest
        pixel value found *before* rescaling. Passing these to a later call
        restores the original value range. For an empty image nothing is
        changed and ``(inf, -inf)`` is returned.
    """
    pixels = np.asarray(img, dtype=float)
    if pixels.size == 0:
        # Nothing to scale; return the identity elements of min and max.
        return np.inf, -np.inf

    img_min_val = pixels.min()
    img_max_val = pixels.max()
    value_range = img_max_val - img_min_val

    if value_range == 0:
        # A constant image has no spread to stretch; map every pixel to the
        # lower bound instead of dividing by zero.
        scaled = np.full(pixels.shape, float(min_val))
    else:
        scaled = (pixels - img_min_val) * ((max_val - min_val) / value_range) + min_val

    # Write the result back into the caller's object.
    if isinstance(img, np.ndarray):
        img[...] = scaled
    else:
        for row, scaled_row in zip(img, scaled.tolist()):
            row[:] = scaled_row

    return img_min_val, img_max_val


## Compute smoothing

In [None]:

# Smooth every result grid with a Gaussian kernel and write the smoothed grid
# to "<STAGE_OUTPUT_PATH>/<name>_smooth.csv". The smoothed arrays are also
# collected in `smooth_imgs`, in the order of PARAM_IMAGE_FILENAMES.
smooth_imgs = []
for data_filename in tqdm(PARAM_IMAGE_FILENAMES):
    df = pd.read_csv(f"{PARAM_IMAGES_IN_DIR}/{data_filename}.csv")

    # The varied factor is derived from the file name.
    param_col = "size" if "size" in data_filename else "negative_class_ratio"

    num_correlations = df["correlation"].nunique()
    num_param = df[param_col].nunique()
    num_cells = num_param * num_correlations
    if len(df) < num_cells:
        raise ValueError(
            f"{data_filename}: expected at least {num_cells} rows "
            f"({num_param} x {num_correlations}), found {len(df)}"
        )

    # Rows are read positionally: row i * num_correlations + j of the file is
    # the cell (i, j) of the grid. A copy is taken so that the image owns its
    # memory independently of the DataFrame.
    img = (
        df["score"]
        .to_numpy(dtype=float, copy=True)[:num_cells]
        .reshape(num_param, num_correlations)
    )
    # input_min_val, input_max_val = scale_values(img, 0, 255)

    # Pad with replicated edge pixels before blurring, then crop the padding
    # away again.
    img_with_border = cv2.copyMakeBorder(
        img,
        PARAM_BORDER_SIZE,
        PARAM_BORDER_SIZE,
        PARAM_BORDER_SIZE,
        PARAM_BORDER_SIZE,
        cv2.BORDER_REPLICATE,
    )
    smooth_img_with_border = cv2.GaussianBlur(
        img_with_border, (PARAM_KERNEL_WIDTH, PARAM_KERNEL_HEIGHT), PARAM_KERNEL_STD
    )
    # Explicit end indices keep the crop correct for PARAM_BORDER_SIZE == 0,
    # where a "[0:-0]" slice would be empty.
    smooth_img = smooth_img_with_border[
        PARAM_BORDER_SIZE:PARAM_BORDER_SIZE + num_param,
        PARAM_BORDER_SIZE:PARAM_BORDER_SIZE + num_correlations,
    ]

    # scale_values(smooth_img, input_min_val, input_max_val)
    smooth_imgs.append(smooth_img)

    # The grid coordinates are regenerated rather than read from the input:
    # sizes are written as 2, 3, ..., negative class ratios as num_param evenly
    # spaced values strictly between 0 and 1, and correlations as evenly
    # spaced values from -1 to 1.
    correlations = np.linspace(-1, 1, num=num_correlations)
    if param_col == "size":
        param_values = np.arange(num_param) + 2
    else:
        param_values = np.linspace(0, 1, num=num_param + 1, endpoint=False)[1:]

    # One output row per grid cell, in row-major order.
    smooth_img_df = pd.DataFrame(
        {
            param_col: np.repeat(param_values, num_correlations),
            "correlation": np.tile(correlations, num_param),
            "score": smooth_img.ravel(),
        }
    )

    smooth_img_df.to_csv(f"{STAGE_OUTPUT_PATH}/{data_filename}_smooth.csv", index=False)

    # plt.matshow(smooth_img)
    # plt.title(data_filename)