In [4]:
%cd ~/SSMuLA

/disk2/fli/SSMuLA


In [5]:
%load_ext autoreload
%autoreload 2
%load_ext blackcellmagic

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


In [8]:
from SSMuLA.get_corr import LANDSCAPE_ATTRIBUTES, val_list

In [72]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from matplotlib.colors import LinearSegmentedColormap, to_rgb
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
# Build per-cell background colours for a pandas Styler
def apply_gradient(s, colormap='YlGnBu', mse_colormap='coolwarm'):
    """Return one ``background-color`` CSS declaration per element of ``s``.

    Colours are assigned by position, not by value: a discrete palette with
    ``len(s)`` entries is generated and the i-th colour goes to the i-th
    element.

    Parameters
    ----------
    s : pandas.Series
        Row or column handed over by ``Styler.apply``; only its ``name`` and
        length are used.
    colormap : str
        Palette name used for every series whose name is not ``'mse'``.
    mse_colormap : str
        Palette name used when ``s.name == 'mse'``.

    Returns
    -------
    list of str
        CSS strings of the form ``'background-color: #rrggbb'``.
    """
    palette_name = mse_colormap if s.name == 'mse' else colormap
    # color_palette yields RGB float tuples; interpolating those directly
    # produces "(0.1, 0.2, 0.3)", which is not a valid CSS colour.
    # as_hex() converts them to "#rrggbb" strings.
    hex_colors = sns.color_palette(palette_name, len(s)).as_hex()
    return [f'background-color: {color}' for color in hex_colors]


In [78]:
# Colormap for the MSE row. The stops are listed white -> green and reversed
# before use, so the resulting map runs from green (low end) to white (high end).
colors = ["#FFFFFF", "#9bbb59"]
cmap_mse = LinearSegmentedColormap.from_list(
    name="mse_cmap_r",
    colors=list(reversed(colors)),
    N=100,
)

# def text_color(val):
#     rgb = to_rgb(cmap_mse(val))
#     # Perceived luminance formula: 0.299*R + 0.587*G + 0.114*B
#     luminance = 0.299*rgb[0] + 0.587*rgb[1] + 0.114*rgb[2]
#     return 'white' if luminance < 0.5 else 'black'

# Styling the DataFrame
def style_dataframe(df, mse_label='mse', cmap='Blues'):
    """Return a Styler that shades feature rows and the MSE row separately.

    Feature rows receive a column-wise ``background_gradient``. The row
    labelled ``mse_label`` is coloured with the module-level ``cmap_mse``,
    normalised over that row's own min/max, and gets black text. All values
    are formatted with two decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to style; the row named ``mse_label`` (if present) is treated
        as the MSE row.
    mse_label : str
        Index label of the MSE row.
    cmap : str
        Matplotlib colormap name used for the non-MSE rows.

    Returns
    -------
    pandas.io.formats.style.Styler
    """

    def _mse_row_style(row):
        # Styler.apply(axis=1) passes each row as a Series named by its index label.
        if row.name != mse_label:
            return [''] * len(row)  # leave non-MSE rows to the base gradient
        norm = plt.Normalize(row.min(), row.max())
        styles = []
        for value in row:
            r, g, b, a = cmap_mse(norm(value))
            styles.append(
                f'background-color: rgba({int(r*255)}, {int(g*255)}, {int(b*255)}, {a}); '
                'color: black'
            )
        return styles

    # Keep the MSE row out of the base gradient: background_gradient
    # normalises each column over the rows it is given, so including the MSE
    # value would stretch or compress the colour scale of the feature rows.
    feature_rows = [label for label in df.index if label != mse_label]
    styled_df = df.style
    if feature_rows:  # nothing to shade when the frame holds only the MSE row
        styled_df = styled_df.background_gradient(
            cmap=cmap, subset=pd.IndexSlice[feature_rows, :]
        )

    return styled_df.apply(_mse_row_style, axis=1).format("{:.2f}")


In [39]:
# Merged results table; predictors and response metrics are both read from it.
df = pd.read_csv("results/corr/384/boosting|ridge-top96/merge_all.csv")

# Predictors: the landscape attribute columns. Responses: every column in val_list.
features = df[LANDSCAPE_ATTRIBUTES]
targets = df[val_list]

importance_df_list = []

# One random forest per response column
for target in targets.columns:
    # Identical 80/20 split settings and seed for every target
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        targets[target],
        test_size=0.2,
        random_state=42,
    )

    # Fit the regressor on the training portion
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Held-out error for this target
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Single-column frame of importances, with the test MSE appended as an extra row
    feature_importances = pd.DataFrame(
        {target: model.feature_importances_},
        index=X_train.columns,
    )
    feature_importances.loc["mse"] = mse
    importance_df_list.append(feature_importances)

# Side-by-side table: rows are features plus "mse", columns are targets
importance_df = pd.concat(importance_df_list, axis=1)

In [79]:
# Render the importance table (features plus the "mse" row, one column per target) with styling
style_dataframe(importance_df)

Unnamed: 0,single_step_DE_mean_all,single_step_DE_median_all,single_step_DE_mean_top96,single_step_DE_median_top96,single_step_DE_mean_top384,single_step_DE_median_top384,single_step_DE_fraction_max,recomb_SSM_mean_all,recomb_SSM_median_all,recomb_SSM_mean_top96,recomb_SSM_median_top96,recomb_SSM_mean_top384,recomb_SSM_median_top384,recomb_SSM_fraction_max,top96_SSM_mean_all,top96_SSM_median_all,top96_SSM_mean_top96,top96_SSM_median_top96,top96_SSM_mean_top384,top96_SSM_median_top384,top96_SSM_fraction_max,top_maxes,top_means,ndcgs,rhos,if_truemaxs,maxes_Triad,means_Triad,ndcgs_Triad,rhos_Triad,if_truemaxs_Triad,maxes_ev,means_ev,ndcgs_ev,rhos_ev,if_truemaxs_ev,maxes_esm,means_esm,ndcgs_esm,rhos_esm,if_truemaxs_esm,maxes_esmif,means_esmif,ndcgs_esmif,rhos_esmif,if_truemaxs_esmif,mlde_single_step_DE_delta,mlde_recomb_SSM_delta,mlde_top96_SSM_delta,Triad_single_step_DE_delta,Triad_recomb_SSM_delta,Triad_top96_SSM_delta,ev_single_step_DE_delta,ev_recomb_SSM_delta,ev_top96_SSM_delta,esm_single_step_DE_delta,esm_recomb_SSM_delta,esm_top96_SSM_delta,esmif_single_step_DE_delta,esmif_recomb_SSM_delta,esmif_top96_SSM_delta,delta_ft_mlde,delta_ft_de
n_site,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
numb_measured,0.04,0.02,0.01,0.01,0.0,0.01,0.17,0.25,0.19,0.0,0.01,0.0,0.02,0.03,0.11,0.01,0.02,0.02,0.01,0.02,0.25,0.2,0.02,0.02,0.01,0.24,0.13,0.01,0.02,0.01,0.34,0.16,0.01,0.02,0.02,0.48,0.33,0.02,0.02,0.01,0.29,0.15,0.01,0.03,0.02,0.49,0.05,0.01,0.01,0.0,0.03,0.01,0.01,0.0,0.02,0.01,0.0,0.01,0.01,0.01,0.01,0.02,0.0
percent_measured,0.08,0.12,0.11,0.09,0.07,0.09,0.01,0.03,0.08,0.1,0.11,0.14,0.13,0.1,0.07,0.11,0.15,0.15,0.15,0.15,0.06,0.08,0.03,0.04,0.0,0.04,0.14,0.02,0.08,0.01,0.01,0.11,0.03,0.05,0.01,0.01,0.08,0.01,0.02,0.02,0.01,0.12,0.03,0.04,0.04,0.01,0.02,0.01,0.02,0.03,0.02,0.01,0.01,0.0,0.01,0.01,0.01,0.01,0.02,0.02,0.02,0.07,0.01
numb_active,0.04,0.04,0.08,0.08,0.04,0.08,0.02,0.02,0.02,0.1,0.04,0.09,0.02,0.12,0.02,0.04,0.06,0.04,0.07,0.04,0.02,0.03,0.02,0.02,0.01,0.03,0.05,0.02,0.07,0.02,0.11,0.04,0.02,0.05,0.01,0.05,0.03,0.03,0.12,0.0,0.08,0.04,0.03,0.05,0.01,0.07,0.02,0.0,0.02,0.01,0.0,0.0,0.02,0.01,0.0,0.02,0.01,0.01,0.02,0.0,0.0,0.04,0.0
percent_active,0.03,0.02,0.05,0.04,0.03,0.04,0.01,0.06,0.03,0.07,0.04,0.05,0.03,0.05,0.01,0.02,0.02,0.03,0.02,0.03,0.01,0.0,0.06,0.02,0.05,0.01,0.03,0.06,0.0,0.05,0.0,0.02,0.06,0.03,0.06,0.01,0.03,0.04,0.03,0.03,0.0,0.04,0.03,0.01,0.03,0.0,0.01,0.0,0.01,0.02,0.01,0.0,0.02,0.0,0.01,0.02,0.01,0.02,0.02,0.01,0.01,0.01,0.01
active_fit_min,0.02,0.02,0.01,0.0,0.0,0.0,0.07,0.02,0.02,0.01,0.01,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.07,0.0,0.03,0.03,0.02,0.07,0.01,0.04,0.01,0.01,0.01,0.04,0.03,0.01,0.02,0.01,0.02,0.03,0.01,0.05,0.02,0.03,0.03,0.04,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.03,0.01
parent_fit,0.03,0.06,0.01,0.0,0.0,0.0,0.02,0.01,0.01,0.0,0.02,0.01,0.06,0.01,0.02,0.02,0.02,0.01,0.03,0.01,0.01,0.01,0.01,0.05,0.01,0.02,0.01,0.02,0.08,0.0,0.02,0.0,0.01,0.04,0.0,0.03,0.01,0.01,0.03,0.0,0.02,0.01,0.02,0.08,0.0,0.02,0.04,0.09,0.01,0.02,0.04,0.0,0.01,0.04,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.02,0.03
parent_rank,0.08,0.12,0.0,0.01,0.05,0.01,0.02,0.04,0.04,0.02,0.15,0.07,0.15,0.05,0.12,0.15,0.16,0.16,0.15,0.16,0.11,0.08,0.01,0.09,0.0,0.05,0.01,0.01,0.18,0.01,0.01,0.01,0.02,0.07,0.0,0.01,0.01,0.02,0.11,0.01,0.01,0.0,0.01,0.09,0.02,0.02,0.0,0.0,0.01,0.01,0.01,0.03,0.0,0.01,0.03,0.0,0.01,0.03,0.0,0.01,0.03,0.11,0.01
parent_rank_percent,0.06,0.04,0.02,0.02,0.03,0.02,0.01,0.02,0.05,0.01,0.08,0.02,0.05,0.03,0.06,0.08,0.1,0.06,0.09,0.06,0.02,0.05,0.01,0.06,0.0,0.03,0.01,0.0,0.06,0.01,0.01,0.02,0.01,0.0,0.0,0.01,0.01,0.01,0.05,0.0,0.0,0.02,0.01,0.05,0.0,0.02,0.0,0.01,0.0,0.01,0.02,0.02,0.01,0.02,0.02,0.01,0.01,0.01,0.01,0.01,0.02,0.05,0.01
mean,0.03,0.03,0.08,0.07,0.0,0.07,0.0,0.04,0.02,0.1,0.03,0.09,0.03,0.03,0.03,0.04,0.03,0.01,0.03,0.01,0.0,0.03,0.05,0.03,0.02,0.0,0.07,0.07,0.0,0.06,0.01,0.02,0.03,0.03,0.05,0.01,0.05,0.05,0.01,0.06,0.01,0.04,0.05,0.0,0.05,0.02,0.0,0.01,0.01,0.02,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.02,0.02,0.02


In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Small hand-written example: three feature rows followed by an MSE row
index = ['fraction_non-magnitude', 'fraction_reciprocal-sign', 'parent_rank_percent', 'MSE']
data = {
    'single_step_DE_mean_all': [
        0.111337,
        0.095763,
        0.093382,
        0.16760788934389106,
    ],
    'single_step_DE_median_all': [
        0.133158,
        0.116305,
        0.099114,
        0.004660274107895844,
    ],
}

# Assemble the example frame with the row labels above
df = pd.DataFrame(data=data, index=index)

# Two-stop colormap used only for the MSE row (light red -> pink)
colors = ["#ffcccb", "#ffb6c1"]
cmap_mse = LinearSegmentedColormap.from_list(
    name="mse_cmap",
    colors=colors,
    N=100,
)

# Styling the DataFrame
def style_dataframe(df, mse_label='MSE', cmap='YlGnBu'):
    """Return a Styler that shades feature rows and the MSE row separately.

    Feature rows receive a column-wise ``background_gradient``. The row
    labelled ``mse_label`` is coloured with the module-level ``cmap_mse``,
    normalised over that row's own min/max.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to style; the row named ``mse_label`` (if present) is treated
        as the MSE row.
    mse_label : str
        Index label of the MSE row.
    cmap : str
        Matplotlib colormap name used for the non-MSE rows.

    Returns
    -------
    pandas.io.formats.style.Styler
    """

    def _mse_row_style(row):
        # Styler.apply(axis=1) passes each row as a Series named by its index label.
        if row.name != mse_label:
            return [''] * len(row)  # leave non-MSE rows to the base gradient
        norm = plt.Normalize(row.min(), row.max())
        styles = []
        for value in row:
            r, g, b, a = cmap_mse(norm(value))
            styles.append(
                f'background-color: rgba({int(r*255)}, {int(g*255)}, {int(b*255)}, {a})'
            )
        return styles

    # Keep the MSE row out of the base gradient: background_gradient
    # normalises each column over the rows it is given, so including the MSE
    # value would stretch or compress the colour scale of the feature rows.
    feature_rows = [label for label in df.index if label != mse_label]
    styled_df = df.style
    if feature_rows:  # nothing to shade when the frame holds only the MSE row
        styled_df = styled_df.background_gradient(
            cmap=cmap, subset=pd.IndexSlice[feature_rows, :]
        )

    return styled_df.apply(_mse_row_style, axis=1)

# Build the Styler for the sample frame defined in this cell
styled_df = style_dataframe(df)

# Leave the Styler as the cell's last expression so the notebook renders it
styled_df


Unnamed: 0,single_step_DE_mean_all,single_step_DE_median_all
fraction_non-magnitude,0.111337,0.133158
fraction_reciprocal-sign,0.095763,0.116305
parent_rank_percent,0.093382,0.099114
MSE,0.167608,0.00466
