# Feature Importance Calculation using Activation Data

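This notebook computes feature importances from stored neural-network activation data. For each subject, the absolute activations of a layer are summarised per feature, and the resulting scores are averaged across subjects to produce a ranking. The scoring methods are: mean activation, mean multiplied by standard deviation, the average of the min-max-normalised mean and standard deviation (plus a variant in which the normalised standard deviation is inverted), and two robust variants that penalise low or high variability, respectively.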


In [49]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd


In [50]:
# Names of the 54 input features.
# Downstream code labels activation column i with entry i of this list, so the
# order here has to match the column order of the activation arrays.
selected_features = [
    'World_Gaze_Direction_R_X',
    'World_Gaze_Direction_R_Y',
    'World_Gaze_Direction_R_Z',
    'World_Gaze_Direction_L_X',
    'World_Gaze_Direction_L_Y',
    'World_Gaze_Direction_L_Z',
    'World_Gaze_Origin_R_X',
    'World_Gaze_Origin_R_Z',
    'World_Gaze_Origin_L_X',
    'World_Gaze_Origin_L_Z',
    'Vergence_Angle',
    'Vergence_Depth',
    'Normalized_Depth',
    'Directional_Magnitude_R',
    'Directional_Magnitude_L',
    'Cosine_Angles',
    'Gaze_Point_Distance',
    'Normalized_Vergence_Angle',
    'Delta_Gaze_X',
    'Delta_Gaze_Y',
    'Delta_Gaze_Z',
    'Rolling_Mean_Normalized_Depth',
    'Gaze_Vector_Angle',
    'Gaze_Point_Depth_Difference',
    'Relative_Change_Vergence_Angle',
    'Ratio_Directional_Magnitude',
    'Ratio_Delta_Gaze_XY',
    'Ratio_World_Gaze_Direction_X',
    'Ratio_World_Gaze_Direction_Y',
    'Ratio_World_Gaze_Direction_Z',
    'Interaction_Normalized_Depth_Vergence_Angle',
    'Lag_1_Normalized_Depth',
    'Diff_Normalized_Depth',
    'Directional_Magnitude_Ratio',
    'Gaze_Direction_X_Ratio',
    'Gaze_Direction_Y_Ratio',
    'Gaze_Direction_Z_Ratio',
    'Angular_Difference_X',
    'Depth_Angle_Interaction',
    'Gaze_Point_Euclidean_Distance',
    'Gaze_Direction_Angle',
    'Velocity_Gaze_Direction_R_X',
    'Acceleration_Gaze_Direction_R_X',
    'Velocity_Gaze_Direction_R_Y',
    'Acceleration_Gaze_Direction_R_Y',
    'Velocity_Gaze_Direction_R_Z',
    'Acceleration_Gaze_Direction_R_Z',
    'Velocity_Gaze_Direction_L_X',
    'Acceleration_Gaze_Direction_L_X',
    'Velocity_Gaze_Direction_L_Y',
    'Acceleration_Gaze_Direction_L_Y',
    'Velocity_Gaze_Direction_L_Z',
    'Acceleration_Gaze_Direction_L_Z',
    'Angular_Difference_Gaze_Directions',
]


In [51]:
def load_activation_data(user_folder, layer_index=0):
    """
    Load the activations of one layer for a single subject.

    Reads 'intermediates_activations.npy' from `user_folder`. The file is loaded as a
    pickled dict and the layer is picked by position among the dict's values. Arrays
    with more than two dimensions are reduced by averaging over axis 1.

    Parameters:
    - user_folder (str): Path to the folder containing the activation file.
    - layer_index (int): Position of the layer among the stored values. Defaults to 0,
      which is the layer this function always returned before the parameter existed.

    Returns:
    - np.ndarray or None: The (possibly reduced) activation array, or None when the file
      is missing, cannot be read, or holds no usable array at that position. A message
      is printed in each of those cases.
    """
    activation_file_path = os.path.join(user_folder, 'intermediates_activations.npy')
    if not os.path.exists(activation_file_path):
        print(f"Activation file does not exist at {activation_file_path}")
        return None

    try:
        # NOTE: allow_pickle=True unpickles arbitrary objects, which can execute code.
        # Only load activation files that come from a trusted source.
        activations_data = np.load(activation_file_path, allow_pickle=True).item()
        layer_activation = list(activations_data.values())[layer_index]

        if not (isinstance(layer_activation, np.ndarray) and layer_activation.size > 0):
            # Previously this case returned None without any diagnostic.
            print(f"No valid activation data for layer {layer_index} in {user_folder}")
            return None

        if layer_activation.ndim > 2:
            layer_activation = np.mean(layer_activation, axis=1)
        return layer_activation
    except Exception as e:
        print(f"Error loading activation data from {activation_file_path}: {e}")
        return None


In [52]:
def calculate_actif_mean(activation):
    """
    Score each feature by the mean of its absolute activations.

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std). For this method the importance is the mean
      itself, so the first two entries are the same array.
    """
    magnitudes = np.abs(activation)
    feature_std = np.std(magnitudes, axis=0)
    feature_mean = np.mean(magnitudes, axis=0)
    return feature_mean, feature_mean, feature_std


In [53]:
def calculate_actif_meanstddev(activation):
    """
    Score each feature by mean x standard deviation of its absolute activations.

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std), where importance = mean * std.
    """
    magnitudes = np.abs(activation)
    feature_mean = np.mean(magnitudes, axis=0)
    feature_std = np.std(magnitudes, axis=0)
    return feature_mean * feature_std, feature_mean, feature_std


In [54]:
def calculate_actif_weighted_mean(activation):
    """
    Score each feature by the average of its min-max-normalised mean and
    min-max-normalised standard deviation (both taken over absolute activations).

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std), where
      importance = (normalised_mean + normalised_std) / 2.
    """
    def _min_max(values):
        # Scale to [0, 1]. When every entry is identical the range is zero and the
        # plain formula would divide by zero, turning every score into NaN. A constant
        # vector carries no ranking information, so it is mapped to zeros instead.
        lowest = np.min(values)
        span = np.max(values) - lowest
        if span == 0:
            return np.zeros_like(values, dtype=float)
        return (values - lowest) / span

    activation_abs = np.abs(activation)
    mean_activation = np.mean(activation_abs, axis=0)
    std_activation = np.std(activation_abs, axis=0)

    adjusted_importance = (_min_max(mean_activation) + _min_max(std_activation)) / 2
    return adjusted_importance, mean_activation, std_activation


In [55]:
def calculate_actif_inverted_weighted_mean(activation):
    """
    Score each feature by the average of its min-max-normalised mean and the
    inverse (1 - x) of its min-max-normalised standard deviation, both taken over
    absolute activations. Features with a lower standard deviation score higher.

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std), where
      importance = (normalised_mean + (1 - normalised_std)) / 2.
    """
    def _min_max(values):
        # Scale to [0, 1]. When every entry is identical the range is zero and the
        # plain formula would divide by zero, turning every score into NaN. A constant
        # vector carries no ranking information, so it is mapped to zeros instead.
        lowest = np.min(values)
        span = np.max(values) - lowest
        if span == 0:
            return np.zeros_like(values, dtype=float)
        return (values - lowest) / span

    activation_abs = np.abs(activation)
    mean_activation = np.mean(activation_abs, axis=0)
    std_activation = np.std(activation_abs, axis=0)

    inverse_normalized_std = 1 - _min_max(std_activation)
    adjusted_importance = (_min_max(mean_activation) + inverse_normalized_std) / 2
    return adjusted_importance, mean_activation, std_activation


In [56]:
def calculate_actif_robust(activations, epsilon=0.01, min_std_threshold=0.01):
    """
    Score features by their normalised mean absolute activation, damped for
    features whose standard deviation is small.

    Parameters:
    - activations (array-like): Activation values; statistics are taken along axis 0.
    - epsilon (float): Added to the range in the mean normalisation's denominator.
    - min_std_threshold (float): Scale of the exponential applied to the std.

    Returns:
    - tuple: (importance, mean, std), where
      importance = normalised_mean * (1 - exp(-std / min_std_threshold)).
    """
    magnitudes = np.abs(activations)
    feature_mean = np.mean(magnitudes, axis=0)
    feature_std = np.std(magnitudes, axis=0)

    lowest_mean = np.min(feature_mean)
    scaled_mean = (feature_mean - lowest_mean) / (np.max(feature_mean) - lowest_mean + epsilon)

    # Approaches 1 as std -> 0, so (1 - decay) pushes low-variability features towards 0.
    decay = np.exp(-feature_std / min_std_threshold)
    return scaled_mean * (1 - decay), feature_mean, feature_std


In [57]:
def calculate_actif_robust_penHigh(activations, epsilon=0.01, min_std_threshold=0.01):
    """
    Score features by their normalised mean absolute activation, damped for
    features whose standard deviation is large.

    Parameters:
    - activations (array-like): Activation values; statistics are taken along axis 0.
    - epsilon (float): Added to the range in the mean normalisation's denominator.
    - min_std_threshold (float): Scale of the exponential applied to the std.

    Returns:
    - tuple: (importance, mean, std), where
      importance = normalised_mean * exp(-std / min_std_threshold).
    """
    magnitudes = np.abs(activations)
    feature_mean = np.mean(magnitudes, axis=0)
    feature_std = np.std(magnitudes, axis=0)

    lowest_mean = np.min(feature_mean)
    scaled_mean = (feature_mean - lowest_mean) / (np.max(feature_mean) - lowest_mean + epsilon)

    # Shrinks towards 0 as std grows, so high-variability features are penalised.
    decay = np.exp(-feature_std / min_std_threshold)
    return scaled_mean * decay, feature_mean, feature_std


In [58]:
def actif_general_all_subjects(subject_folders, calculation_function, layer_index=0):
    """
    Calculate feature importances per subject for one layer and average them across subjects.

    For every folder, 'intermediates_activations.npy' is loaded as a pickled dict and the
    layer is picked by position among the dict's values. Arrays with more than two
    dimensions are averaged over axis 1 before being scored. Folders whose file is missing,
    unreadable, empty, or whose importance vector does not line up with the feature list
    are reported with a message and skipped.

    Parameters:
    - subject_folders (List[str]): List of paths to the user folders containing activation data.
    - calculation_function (callable): Takes the activation array and returns a 3-tuple whose
      first element is the per-feature importance vector.
    - layer_index (int): Index of the layer to process (e.g., 0 or 1).

    Returns:
    - List[dict]: One {'feature': name, 'attribution': value} dict per feature, ordered from
      highest to lowest averaged attribution. Empty if no subject yielded usable data.
    """
    feature_names = np.array(
        [value for value in selected_features if value not in ('SubjectID', 'Gt_Depth')])
    n_features = len(feature_names)

    per_subject_importances = []

    for user_folder in subject_folders:
        activation_file_path = os.path.join(user_folder, 'intermediates_activations.npy')

        if not os.path.exists(activation_file_path):
            print(f"Activation file does not exist at {activation_file_path}")
            continue

        try:
            # NOTE: allow_pickle=True unpickles arbitrary objects, which can execute code.
            # Only load activation files that come from a trusted source.
            activations_data = np.load(activation_file_path, allow_pickle=True).item()
            layer_activation = list(activations_data.values())[layer_index]  # Select the specific layer

            if not (isinstance(layer_activation, np.ndarray) and layer_activation.size > 0):
                print(f"No valid activation data for layer {layer_index} in {user_folder}")
                continue

            if layer_activation.ndim > 2:
                layer_activation = np.mean(layer_activation, axis=1)

            importance, _, _ = calculation_function(layer_activation)
            importance = np.asarray(importance)
        except Exception as e:
            print(f"Error processing {activation_file_path}: {e}")
            continue

        # Entry i of the importance vector is labelled with feature i. A vector of any other
        # length would either be mislabelled silently (too short) or make the indexing below
        # fail (too long), and vectors of differing lengths cannot be averaged at all.
        if importance.shape != (n_features,):
            print(f"Skipping {user_folder}: layer {layer_index} gave importances of shape "
                  f"{importance.shape}, expected ({n_features},) to match the feature list")
            continue

        per_subject_importances.append(importance)

    if not per_subject_importances:
        return []

    # Average across subjects, then rank from most to least important.
    combined_importances = np.mean(np.array(per_subject_importances), axis=0)
    sorted_indices = np.argsort(-combined_importances)

    return [{'feature': feature, 'attribution': attribution}
            for feature, attribution in zip(feature_names[sorted_indices],
                                            combined_importances[sorted_indices])]


In [61]:

def compile_results_with_layers_as_columns(subject_folders, layer_indices=(0, 1), output_dir="Rankings"):
    """
    Run every importance method on the requested layers and write one CSV per method.

    Each CSV has a 'Feature' column plus one 'Layer <i>' column per processed layer, and is
    written to '<output_dir>/feature_importance_<method>.csv'.

    Parameters:
    - subject_folders (List[str]): List of paths to the user folders containing activation data.
    - layer_indices (Iterable[int]): Layers to process. Defaults to (0, 1).
    - output_dir (str): Directory for the CSV files; created if it does not exist.
      Defaults to "Rankings".

    Returns:
    - None
    """
    # Define the methods and corresponding functions
    methods = {
        'mean': calculate_actif_mean,
        'mean_stddev': calculate_actif_meanstddev,
        'weighted_mean': calculate_actif_weighted_mean,
        'inverted_weighted_mean': calculate_actif_inverted_weighted_mean,
        'robust': calculate_actif_robust,
        'robust_penHigh': calculate_actif_robust_penHigh
    }

    # to_csv does not create missing directories, so make sure the target exists first.
    os.makedirs(output_dir, exist_ok=True)

    for method_name, method_function in methods.items():
        results = {}

        for layer_index in layer_indices:
            ranking = actif_general_all_subjects(subject_folders, method_function, layer_index=layer_index)
            for item in ranking:
                results.setdefault(item['feature'], {})[f'Layer {layer_index}'] = item['attribution']

        if not results:
            # Nothing was computed for any layer; do not report an empty file as a success.
            print(f"No results for method '{method_name}'; nothing written")
            continue

        # One row per feature, one column per layer
        df = (pd.DataFrame.from_dict(results, orient='index')
              .reset_index()
              .rename(columns={'index': 'Feature'}))

        output_file = f"{output_dir}/feature_importance_{method_name}.csv"
        df.to_csv(output_file, index=False)

        print(f"Results for method '{method_name}' successfully saved to {output_file}")

# Example usage: every sub-directory of the results root is treated as one subject folder.
user_folders = 'D:\\git\\Results_FOVAL'
subject_folders = list(filter(
    os.path.isdir,
    (os.path.join(user_folders, entry) for entry in os.listdir(user_folders)),
))

compile_results_with_layers_as_columns(subject_folders)


Results for method 'mean' successfully saved to feature_importance_mean.csv
Results for method 'mean_stddev' successfully saved to feature_importance_mean_stddev.csv
Results for method 'weighted_mean' successfully saved to feature_importance_weighted_mean.csv
Results for method 'inverted_weighted_mean' successfully saved to feature_importance_inverted_weighted_mean.csv
Results for method 'robust' successfully saved to feature_importance_robust.csv
Results for method 'robust_penHigh' successfully saved to feature_importance_robust_penHigh.csv


In [None]:
# TEST
import os
import numpy as np
import matplotlib.pyplot as plt

# Your selected features list ...

def calculate_weighted_importance_product(activation):
    """
    Score each feature by mean x standard deviation of its activations.

    The signed activations are used as given; no absolute value is taken.

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std), where importance = mean * std.
    """
    feature_mean = np.mean(activation, axis=0)
    feature_std = np.std(activation, axis=0)
    return feature_mean * feature_std, feature_mean, feature_std

def calculate_adjusted_importance(activation):
    """
    Score each feature by the average of its min-max-normalised mean and
    min-max-normalised standard deviation.

    The signed activations are used as given; no absolute value is taken. A small
    constant (1e-8) is added to each normalisation denominator.

    Parameters:
    - activation (array-like): Activation values; statistics are taken along axis 0.

    Returns:
    - tuple: (importance, mean, std), where
      importance = (normalised_mean + normalised_std) / 2.
    """
    feature_mean = np.mean(activation, axis=0)
    feature_std = np.std(activation, axis=0)

    mean_floor = np.min(feature_mean)
    std_floor = np.min(feature_std)
    scaled_mean = (feature_mean - mean_floor) / (np.max(feature_mean) - mean_floor + 1e-8)
    scaled_std = (feature_std - std_floor) / (np.max(feature_std) - std_floor + 1e-8)

    return (scaled_mean + scaled_std) / 2, feature_mean, feature_std

# Every sub-directory of the results root is treated as one subject folder.
results_dir = 'D:/git/Results_FOVAL'
subject_folders = [f.path for f in os.scandir(results_dir) if f.is_dir()]

subject_names = [os.path.basename(folder) for folder in subject_folders]

# Placeholder arrays to collect metrics for all subjects
all_mean_activations = []
all_std_activations = []
all_weighted_importances = []

for folder in subject_folders:
    activation_file_path = os.path.join(folder, 'intermediates_activations.npy')
    if os.path.exists(activation_file_path):
        try:
            # NOTE: allow_pickle=True unpickles arbitrary objects, which can execute code.
            # Only load activation files that come from a trusted source.
            activations_data = np.load(activation_file_path, allow_pickle=True).item()
            layer_activation = list(activations_data.values())[0]  # first stored layer only

            if isinstance(layer_activation, np.ndarray) and layer_activation.size > 0:
                # Arrays with more than two dimensions are averaged over axis 1 before scoring.
                if layer_activation.ndim > 2:
                    layer_activation = np.mean(layer_activation, axis=1)

                weighted_importance, mean_activation, std_activation = calculate_adjusted_importance(layer_activation)
                all_mean_activations.append(mean_activation)
                all_std_activations.append(std_activation)
                all_weighted_importances.append(weighted_importance)
        except Exception as e:
            print(f"Error processing {activation_file_path}: {e}")

# Ensure all lists are not empty
if all_mean_activations and all_std_activations and all_weighted_importances:
    # Average each metric across subjects
    mean_activations = np.mean(np.array(all_mean_activations), axis=0)
    std_activations = np.mean(np.array(all_std_activations), axis=0)
    weighted_importances = np.mean(np.array(all_weighted_importances), axis=0)

    # Ascending sort: the least important feature comes first, the most important last.
    sorted_indices = np.argsort(weighted_importances)
    sorted_features = np.array(selected_features)[sorted_indices]
    sorted_mean = mean_activations[sorted_indices]
    sorted_std = std_activations[sorted_indices]
    sorted_weighted_mean = weighted_importances[sorted_indices]

    n_features = len(sorted_features)
    ind = np.arange(n_features)
    width = 0.25

    # Three bars per feature: mean, std and combined score side by side.
    plt.figure(figsize=(18, 10))
    bars1 = plt.bar(ind - width, sorted_mean, width, color='blue', label='Mean Activation')
    bars2 = plt.bar(ind, sorted_std, width, color='red', label='Standard Deviation')
    bars3 = plt.bar(ind + width, sorted_weighted_mean, width, color='green', label='Weighted Mean Activation')

    plt.ylabel('Scores')
    plt.title('Feature Importance Metrics Across All Subjects (First Layer)')
    plt.xticks(ind, sorted_features, rotation=90)
    plt.xlabel('Features')
    plt.legend()

    plt.tight_layout()
    plt.show()

    # The most important feature is the one with the HIGHEST score. argmin picked the
    # lowest-scoring feature and reported it under the wrong name.
    most_important_feature_index = np.argmax(sorted_weighted_mean)
    most_important_feature = sorted_features[most_important_feature_index]
    print(most_important_feature)
else:
    print("No valid activation data found.")
