# Generates a Feature Importance Rank Heatmap (for AutoMLPipe-BC)


The code for this visualization was provided by Sy Hwang in September 2021.


## Import Required Packages

In [35]:

import os
import warnings

import pandas as pd
pd.set_option('display.max_rows', None)

from bokeh.io import output_file, save, export_png
from bokeh.models import (BasicTicker, ColorBar, ColumnDataSource,
                          ContinuousColorMapper, LinearColorMapper, HoverTool)
from bokeh.plotting import figure
from bokeh.transform import transform
from bokeh.palettes import Cividis256

warnings.filterwarnings('ignore')

# Jupyter Notebook Hack: This code ensures that the results of multiple commands within a given cell are all displayed, rather than just the last. 
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Set Run Parameters

In [None]:
# Path to the experiment output folder. Its directory listing is used below to
# detect dataset names, and its last '/'-separated component is taken as the
# experiment name, so use forward slashes here.
experiment_path = "C:/Users/ryanu/Documents/Analysis/AutoMLPipe_Experiments/hcc_demo"

In [191]:
# Feature importance CSV files to include in the heatmap, one per ML algorithm.
# The text before the first underscore of each file name is later used as the
# algorithm's column label, and the list order sets the column order.
_FI_ALGORITHMS = (
    'XGB', 'LGB', 'GB', 'RF', 'DT', 'NB',
    'LR', 'KN', 'ANN', 'SVM', 'ExSTraCS',
)
fi_files = [algorithm + '_FI.csv' for algorithm in _FI_ALGORITHMS]

## Automatically Detect Dataset Names

In [None]:
# Get dataset names for all completed dataset analyses in the experiment folder.
# Every entry of the folder is treated as a dataset unless it is one of the
# known bookkeeping files/folders listed in `non_dataset_entries`.

# Name of experiment folder. normpath strips any trailing separator first, so
# ".../hcc_demo/" still yields "hcc_demo" rather than an empty string.
experiment_name = os.path.basename(os.path.normpath(experiment_path))

non_dataset_entries = {
    'metadata.csv',
    'jobsCompleted',
    'UsefulNotebooks',
    'logs',
    'jobs',
    'DatasetComparisons',  # If it has been run previously (overwrite)
    'KeyFileCopy',  # If it has been run previously (overwrite)
    experiment_name + '_ML_Pipeline_Report.pdf',  # If it has been run previously (overwrite)
}

# Each bookkeeping entry is excluded independently and only if present: a
# missing 'logs' folder no longer leaves 'jobs' in the list, and a missing
# entry never raises. Errors from os.listdir itself (e.g. a wrong
# experiment_path) are not suppressed.
datasets = sorted(  # sorting ensures consistent ordering of datasets
    entry for entry in os.listdir(experiment_path)
    if entry not in non_dataset_entries
)
print("Analyzed Datasets: "+str(datasets))

In [48]:
# Directories containing the per-algorithm feature importance CSVs; the file
# names in fi_files are appended directly to these strings, so each must end
# with a '/'.
# NOTE(review): only fi_dir_all is read by the visible code below; fi_dir_48
# is not referenced — presumably kept for a second heatmap, confirm.
# NOTE(review): these are hard-coded relative paths that do not depend on
# experiment_path or the detected datasets — confirm this is intended.
fi_dir_48 = 'data/LGHv4_uniformFI/KeyFileCopy/lgh_analysis_train_ryan_48hrvars_noimpute_2021-05-06/results/FI/'
fi_dir_all = 'data/LGHv4_uniformFI/KeyFileCopy/lgh_analysis_train_ryan_allvars_noimpute_2021-05-06/results/FI/'

In [205]:
# Build the table of feature importance ranks.
#
# For every algorithm file: average the absolute importance of each feature
# (column) over the rows of its CSV, then rank the features so that rank 1 is
# the largest mean absolute importance. The per-algorithm rank Series stay
# indexed by feature name, so pd.concat lines them up by feature rather than
# by column position; a CSV whose columns are in a different order can no
# longer have its ranks attached to the wrong features.
rank_columns = []
for fi_file in fi_files:
    fi_scores = pd.read_csv(fi_dir_all + fi_file)
    algo_ranks = fi_scores.abs().mean().rank(ascending=False)
    # Column label is the algorithm abbreviation, e.g. 'XGB' from 'XGB_FI.csv'.
    algo_ranks.name = fi_file.partition('_')[0]
    rank_columns.append(algo_ranks)

# Rows = features, columns = algorithms (in fi_files order).
finaldf = pd.concat(rank_columns, axis=1)
finaldf.index.name = 'feats'

# Mean rank across all algorithms; rows are sorted so the best (lowest) mean
# rank comes first.
finaldf['MeanRank'] = finaldf.mean(axis=1)
finaldf.sort_values(by='MeanRank', inplace=True)
finaldf.columns.name = 'algos'

# Long format for plotting: one row per (feats, algos) pair with its rank in
# the 'ranked' column. MeanRank is included as one of the 'algos' values.
inputdf = finaldf.stack().rename('ranked').reset_index()

In [221]:
# Draw the rank table as a Bokeh heatmap (one cell per feature/algorithm pair)
# and save it as a standalone HTML file.

source = ColumnDataSource(inputdf)

# Linear colour scale over Cividis256, spanning the observed rank range.
mapper = LinearColorMapper(
    palette=Cividis256,
    low=inputdf.ranked.min(),
    high=inputdf.ranked.max(),
)

p = figure(
    title="FI Heatmap (All Variables)",
    plot_width=900,
    plot_height=1600,
    x_range=list(finaldf.columns),
    # Categorical y factors are laid out bottom-to-top, so the index is
    # reversed to draw the first row of finaldf at the top of the plot.
    y_range=list(finaldf.index)[::-1],
    x_axis_location="above",
    toolbar_location='left',
    tools=["wheel_zoom", "pan", "reset"],
)

# One unit-sized rectangle per row of the source, coloured by its rank.
p.rect(
    x="algos",
    y="feats",
    width=1,
    height=1,
    source=source,
    line_color="white",
    fill_color=transform("ranked", mapper),
)

# Hovering over a cell shows the algorithm, feature and rank.
p.add_tools(
    HoverTool(
        tooltips=[
            ("algo", "@algos"),
            ("feature", "@feats"),
            ("rank", "@ranked"),
        ]
    )
)

# Cosmetics: hide axis lines and ticks, enlarge the labels and title, and tilt
# the x-axis labels (orientation is given in radians).
p.title.text_font_size = '24px'
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "14px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = 1.0

output_file('FI_All_heatmap.html')
save(p)

'/data2/home/syhw/LGH/FI_All_heatmap.html'