In [None]:
import os, sys, pickle
from sklearn import metrics
from sklearn.metrics import make_scorer
import pandas as pd
import numpy as np
import seaborn as sns
import textwrap

# Make the `main_code` directory that sits beside the current working directory importable.
# os.path.dirname is used instead of splitting on '/' so the parent directory is resolved
# correctly on every platform and also when the working directory is close to the root.
code_dir = os.path.dirname(os.getcwd())
main_code = os.path.join(code_dir, 'main_code')
if main_code not in sys.path:  # avoid stacking duplicate entries when the cell is re-run
    sys.path.append(main_code)

import matplotlib.colors as mcol
import matplotlib.cm as cm
from yellowbrick.classifier import ROCAUC
import matplotlib.pyplot as plt

from model_class import model, model_results
import plotting_methods as pm
import model_methods as mm


# machine learning methods
from xgboost import XGBClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# my methods 
import locations
import general_methods
import df_methods

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
def get_scores(run_name, clf, test_x, test_y, ret_type = None):
    """Score a fitted search object on held-out data.

    Parameters
    ----------
    run_name : label stored under 'run_name' in the dict return form.
    clf : fitted search object exposing ``best_estimator_`` and ``best_score_``.
    test_x, test_y : held-out features and true labels.
    ret_type : None, 'dict' or 'list' (case-insensitive).

    Returns
    -------
    None   -> tuple ``(accuracy, f1, recall, precision)``
    'list' -> the same four values as a list
    'dict' -> dict with run_name, the four metrics and 'auroc'

    Raises
    ------
    ValueError if ``ret_type`` is not one of the accepted values.
    """
    predict_y = clf.best_estimator_.predict(test_x)

    # scikit-learn metrics take (y_true, y_pred). With weighted averaging the class
    # weights come from the first argument, so the order matters.
    acc = metrics.accuracy_score(test_y, predict_y)
    f1 = metrics.f1_score(test_y, predict_y, average = 'weighted')
    recall = metrics.recall_score(test_y, predict_y, average = 'weighted', zero_division = 0)
    precision = metrics.precision_score(test_y, predict_y, average = 'weighted', zero_division = 0)

    # best_score_ is the search object's best cross-validation score.
    # NOTE(review): reported as 'auroc' -- presumably the search was scored with AUROC; confirm.
    auroc = clf.best_score_

    if ret_type is None:
        return acc, f1, recall, precision

    kind = ret_type.lower()
    if kind == 'dict':
        return {'run_name': run_name, 'accuracy':acc, 'f1': f1, 'recall':recall, 'precision':precision, 'auroc': auroc}
    if kind == 'list':
        return [acc, f1, recall, precision]
    raise ValueError(f"ret_type must be None, 'dict' or 'list', got {ret_type!r}")
    
def build_df(res_dict):
    """Build the descriptive ("info") table for a dictionary of runs.

    Each value of ``res_dict`` must be a 7-tuple
    ``(df_type, phrase, model_method, clf, runtime, test_x, test_y)``; only the
    four descriptive fields are kept. Rows follow the dict's iteration order.

    Returns a DataFrame with columns df_type, phrase, model_method, runtime
    (empty, but with those columns, when ``res_dict`` is empty).
    """
    columns = ['df_type', 'phrase', 'model_method', 'runtime']
    # Collect plain records and build the frame once; growing a DataFrame with
    # pd.concat inside the loop is quadratic and warns on empty frames in recent pandas.
    records = []
    for df_type, phrase, model_method, _clf, runtime, _test_x, _test_y in res_dict.values():
        records.append({'df_type': df_type, 'phrase': phrase, 'model_method': model_method, 'runtime': runtime})
    return pd.DataFrame(records, columns = columns)

def find_entry(model_id):
    """Return ``(run_name, entry)`` for the run whose ``model_id`` matches.

    Looks the run name up in the notebook-level ``combine_df`` and then fetches
    the matching value from the notebook-level ``merge_dict``. Returns None when
    the run name is not a key of ``merge_dict``.

    NOTE(review): both globals must exist before this is called; confirm that
    ``merge_dict`` is defined by the results-loading cell.
    """
    matches_id = combine_df['model_id'] == model_id
    # .item() requires exactly one matching row
    run_name = combine_df.loc[matches_id, 'run_name'].values.item()
    if run_name in merge_dict:
        return run_name, merge_dict[run_name]
    return None
        
def pickle_exists():
    """Report whether the dated ``*_model_results.pkl`` file is present in the save directory."""
    folder = locations.get_locations('save_dir')
    stamp = locations.get_locations('save_date')
    candidate = os.path.join(folder, f'{stamp}_model_results.pkl')
    return os.path.isfile(candidate)
    
    
def load_pickle():
    """Load and return the object stored in the dated ``*_model_results.pkl`` file.

    Raises
    ------
    FileNotFoundError
        If the pickle file does not exist. (Previously this path fell through
        to ``return model_list`` with the name unbound.)
    """
    save_dir = locations.get_locations('save_dir')
    save_date = locations.get_locations('save_date')
    load_pickle_path = os.path.join(save_dir, f'{save_date}_model_results.pkl')

    if not os.path.isfile(load_pickle_path):
        raise FileNotFoundError(f'No saved model results found at {load_pickle_path}')

    # NOTE: unpickling executes code embedded in the file; only load files this project wrote.
    with open(load_pickle_path, 'rb') as f:
        return pickle.load(f)


def save_pickle(res_list):
    """Pickle ``res_list`` to the dated ``*_model_results.pkl`` file in the save directory."""
    target = os.path.join(
        locations.get_locations('save_dir'),
        '{}_model_results.pkl'.format(locations.get_locations('save_date')),
    )
    with open(target, 'wb') as handle:
        pickle.dump(res_list, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
def clean_results_df(df):
    """Tidy a results table in place for presentation (returns None).

    Column names have underscores replaced by spaces and are title-cased,
    the columns at positions 4 through 8 are rounded to three decimals, and
    the 'Structure' and 'Run Name' columns are passed through ``vec_clean_cols``.
    """
    df.columns = [col.replace('_', ' ').title() for col in df.columns]
    for position in (4, 5, 6, 7, 8):
        df.iloc[:, position] = df.iloc[:, position].round(3)
    for label in ('Structure', 'Run Name'):
        df.loc[:, label] = vec_clean_cols(df.loc[:, label])
    
    
def clean_method(method):
    """Translate a short classifier code into its display name.

    Raises ValueError when ``method`` is not one of 'logr', 'svm', 'xgb', 'rf'.
    """
    short_codes = ['logr', 'svm', 'xgb', 'rf']
    display_names = ['Logistic', 'SVM', 'XGBoost', 'Random Forest']
    position = short_codes.index(method)
    return display_names[position]

def clean_structure_name(structure):
    """Translate a structure code into its display name; unknown values pass through unchanged."""
    raw_names = ['Left_wm', 'Right_wm', 'Left_gm', 'Right_gm', 'Right_cerebellum', 'Left_cerebellum', 'Deep_grey']
    display_names = ['Left White Matter', 'Right White Matter', 'Left Grey Matter', 'Right Grey Matter', 'Right Cerebellum', 'Left Cerebellum', 'Deep Grey']
    for raw, pretty in zip(raw_names, display_names):
        if raw == structure:
            return pretty
    return structure

def clean_column_name(name):
    name = name.replace('_', ' ').split()
    for index in range(len(name)):
        if len(name[index]) < 4 and name[index].lower() != 'raw':
            name[index] = name[index].upper()
        else:
            name[index] = name[index].title()
    return ' '.join(name)
    
# Element-wise versions of the label cleaners, so they can be applied to a whole column at once.
vec_clean_cols = np.vectorize(pyfunc=clean_column_name)
vec_clean_methods = np.vectorize(pyfunc=clean_method)
vec_clean_structure_name = np.vectorize(pyfunc=clean_structure_name)

In [None]:
def wrap_text(text, size = 20):
    """Wrap ``text`` to lines of at most ``size`` characters, joined with newlines."""
    return textwrap.fill(text, size)

def color_labels(color_dict, ticks):
    """Recolour tick labels in place.

    For every item in ``ticks`` whose ``_text`` is a key of ``color_dict``, its
    ``_color`` attribute is set to the mapped colour; other items are untouched.
    """
    for tick in ticks:
        label = tick._text
        if label in color_dict:
            tick._color = color_dict[label]
        
            
def construct_feature_to_color(features):
    """Map each feature name to a colour chosen by its position in ``features``.

    The colour advances to the next palette entry at positions 3, 19 and 43,
    so positions 0-2 are 'black', 3-18 'red', 19-42 'blue' and 43+ 'green'.
    A name that appears more than once keeps the colour of its last position.
    """
    palette = ['black', 'red', 'blue', 'green', 'm']
    boundaries = [3, 19, 43]
    mapping = {}
    for position, feature_name in enumerate(features):
        # number of boundaries already reached == how many times the colour has advanced
        palette_index = sum(position >= boundary for boundary in boundaries)
        mapping[feature_name] = palette[palette_index]
    return mapping

def clean_feature_names(feature_list, left=False) -> list:
    """Convert raw radiomics-style feature names into readable labels.

    For each name: lower-case it, strip the 'original <class>' prefix
    (shape / firstorder / glcm / glrlm), put spaces around a fixed list of
    known words so run-together names are split, then upper-case short results
    (fewer than 6 characters before whitespace is collapsed) and title-case the
    rest.

    Parameters
    ----------
    feature_list : iterable of str
    left : when True, every label is right-padded with spaces to the length of
        the longest label.

    Returns
    -------
    list of str, one label per input name (an empty list for empty input).
    """
    remove_list = ['original shape', 'original firstorder', 'original glcm', 'original glrlm']

    # 'prominence' was previously misspelled 'priminence', so names containing
    # "prominence" were never split.
    find_l = ['run', 'variance', 'length', 'level', 'high', 'low', 'mean', 'absolute',
              'percentile', 'range', 'prominence', 'shade', 'tendency', 'average', 'entropy',
              'probability', 'normalized']

    new_list = []
    for item in feature_list:
        item = item.lower()
        for rem in remove_list:
            item = item.replace(rem, '')
        for find in find_l:
            item = item.replace(find, f' {find} ')

        # The length test deliberately runs before whitespace is collapsed,
        # matching the labels this function has always produced.
        item = item.upper() if len(item) < 6 else item.title()
        new_list.append(' '.join(item.split()))

    # Guard the empty case: max() of an empty sequence raises.
    if left and new_list:
        max_len = max(len(x) for x in new_list)
        new_list = [x.ljust(max_len, ' ') for x in new_list]
    return new_list
    
def clean_column_name(name):
    name = name.replace('_', ' ').split()
    for index in range(len(name)):
        if len(name[index]) < 4 and name[index].lower() != 'raw':
            name[index] = name[index].upper()
        else:
            name[index] = name[index].title()
    return ' '.join(name)
    
vec_clean_cols = np.vectorize(clean_column_name)   

In [None]:
def feature_importance(mobj, number = 10, save_name = None):
    """Draw one bar chart per size class showing the most important features.

    Parameters
    ----------
    mobj : model wrapper providing ``model_features``, ``structure`` and
        ``method``, and accepted by ``mm.get_feature_importance``.
    number : how many top-ranked features to show in each subplot.
    save_name : optional path; when given the figure is saved there at 300 dpi.

    Returns
    -------
    list with one tuple of tick-label colours per size class, in plotting order.
    """
    size_cat = ['Small', 'Normal', 'Large']
    feature_struc = mobj.structure
    names = mobj.model_features

    # Everything below is identical for the three subplots, so compute it once.
    fi_all = mm.get_feature_importance(mobj)
    display_names = clean_feature_names(
        [name.replace(feature_struc, '').replace('_', ' ').strip().title() for name in names]
    )
    color_dict = construct_feature_to_color(display_names)
    # Look colours up per position (rather than taking dict.values()) so that
    # duplicate display names cannot shift colours relative to the importances.
    feature_colors = [color_dict[name] for name in display_names]

    fig = plt.figure(facecolor = 'white', figsize = (10,9))
    all_colors = []
    for index, size in enumerate(size_cat):
        ax = fig.add_subplot(len(size_cat), 1, index + 1)
        ax.set_title(f'Size Class: {size}', fontweight = 'bold', fontsize = 15)
        ax.grid(False)

        # 'RF' importances are used as-is for every class; other methods are indexed per class.
        # NOTE(review): method codes elsewhere in this notebook are lower-case ('rf');
        # confirm the casing stored on mobj.method.
        fi = fi_all if mobj.method == 'RF' else fi_all[index]

        # One stable sort keeps names, colours and values aligned.
        ranked = sorted(zip(display_names, feature_colors, fi),
                        key = lambda entry: entry[2], reverse = True)[:number]
        x = [wrap_text(name, 18) for name, _color, _val in ranked]
        y = [val for _name, _color, val in ranked]
        colors = tuple(color for _name, color, _val in ranked)

        ax.bar(x, y, width = 0.75)
        ax.set_xticklabels(x, ha = 'right', rotation = 50)
        all_colors.append(colors)

        # Public setter instead of writing the private `_color` attribute.
        for label, color in zip(ax.get_xticklabels(), colors):
            label.set_color(color)

    fig.tight_layout()

    if save_name is not None:
        fig.savefig(save_name, dpi = 300)
    return all_colors

In [None]:
def build_heatmaps(mods_l, col_name, save_dir = None, show = True, linewidth = 0.4):
    """Draw one normalised feature-importance heatmap per size class.

    Rows are the cleaned feature names taken from the first model; there is one
    column per model in ``mods_l``, labelled with that model's ``col_name``
    attribute. Each column is divided by its own sum before plotting.

    Parameters
    ----------
    mods_l : non-empty sequence of model wrappers (``mod``, ``structure``,
        ``method`` and the attribute named by ``col_name`` are read).
    col_name : attribute name used for column labels and in the title/filename.
    save_dir : optional directory; when given each figure is saved there as PNG.
    show : display each figure when True, otherwise close it.
    linewidth : width of the lines between heatmap cells.

    Returns
    -------
    The Axes of the last heatmap drawn.
    """
    feature_struc = mods_l[0].structure
    features = [x.replace(feature_struc, '').replace('_', ' ').strip().title()
                for x in mods_l[0].mod.feature_names_in_]
    features = clean_feature_names(features, False)

    size_dict = {0:'Small', 1:'Normal', 2:'Large'}
    # Row-label colours change at these row positions (same scheme as construct_feature_to_color).
    colors = ['black', 'red', 'blue', 'green', 'm']
    switch_index = [3, 19, 43]

    for size_ind, size_name in size_dict.items():
        res_df = pd.DataFrame(index = features)
        for mobj in mods_l:
            fi = mm.get_feature_importance(mobj)
            # 'RF' importances are used as-is; other methods are indexed per size class.
            if mobj.method != 'RF':
                fi = fi[size_ind]
            # insert at 0, so columns end up in reverse order of mods_l
            res_df.insert(0, clean_column_name(getattr(mobj, col_name)), fi)

        norm_df = res_df / res_df.sum(axis = 0)

        fig = plt.figure(facecolor = 'white', dpi = 200, figsize = (5,9))
        ax = fig.add_subplot(111)
        # Draw on the axes created above instead of relying on the current-axes state.
        sns.heatmap(norm_df, cmap = 'Reds', linewidths = linewidth, yticklabels = True, ax = ax)

        title = f'Relative Feature Importance for Each {col_name.title()}\nSize Class: {size_name}'
        centerx = np.mean(ax.get_position().intervalx) * 0.7
        fig.suptitle(title, fontsize = 20, fontweight = 'bold', x = centerx, ha = 'center')
        labels = [x.get_text() for x in ax.get_xticklabels()]
        ax.set_xticklabels(labels, rotation = 45, ha = 'right', fontweight = 'bold', color = 'black')
        ax.set_yticklabels(ax.get_yticklabels(), fontweight = 'bold', fontsize = 8)

        color_index = 0
        for position, label in enumerate(ax.get_yticklabels()):
            if position in switch_index:
                color_index += 1
            label.set_color(colors[color_index])

        if save_dir is not None:
            save_path = os.path.join(save_dir, 'cm1_updated_heatmap_{}_{}_{}.png'.format(linewidth, col_name, size_name))
            fig.savefig(save_path, dpi = 200, bbox_inches = 'tight')
        if show:
            plt.show()
        else:
            plt.close(fig)
    return ax
            
def save_rf_tree(mobj, save_dir, est_ind = None):
    """Render one tree of a fitted forest to ``<run_name>_esti_<index>_tree.png``.

    Parameters
    ----------
    mobj : model wrapper; ``mobj.mod.estimators_``, ``mobj.model_features`` and
        ``mobj.run_name`` are read.
    save_dir : directory for the PNG (a temporary .dot file is written there too).
    est_ind : index of the tree to draw; a random valid index when None.
    """
    # Imported here because the notebook's import cell does not provide these names.
    import random
    import pydot
    from sklearn.tree import export_graphviz

    estimators = mobj.mod.estimators_
    if est_ind is None:
        # randrange(len) yields 0..len-1; the previous upper bound of
        # n_estimators + 1 could select an index past the last tree.
        est_ind = random.randrange(len(estimators))

    esti = estimators[est_ind]
    dot_file = os.path.join(save_dir, 'temp_tree.dot')
    tree_png = os.path.join(save_dir, '{}_esti_{}_tree.png'.format(mobj.run_name, est_ind))
    try:
        export_graphviz(esti, out_file = dot_file,
                        feature_names = mobj.model_features,
                        class_names = ['small', 'normal', 'large'],
                        rounded = True, proportion = False,
                        precision = 2, filled = True)
        (graph,) = pydot.graph_from_dot_file(dot_file)
        graph.write_png(tree_png)
    finally:
        # Always clean up the intermediate file, even if rendering fails.
        if os.path.exists(dot_file):
            os.remove(dot_file)
    
    
def color_dict_bar(color_dict, save_path = None):
    """Bar chart of grouped values, tallest bar first.

    Entries of ``color_dict`` are paired, in iteration order, with the palette
    black/red/blue/green (so at most four entries are drawn), sorted by value in
    descending order, relabelled with ``update_group_name`` and plotted.
    The figure is saved to ``save_path`` when one is given, then shown.
    """
    palette = ['black', 'red', 'blue', 'green']
    figure = plt.figure(figsize = (6,3), dpi = 200, facecolor = 'white')
    axis = figure.add_subplot(111)

    triples = [(name, value, shade) for (name, value), shade in zip(color_dict.items(), palette)]
    ranked = sorted(triples, key = lambda entry: entry[1], reverse = True)
    groups, values, colors = zip(*ranked)
    groups = update_group_name(groups)

    plt.grid(False)
    axis.bar(groups, values, color = colors)
    if save_path is not None:
        plt.savefig(save_path, dpi = 200, transparent = True)
    plt.show()

def update_group_name(groups):
    """Return display names for feature-group keys, preserving order.

    Raises ValueError for a key other than 'shape', 'firstorder', 'glcm', 'glrlm'.
    """
    original = ['shape', 'firstorder', 'glcm', 'glrlm']
    ret_val = ['Shape', 'FirstOrder', 'GLCM', 'GLRLM']
    return [ret_val[original.index(key)] for key in groups]

def make_pie_relative(color_dict, save_path = None):
    """Pie chart of the values in ``color_dict``.

    Values are rounded to three decimals and paired, in order, with the palette
    black/red/blue/green; slices whose rounded value is 0 are left out. The pie
    is drawn with ``normalize=False``, so the values are used as given rather
    than rescaled. The figure is saved to ``save_path`` when given, then shown.
    """
    fig = plt.figure(figsize = (2,2), dpi = 200)
    ax = fig.add_subplot(111)
    ax.grid(False)

    palette = ['black', 'red', 'blue', 'green']
    rounded = [round(value, 3) for value in color_dict.values()]
    kept = [(value, label, shade)
            for value, label, shade in zip(rounded, color_dict.keys(), palette)
            if value != 0]
    clean_vals = [value for value, _label, _shade in kept]
    clean_labs = [label for _value, label, _shade in kept]  # only needed by the optional legend below
    clean_cols = [shade for _value, _label, shade in kept]

    patches, texts, _ = ax.pie(clean_vals, colors = clean_cols, explode = [0.01]*len(clean_vals), labeldistance = 1.1,
           textprops = {'fontsize':5, 'color':'white', 'fontweight':'bold'}, autopct = '%.0f%%', normalize=False)

    #plt.legend(patches, clean_labs, loc=(0, 0))
    if save_path is not None:
        plt.savefig(save_path, dpi = 200, transparent = True)
    plt.show()

# 
# The cells below load the saved results; after that, run whichever visualization sections you need
# 

In [None]:
# True or False depending on whether you ran all the data or only
# half when tuning/training/testing with hyperparams_tuning_and_training.
# True  -> two result pickles are loaded and combined
# False -> a single result pickle is loaded
# TODO: set this explicitly, and fill in the empty path placeholders below.
load_two_results = False

try:
    if load_two_results:
        # this is run when combining results
        res_dir = ''  # dir where results are stored
        dict_0_dir = os.path.join(res_dir, '')
        dict_0_pkl = os.path.join(dict_0_dir, '.pkl')

        dict_1_dir = os.path.join(res_dir, '')
        dict_1_pkl = os.path.join(dict_1_dir, '.pkl')

        # NOTE: unpickling executes code embedded in the file; only load files this project wrote.
        with open(dict_0_pkl, 'rb') as f:
            dict_0 = pickle.load(f)

        with open(dict_1_pkl, 'rb') as f:
            dict_1 = pickle.load(f)

        # entries from the second file win on duplicate keys
        dict_results = dict_0.copy()
        dict_results.update(dict_1)

    else:
        # this is run when no combining of results is required
        dict_result_locations = '.pkl'
        with open(dict_result_locations, 'rb') as f:
            dict_results = pickle.load(f)

except Exception as e:
    print('Failed to run properly')
    print(f'Error: {str(e)}')
    # Re-raise: continuing would either fail on an undefined `dict_results`
    # or silently reuse a stale one left in the kernel.
    raise

# Later cells (and find_entry) refer to the loaded results as `merge_dict`.
merge_dict = dict_results

# information portion of dataframe
info_df = build_df(dict_results)

In [None]:
# results portion: one row of test-set scores per run, in merge_dict order
score_columns = ['run_name', 'accuracy', 'f1', 'recall', 'precision', 'auroc']
score_records = []
for k,v in merge_dict.items():
    df_type, phrase, model_method, clf, runtime, test_x, test_y = v
    score_records.append(get_scores(k, clf, test_x, test_y, 'dict'))
# Build the frame once instead of concatenating onto an empty DataFrame row by row.
res_df = pd.DataFrame(score_records, columns = score_columns)

In [None]:
# Build df_dict: for every "<df_type>_<phrase>_<model_method>" name, keep the full
# DataFrame together with the features, labels and the train/test split made from it.

data_loc = locations.get_locations('data_csv_loc')
data = pd.read_csv(data_loc, sep = ',', header = 0, index_col = 0)
scaled_data = df_methods.scale_df(data)
dfs, sdfs = df_methods.setup_data(data, scaled_data)
bdfs, sbdfs = df_methods.balance_data(dfs, sdfs)
phrases = general_methods.return_phrases()

cv_seed = 1234
model_names = ['rf', 'logr', 'svm', 'xgb']
df_types = ['Scaled_balanced', 'balanced']
merge_df_lists = [sbdfs, bdfs]   # same order as df_types
df_dict = {}
df_obs = []

for df_list, df_type in zip(merge_df_lists, df_types):
    for df, phrase in zip(df_list, phrases):
        df_obs.append(df.shape[1])   # column count before transposing
        df = df.T
        x = df.iloc[:, :-2]          # everything except the last two columns
        y = df.iloc[:, -1] + 1       # last column, shifted by one: xgb needs positive class values
        train_x, test_x, train_y, test_y = train_test_split(
            x, y, train_size = 0.8, random_state = cv_seed
        )
        # every model method for this frame gets the same split
        for model_method in model_names:
            model_name_string = f'{df_type}_{phrase}_{model_method}'
            df_dict[model_name_string] = (df.T, x, y, train_x, test_x, train_y, test_y)



In [None]:
# Put the descriptive columns and the score columns side by side (rows are matched by index),
# then append a sequential model_id as the last column.
combine_df = pd.concat(objs = [info_df, res_df], axis = 1)
combine_df.insert(len(combine_df.columns), 'model_id', list(range(len(combine_df))))

In [None]:
# Build the list of model objects, or load it from the cached pickle when one exists.
# Set re_run = True to force a rebuild.
re_run = False
list_missing = not pickle_exists()

if re_run or list_missing:
    print('Building list...')
    model_list = []
    # The three sources are walked in parallel, so they must be in the same order;
    # zip stops at the shortest one.
    # The row variable is named `row` so the `data` DataFrame loaded earlier is not overwritten.
    for (row_label, row), (k1, v1), (k2, v2) in zip(combine_df.iterrows(), merge_dict.items(), df_dict.items()):
        df_type, phrase, model_method, clf, runtime, test_x, test_y = v1
        mod = model()
        mod.mod = clf.best_estimator_
        mod.accuracy = row.accuracy
        mod.f1 = row.f1
        mod.precision = row.precision
        mod.recall = row.recall
        mod.avg_score = np.mean([row.accuracy, row.f1, row.precision, row.recall])
        mod.run_name = k1
        mod.method = row.model_method
        mod.structure = phrase
        mod.model_id = row.model_id
        mod.auroc = row.auroc
        mod.df = v2[0]
        mod.model_features = v2[0].index.tolist()[:-2]
        # Test this run's own name (k1); the previous code tested `k`, a variable
        # left over from an earlier cell's loop.
        # NOTE(review): the labels below are kept as they were, but df_dict uses the
        # types 'Scaled_balanced' and 'balanced' -- confirm 'scaled' / 'scaled_balanced'
        # are the intended values.
        if 'Scaled' in k1:
            mod.df_name = 'scaled'
        else:
            mod.df_name = 'scaled_balanced'
        model_list.append(mod)

    result_list = model_results(model_list)
    result_list.sort_results(print_res = False)
    save_pickle(result_list)

else:
    print('Loading list...')
    result_list = load_pickle()

In [None]:
# Take the models returned by result_list.top_per_method(), wrap them in their own
# model_results container and export that container as a DataFrame.
# NOTE(review): presumably this is the best model for each classifier method -- confirm in model_class.
method_models = result_list.top_per_method()
method_results = model_results(method_models)
method_result_df = method_results.toDF()

In [None]:
# Take the models returned by result_list.top_per_feature(), wrap them in their own
# model_results container and export that container as a DataFrame.
# NOTE(review): presumably this is the best model for each structure -- confirm in model_class.
structure_models = result_list.top_per_feature()
structure_results = model_results(structure_models)
structure_result_df = structure_results.toDF()

In [None]:
### Clean up the top performing structure and method results for export to excel

def _tidy_results_table(results_df, drop_list):
    """Return a presentation copy of a results table.

    Drops the bookkeeping columns, replaces method and structure codes with
    display names, renames 'method' to 'Classifier Method', title-cases all
    column names and rounds every column from the third onward to 3 decimals.
    """
    table = results_df.drop(drop_list, axis = 1, inplace = False)
    table.loc[:,'method'] = vec_clean_methods(table.loc[:,'method'])
    table.loc[:,'structure'] = vec_clean_structure_name(table.loc[:,'structure'])
    table = table.rename({"method": "Classifier Method"}, axis = 1)
    table.columns = [x.title() for x in table.columns]
    table.iloc[:,2:] = table.iloc[:,2:].round(3)
    return table


feature_df = structure_result_df.copy()
method_df = method_result_df.copy()

drop_list = ['run_name', 'df_name', 'model_id', 'avg_score', 'auroc']

# A third, "all results" table used to be built the same way from res_df; it is currently disabled.
feature_table_df = _tidy_results_table(feature_df, drop_list)
methods_table_df = _tidy_results_table(method_df, drop_list)

In [None]:
# Write each cleaned table to its own dated .xlsx file.
excel_save_dir = locations.get_locations('excel_results')
save_date_string = locations.get_locations('save_date')
# Only two tables exist; the former third name ('all') never had a frame paired with it.
tables_to_save = {'features': feature_table_df, 'methods': methods_table_df}
for name, df in tables_to_save.items():
    save_path = os.path.join(excel_save_dir, f'New_{save_date_string}_{name}_table_version.xlsx')
    df.to_excel(save_path, index = False, header = True)

In [None]:
# Show the top rows of combine_df ranked by each column from position 5 onward
head_num = 5
ranking_columns = combine_df.columns[5:]
for colname in ranking_columns:
    print(colname)
    ranked = combine_df.sort_values(by = colname, ascending = False)
    display(ranked.head(head_num))

# 
# Confusion matrix
# 

In [None]:
# Two custom colormaps running from white through yellow, orange and red to maroon.
# cm1 repeats most colour stops, cm2 lists each stop once; use whichever reads best.
_cm1_stops = ['w', 'w', 'y', 'y', 'orange','orange', 'r', 'r', 'maroon']
_cm2_stops = ['w', 'y', 'orange', 'r', 'maroon']
cm1 = mcol.LinearSegmentedColormap.from_list("MyCmapName", _cm1_stops)
cm2 = mcol.LinearSegmentedColormap.from_list("MyCmapName", _cm2_stops)

In [None]:
# find what ever model object you'd like and unpack it. 
res = find_entry(9)
df_type, phrase, model_method, clf, runtime, test_x, test_y = res[1]
run_name = res[0]
model = clf.best_estimator_

In [None]:
predict_y = model.predict(test_x)
cnf_matrix = metrics.confusion_matrix(test_y, predict_y)
save_dir = locations.get_locations('confusion_matrix')
title = f'confusion_{run_name}'
save_path = os.path.join(save_dir + title)
pm.plot_matrix(cnf_matrix, save_dir = save_dir, title = title, disp = True, cmap = cm)

# 
# AUROC
# 

In [None]:
# find what ever model object you'd like and unpack it. 
res = find_entry(9)
df_type, phrase, model_method, clf, runtime, test_x, test_y = res[1]
run_name = res[0]
model = clf.best_estimator_

In [None]:
title = 'default' #whatever you'd like 
roc_dir = locations.get_locations('ROC_dir')
pm.plot_yellowbrick_roc_xy(model, test_x, test_y, title, save_path = roc_dir, disp = True)

# 
# Feature Importance
# 

In [None]:
# Plot the top feature importances of one stored model and save the figure.
# NOTE(review): 9 is the value passed to get_model -- presumably a model_id; confirm.
figure_model = result_list.get_model(9)
save_dir = locations.get_locations('importance_dir')
# despite its name, bar_title is the output file path of the figure
bar_title = os.path.join(save_dir, 'bar_plots.png')
colors_list = feature_importance(figure_model, save_name = bar_title)

# 
# Relative Feature Importance by Class
# 

In [None]:
figure_model = result_list.get_model(9)
features = figure_model.model_features

# Before running, set rem_name to the leading part of every feature name that
# precedes the feature class, e.g. rem_name = 'Left_wm_original_'.
# With the empty default nothing is removed.
rem_name = '' 

features = [x.replace(rem_name, '') for x in features]

# Feature classes that are recognised; the class is the text before the first underscore.
group_dict = {'shape': 'shape', 'firstorder': 'firstorder', 'glcm':'glcm', 'glrlm': 'glrlm'}
feature_dict = {}
for feat in features:
    prefix = feat.partition('_')[0]
    # features whose prefix is not a known class are left out of feature_dict
    if prefix in group_dict:
        feature_dict[feat] = group_dict[prefix]

def calc_group_fi(tup_list):
    """Sum importances per feature group.

    ``tup_list`` holds tuples whose second item is the group key and whose
    third item is the value to add. Returns ``{group: total}`` with groups in
    order of first appearance.
    """
    totals = {}
    for entry in tup_list:
        group, value = entry[1], entry[2]
        if group not in totals:
            totals[group] = value
        else:
            totals[group] += value
    return totals
    
def return_tup_list(fi_list, feature_dict):
    """Pair importances with features by position.

    Returns a list of ``(feature_name, feature_group, importance)`` tuples. The
    i-th importance is matched with the i-th item of ``feature_dict``; pairing
    stops at the shorter of the two inputs.
    """
    return [
        (feature_name, feature_group, imp)
        for imp, (feature_name, feature_group) in zip(fi_list, feature_dict.items())
    ]



In [None]:
# For every entry of the model's feature importances: scale it so it sums to 1,
# then total the scaled values per feature group.
# NOTE(review): assumes fi[i] is an array of per-feature importances for class i -- confirm for each method.
figure_model = result_list.get_model(9)
fi = mm.get_feature_importance(figure_model)
relative_fi = []
color_dicts = []
for i in range(len(fi)):
    class_fi = fi[i]
    rfi = list(class_fi / class_fi.sum(axis = 0))
    tup_list = return_tup_list(rfi, feature_dict)
    color_dict = calc_group_fi(tup_list)
    relative_fi.append(rfi)
    color_dicts.append(color_dict)

In [None]:
# Stacked horizontal bars: one bar per size class, one coloured segment per feature group.
order = ['glcm', 'glrlm', 'firstorder', 'shape']
colors = '#4d5ce3', '#73d461', '#cc434c', 'black'
fig = plt.figure(facecolor = 'white', figsize = (6,2), dpi = 200)
ax = fig.add_subplot(111)
sizes = ['Large', 'Normal', 'Small']
text_color = 'white'
prev_sm, prev_nm, prev_lg = 0, 0, 0
for group, color in zip(order, colors):
    sm = color_dicts[0][group]
    nm = color_dicts[1][group]
    lg = color_dicts[2][group]
    # vals and bottoms follow the order of `sizes` (Large, Normal, Small)
    vals = [lg, nm, sm]
    bottoms = [prev_lg, prev_nm, prev_sm]
    container = ax.barh(sizes, vals, color = color, left = bottoms)

    # Labels must be in the same order as the bars; they were previously built
    # from [sm, nm, lg], which put Small's percentage on the Large bar and vice versa.
    percents = np.round([x*100 for x in vals]).astype(int)
    percents = [str(x) + '%' if x > 3 else '' for x in percents] #Change >3 to get additional percentage values to show up.

    text_l = ax.bar_label(container, percents, label_type = 'center', color = text_color, fontsize = 8, padding = 0, weight = 'extra bold')

    prev_sm = sm + prev_sm
    prev_nm = nm + prev_nm
    prev_lg = lg + prev_lg

ax.set_yticklabels(sizes, fontweight = 'bold', color = 'black')

ax.set_xlabel('Relative Importance (%)', fontsize = 10, fontweight = 'bold', color = 'black')
save_dir = locations.get_locations('save_dir')
# Save into save_dir; the directory was looked up before but never used in the path.
save_path = os.path.join(save_dir, 'category_relative_importance.png')
plt.savefig(save_path, dpi = 500, bbox_inches = 'tight')
plt.show()

# 
# Heatmaps
# 

In [None]:
# Re-select the model groups to plot (same calls as in the earlier results cells).
method_models = result_list.top_per_method()
structure_models = result_list.top_per_feature()

# make sure the output directory exists before build_heatmaps saves into it
hm_dir = locations.get_locations('heatmaps')
os.makedirs(hm_dir, exist_ok = True)

# Structure heatmaps are saved and displayed; method heatmaps are saved only (show = False).
# `ax` ends up holding the Axes returned by the second call.
ax = build_heatmaps(structure_models, 'structure', save_dir = hm_dir, show = True, linewidth = .4)
ax = build_heatmaps(method_models, 'method', save_dir = hm_dir, show = False)