In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm
import seaborn as sns

import pandas as pd
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 100)
pd.set_option("display.min_rows", 50)
pd.set_option("display.precision", 8)
from pandas.api.types import is_numeric_dtype

import joblib 
import json
import time
from tableone import TableOne
from scipy import sparse
pd.options.mode.chained_assignment = None 

# Relative path of the data folder.
data_dir ='./data'
# Directory the kernel is currently running in; os.getcwd() returns it without a trailing path separator.
basedir = os.getcwd()

In [None]:
# Import the project helper modules and reload each one so that edits made to
# them on disk are picked up without restarting the kernel.
# NOTE(review): later cells import from `clinspokeprediction.*`, not
# `clinprediction.*` — confirm which package name is correct; reloading one
# package does not refresh names imported from the other.
import importlib
import clinprediction.omop_fx
importlib.reload(clinprediction.omop_fx)

import clinprediction.patient_fx
importlib.reload(clinprediction.patient_fx)

import clinprediction.match_fx
importlib.reload(clinprediction.match_fx)

import clinprediction.model_fx
# Reload model_fx itself; reloading match_fx a second time here would leave
# model_fx stale.
importlib.reload(clinprediction.model_fx)

## Parameters

In [None]:
# Positional arguments passed to load_in_options; set to None to skip loading.
# NOTE(review): load_in_options is not imported in any visible cell — confirm
# where it is defined before relying on Restart & Run All.
load_options = ['testdate','AD','control'] 

# basedir comes from os.getcwd() and has no trailing separator, so the
# directories are built with os.path.join instead of string concatenation
# (which would produce '<cwd>data/'). The final '' component keeps a trailing
# separator so code that appends a file name to these strings keeps working.
in_dir = os.path.join(basedir, 'data', '')
pdir = os.path.join(basedir, 'cohort_selection', '')
output_dir = os.path.join(basedir, 'cohort_selection_out', '')

if load_options is not None:
    # Pull the frequently used settings out of the options dictionary.
    options = load_in_options(*load_options)
    odir_main = options['odir_main']
    demo_cols = options['demo_cols']
    match_params = options['match_params']
    cols_match_num = match_params['cols_match_num']
    cols_match_vis = match_params['cols_match_vis']
    cols_match_cat = match_params['cols_match_cat']

## Load in Data

In [None]:
# NOTE(review): an earlier cell imports/reloads `clinprediction.omop_fx`, while
# this cell imports from `clinspokeprediction.omop_fx` — confirm the package name.
from clinspokeprediction.omop_fx import OMOPData

start = time.time()
omopdata = OMOPData(omopdir = 'data/omop/')

# Read in OMOP information for patients of interest: try load_compressed first
# and, if it raises, fall back to read_in_omop_csv followed by save_compressed.
try: 
    omopdata.load_compressed(pdir)
except Exception as err: 
    # Catch Exception (not a bare except) so KeyboardInterrupt / SystemExit still
    # stop the cell, and report why the fallback path is being taken.
    print('load_compressed failed ({}: {}); reading in OMOP csv files instead.'.format(
        type(err).__name__, err))
    omopdata.read_in_omop_csv(directory = pdir, read_in_controls = True)
    omopdata.save_compressed()
    
# Concept look-up dictionary: concept_id -> concept_name
conceptdict = omopdata.concepts.set_index('concept_id')['concept_name'].to_dict()

print(omopdata.size_of_data_col('iscontrol'))
print('Finished reading in OMOP data. took {} minutes'.format((time.time() - start) / 60))

In [None]:
# load in patients
from clinspokeprediction.patient_fx import read_in_patients, age_visit_timefilt, filter_pts
from clinspokeprediction.match_fx import match_patients

# Smallest value in the configured time-filter range.
timefilt_min = np.array(options['timefilt_range']).min()

# The patient table path must already be recorded in the options; the error
# message names the notebook to run when it is not.
if 'allpts_timefiltmin_file' not in options:
    raise Exception('Run 2_predictAD.ipynb first')
allpts = pd.read_csv(options['allpts_timefiltmin_file'])

# Person ids of the train and test splits, loaded from the saved arrays.
person_id_train = np.load(options['person_id_train_all_file'])
person_id_test = np.load(options['person_id_test_all_file'])
n_train_pid = len(person_id_train)
n_test_pid = len(person_id_test)
print('train number pid:', n_train_pid)
print('test number pid:', n_test_pid)


In [None]:
from clinspokeprediction.omop_fx import filter_omopdata_by_time
# Process every time filter in options['timefilt_range']; all outputs of one
# iteration live under <odir_main><timefilt>/.
for timefilt in options['timefilt_range']:
    odir_tf = options['odir_main'] + str(timefilt) + '/'
    os.makedirs(odir_tf, exist_ok = True)

    # Only report the input file as existing after actually checking for it.
    omop_input_file = odir_tf + 'omop_count_demo_visit.joblib'
    if not os.path.isfile(omop_input_file):
        raise FileNotFoundError(omop_input_file + ' does not exist.')
    print(omop_input_file + ' exists.')
    print('getting information at timefilt:{}'.format(timefilt))

    # Unpack the saved patient tables and feature concepts for this time filter.
    omop_pt_tf_input = joblib.load(omop_input_file)
    allpt_tf = omop_pt_tf_input['allpt_tf']
    allpt_tf_train = omop_pt_tf_input['allpt_tf_train'] 
    allpt_tf_test = omop_pt_tf_input['allpt_tf_test']
    pts_train_tf = omop_pt_tf_input['train_pts']
    pts_test_tf = omop_pt_tf_input['test_pts']
    feat_concepts = omop_pt_tf_input['feat_concepts']

    allptomop = pd.read_csv(odir_tf + 'patient_sentence_long.csv')
    
    # Train / test person ids saved for the minimum time filter. Catch Exception
    # rather than everything (so an interrupt is not relabelled) and chain the
    # original error so the real cause stays visible in the traceback.
    try: 
        train_personid = np.load(options['train_personid_mintimefilt_file'])
        test_personid = np.load(options['test_personid_mintimefilt_file'])
    except Exception as err: 
        raise Exception('Run 2_predictAD.ipynb first') from err
    
    print('train:',pts_train_tf.shape, train_personid.shape)
    print('test:',pts_test_tf.shape, test_personid.shape)
    
    # Training data prep
    # The unmatched model inputs must already be saved for this time filter;
    # stop early when they are not.
    unmatched_input_file = odir_tf + 'model_unmatched_input_data.joblib'
    if not os.path.isfile(unmatched_input_file):
        raise Exception('Run 2_predictAD.ipynb first')

    print('loading in saved model inputs... ')
    unmatched_model_inputs = joblib.load(unmatched_input_file)

    # Feature matrices, labels, feature names and the fitted variance threshold.
    X_train = unmatched_model_inputs['X_train']
    X_test = unmatched_model_inputs['X_test']
    y_train = unmatched_model_inputs['y_train']
    y_test = unmatched_model_inputs['y_test']
    feature_names = unmatched_model_inputs['feature_names']
    varthresh = unmatched_model_inputs['varthresh']

    # Integer indices of the features kept by the variance threshold.
    kept_feature_idx = varthresh.get_support(1)
    print('variance thresholded n features:', kept_feature_idx.shape)
    feature_names_var = feature_names[kept_feature_idx]

    # One row of concept metadata per concept_id, left-joined onto the feature names.
    first_concept_rows = omopdata.concepts.groupby('concept_id').head(1)
    feature_name_frame = feature_names.to_frame('concept_id').rename_axis('')
    feature_name_info = feature_name_frame.merge(
        first_concept_rows, on = 'concept_id', how = 'left').set_index('concept_id')

    print('X_train shape: {}, X_test shape: {}'.format(X_train.shape, X_test.shape))
    train_summary = 'length of y_train: {}. \n\tsum of y_train: {}. \n\tmean of y_train: {:0.07f}'
    print(train_summary.format(len(y_train), y_train.sum(), y_train.mean()))
    test_summary = 'length of y_test: {}. \n\tsum of y_test: {}. \n\tmean of y_test: {:0.07f}'
    print(test_summary.format(len(y_test), y_test.sum(), y_test.mean()))
        
    ########
    # Stratify by Sex
    ########
    # Index labels of the Female / Male rows of the patient table, looked up once
    # and intersected with each list of person ids below.
    female_index = allpt_tf.query("Sex == 'Female'").index
    male_index = allpt_tf.query("Sex == 'Male'").index

    # Full train / test id lists.
    person_id_train_F = np.intersect1d(female_index, person_id_train)
    person_id_train_M = np.intersect1d(male_index, person_id_train)
    print('train all: {}, train all F: {}, train all M: {}'.format(
        person_id_train.shape, person_id_train_F.shape, person_id_train_M.shape))
    person_id_test_F = np.intersect1d(female_index, person_id_test)
    person_id_test_M = np.intersect1d(male_index, person_id_test)
    print('test all: {}, test all F: {}, test all M: {}'.format(
        person_id_test.shape, person_id_test_F.shape, person_id_test_M.shape))

    # Train / test id lists saved for the minimum time filter.
    train_personid_F = np.intersect1d(female_index, train_personid)
    train_personid_M = np.intersect1d(male_index, train_personid)
    print('train: {}, train F: {}, train M: {}'.format(
        train_personid.shape, train_personid_F.shape, train_personid_M.shape))
    test_personid_F = np.intersect1d(female_index, test_personid)
    test_personid_M = np.intersect1d(male_index, test_personid)
    print('test: {}, test F: {}, test M: {}'.format(
        test_personid.shape, test_personid_F.shape, test_personid_M.shape))

    # demographics of updated pts
    # NOTE(review): display_table is not imported in any visible cell — confirm its source.
    print('train F:',person_id_train_F.shape, train_personid_F.shape)
    print('test F:',person_id_test_F.shape, test_personid_F.shape)
    female_person_ids = np.concatenate((train_personid_F,test_personid_F))
    display_table(allpt_tf.loc[female_person_ids].reset_index(),
                  groupby = 'AD', options = options)

    print('train M:',person_id_train_M.shape, train_personid_M.shape)
    print('test M:',person_id_test_M.shape, test_personid_M.shape)
    male_person_ids = np.concatenate((train_personid_M,test_personid_M))
    display_table(allpt_tf.loc[male_person_ids].reset_index(),
                  groupby = 'AD', options = options)

    ########
    # get the input data (X) and the labels (y) for train and test. Apply variance threshold to X.
    ########
    # NOTE(review): VarianceThreshold and dxgroup are not imported / assigned in any
    # visible cell — confirm they are defined before this loop runs.
    # X: restrict the feature matrices to each sex and fit one variance threshold
    # per sex on the training rows only.
    X_train_F = X_train.loc[train_personid_F]
    X_train_M = X_train.loc[train_personid_M]
    print('shape of X_train: {}, X_train_F: {}, X_train_M: {}'.format(X_train.shape, X_train_F.shape, X_train_M.shape))
    varthresh_F = VarianceThreshold().fit(X_train_F)
    varthresh_M = VarianceThreshold().fit(X_train_M)
    N_FEATURES_F = varthresh_F.get_support().sum()
    N_FEATURES_M = varthresh_M.get_support().sum()
    print('variance thresholded n features F:{}, M:{}'.format(varthresh_F.get_support(1).shape, varthresh_M.get_support(1).shape))
    X_test_F = X_test.loc[test_personid_F]
    X_test_M = X_test.loc[test_personid_M]
    print('shape of X_test: {}, X_test_F: {}, X_test_M: {}'.format(X_test.shape, X_test_F.shape, X_test_M.shape))


    # y: labels are read from the dxgroup column of the patient table for each id list.
    y_train_F = allpt_tf.loc[train_personid_F][dxgroup].to_numpy()
    print('length of y_train_F: {}. \n\tsum of y_train_F: {}. \n\tmean of y_train_F: {:0.07f}'.format(\
                    len(y_train_F), y_train_F.sum(), y_train_F.mean()))
    y_test_F = allpt_tf.loc[test_personid_F][dxgroup].to_numpy()
    print('length of y_test_F: {}. \n\tsum of y_test_F: {}. \n\tmean of y_test_F: {:0.07f}'.format(\
                len(y_test_F), y_test_F.sum(), y_test_F.mean()))
    y_train_M = allpt_tf.loc[train_personid_M][dxgroup].to_numpy()
    # Report the male training labels (label and length both refer to y_train_M).
    print('length of y_train_M: {}. \n\tsum of y_train_M: {}. \n\tmean of y_train_M: {:0.07f}'.format(\
                    len(y_train_M), y_train_M.sum(), y_train_M.mean()))
    y_test_M = allpt_tf.loc[test_personid_M][dxgroup].to_numpy()
    print('length of y_test_M: {}. \n\tsum of y_test_M: {}. \n\tmean of y_test_M: {:0.07f}'.format(\
                len(y_test_M), y_test_M.sum(), y_test_M.mean()))

    # Reduce train matrices with the fitted thresholds; select the same surviving
    # feature columns by name for the test matrices.
    X_train_F2 = varthresh_F.transform(X_train_F)
    X_train_M2 = varthresh_M.transform(X_train_M)
    feature_names_var_F = feature_names[varthresh_F.get_support(1)]
    feature_names_var_M = feature_names[varthresh_M.get_support(1)]
    X_test_F2 = X_test_F[feature_names_var_F].to_numpy()
    X_test_M2 = X_test_M[feature_names_var_M].to_numpy()
    
    # prep output
    # One output sub-directory per sex under the current time-filter directory.
    odir_tf_f = odir_tf + 'Female/'
    odir_tf_m = odir_tf + 'Male/'
    os.makedirs(odir_tf_f, exist_ok = True)
    os.makedirs(odir_tf_m, exist_ok = True)
    # Record the stratified directories in the options once. The key that is
    # written must be the key that is checked; with a misspelled write key the
    # guard never becomes false and the options are re-saved on every pass.
    # NOTE(review): save_updated_options is not imported in any visible cell, and
    # any reader of the old 'odif_tf_strat' key needs updating — confirm.
    if 'odir_tf_strat' not in options:
        options['odir_tf_strat'] = {'Female':odir_tf_f, 'Male':odir_tf_m}
        save_updated_options(options)
        
    #######
    # Random Forest
    ######
    # NOTE(review): rf_model and feature_context are not imported in any visible
    # cell — confirm their source.
    # For FEMALES
    np.random.seed(1100)
    rf_unmatched_file_F = odir_tf_f+'rf_unmatched_model.joblib'
    if os.path.isfile(rf_unmatched_file_F):
        print('reading in saved model...')
        # Load the file that was just checked for, not whatever `fname` last held.
        rf_unmatched_dict_F = joblib.load(rf_unmatched_file_F)
        rf_feat_import_unmatched_F  = feature_context(rf_unmatched_dict_F['feat_import'], feature_name_info, 
                                         import_col = 'rf_import', modelkind = 'rf_unmatched')
    else: 
        # Train on all rows with a non-zero label plus randomly drawn zero-label rows.
        # The number drawn is options['ratio'] per positive row (the same setting
        # passed to match_patients in this notebook; no standalone `ratio` variable
        # is defined in the visible cells). np.random.choice samples with
        # replacement by default.
        n_controls_F = int(int(y_train_F.sum()) * options['ratio'])
        pt_choice_F = np.concatenate((np.where(y_train_F)[0], 
                        np.random.choice(np.where(1-y_train_F)[0], n_controls_F)))
        rf_unmatched_dict_F = rf_model(X_train_F2[pt_choice_F], y_train_F[pt_choice_F],
                     X_test_F2, y_test_F, feature_names_var_F, options, odir_tf = odir_tf_f, modelsuffix = '_unmatched')
        rf_feat_import_unmatched_F = feature_context(rf_unmatched_dict_F['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_unmatched', odir_tf = odir_tf_f)
        
    # For MALES
    rf_unmatched_file_M = odir_tf_m+'rf_unmatched_model.joblib'
    if os.path.isfile(rf_unmatched_file_M):
        print('reading in saved model...')
        rf_unmatched_dict_M = joblib.load(rf_unmatched_file_M)
        rf_feat_import_unmatched_M  = feature_context(rf_unmatched_dict_M['feat_import'], feature_name_info, 
                                         import_col = 'rf_import', modelkind = 'rf_unmatched')
    else: 
        # Same sampling scheme as for females.
        # NOTE(review): do_gridsearch = False is passed here but not in the female
        # call above — confirm whether the difference is intended.
        n_controls_M = int(int(y_train_M.sum()) * options['ratio'])
        pt_choice_M = np.concatenate((np.where(y_train_M)[0], 
                        np.random.choice(np.where(1-y_train_M)[0], n_controls_M)))
        rf_unmatched_dict_M = rf_model(X_train_M2[pt_choice_M], y_train_M[pt_choice_M],
                     X_test_M2, y_test_M, feature_names_var_M, options, odir_tf = odir_tf_m, modelsuffix = '_unmatched', do_gridsearch = False)
        rf_feat_import_unmatched_M = feature_context(rf_unmatched_dict_M['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_unmatched', odir_tf = odir_tf_m)
        
    ######
    # Match Patients Within Sex Strata
    #####
    from clinspokeprediction.match_fx import match_patients
    # Numeric matching columns are the numeric columns plus the visit columns.
    match_params = options['match_params']
    cols_match_num = match_params['cols_match_num'] + match_params['cols_match_vis']
    cols_match_cat = match_params['cols_match_cat']

    # Split the train table on the dxgroup column: value 1 -> cohort, value 0 -> control.
    is_cohort_train = allpt_tf_train[dxgroup]==1
    is_control_train = allpt_tf_train[dxgroup]==0
    cohortpts_tf_train = allpt_tf_train[is_cohort_train].reset_index()
    controlpts_tf_train = allpt_tf_train[is_control_train].reset_index()
    train_shapes_msg = 'TRAIN: \ncohortpts shape (prior): {}\ncontrolpts shape: {}'
    print(train_shapes_msg.format(cohortpts_tf_train.shape, controlpts_tf_train.shape))

    # Same split for the test table.
    is_cohort_test = allpt_tf_test[dxgroup]==1
    is_control_test = allpt_tf_test[dxgroup]==0
    cohortpts_tf_test = allpt_tf_test[is_cohort_test].reset_index()
    controlpts_tf_test = allpt_tf_test[is_control_test].reset_index()
    test_shapes_msg = 'TEST: \ncohortpts shape (prior): {}\ncontrolpts shape: {}'
    print(test_shapes_msg.format(cohortpts_tf_test.shape, controlpts_tf_test.shape))
    
    # Matched female training patients: reuse the saved result when present,
    # otherwise match cohort to control patients and save the result.
    fname = odir_tf_f + 'cohort_control_pt_train_tf.joblib'
    if os.path.isfile(fname):
        print('loading in matched females...')
        tt_temp = joblib.load(fname)
        cohortpts_tf_train_F = tt_temp['cohortpts_tf_train']
        controlpts_tf_train_F = tt_temp['controlpts_tf_train']
        allpt_tf_matched_F = tt_temp['allpt_tf_matched']
        del tt_temp
    else: 
        # Restrict cohort / control tables to the female training ids before matching.
        cohortpts_tf_train_F = cohortpts_tf_train[cohortpts_tf_train.person_id.isin(train_personid_F)]
        controlpts_tf_train_F = controlpts_tf_train[controlpts_tf_train.person_id.isin(train_personid_F)]
        print('cohortpts_F shape: {}\ncontrolpts_F shape: {}'.format(cohortpts_tf_train_F.shape, controlpts_tf_train_F.shape))
        cohortpts_tf_train_F, controlpts_tf_train_F, _ = match_patients(cohortpts_tf_train_F, controlpts_tf_train_F, dxgroup, 
                        cols_match_cat = cols_match_cat, cols_match_num = cols_match_num, ratio = options['ratio'], return_split = True)
        # pd.concat stacks the two tables row-wise; DataFrame.append is not
        # available in pandas >= 2.0.
        allpt_tf_matched_F = pd.concat([cohortpts_tf_train_F, controlpts_tf_train_F]).set_index('person_id')
        joblib.dump({'cohortpts_tf_train': cohortpts_tf_train_F, 'controlpts_tf_train': controlpts_tf_train_F, 
                     'allpt_tf_matched': allpt_tf_matched_F}, fname)

        # Summary table of the matching columns grouped by dxgroup, shown and saved as csv.
        mytable = TableOne(allpt_tf_matched_F, columns = cols_match_num + cols_match_cat, 
                       groupby=dxgroup, categorical = cols_match_cat, smd = True, pval = True)
        display(mytable)
        mytable.to_csv(odir_tf_f + 'allpt_train_tf_matched.csv')

    # Matched male training patients: reuse the saved result when present,
    # otherwise match cohort to control patients and save the result.
    fname = odir_tf_m + 'cohort_control_pt_train_tf.joblib'
    if os.path.isfile(fname):
        print('loading in matched males...')
        tt_temp = joblib.load(fname)
        cohortpts_tf_train_M = tt_temp['cohortpts_tf_train']
        controlpts_tf_train_M = tt_temp['controlpts_tf_train']
        allpt_tf_matched_M = tt_temp['allpt_tf_matched']
        del tt_temp
    else: 
        # Restrict cohort / control tables to the male training ids before matching.
        cohortpts_tf_train_M = cohortpts_tf_train[cohortpts_tf_train.person_id.isin(train_personid_M)]
        controlpts_tf_train_M = controlpts_tf_train[controlpts_tf_train.person_id.isin(train_personid_M)]
        print('cohortpts_M shape: {}\ncontrolpts_M shape: {}'.format(cohortpts_tf_train_M.shape, controlpts_tf_train_M.shape))
        cohortpts_tf_train_M, controlpts_tf_train_M, _ = match_patients(cohortpts_tf_train_M, controlpts_tf_train_M, dxgroup, 
                            cols_match_cat = cols_match_cat, cols_match_num = cols_match_num, ratio = options['ratio'], return_split = True)
        # pd.concat stacks the two tables row-wise; DataFrame.append is not
        # available in pandas >= 2.0.
        allpt_tf_matched_M = pd.concat([cohortpts_tf_train_M, controlpts_tf_train_M]).set_index('person_id')
        joblib.dump({'cohortpts_tf_train': cohortpts_tf_train_M, 'controlpts_tf_train': controlpts_tf_train_M, 
                     'allpt_tf_matched': allpt_tf_matched_M}, fname)

        # Summary table of the matching columns grouped by dxgroup, shown and saved as csv.
        mytable = TableOne(allpt_tf_matched_M, columns = cols_match_num + cols_match_cat, 
                           groupby=dxgroup, categorical = cols_match_cat, smd = True, pval = True)
        display(mytable)
        mytable.to_csv(odir_tf_m + 'allpt_train_tf_matched.csv')
        
    # preprocess
    # Build (or reload) the matched model inputs for each sex. The saved inputs are
    # reused only when both the female and the male file exist.
    if os.path.isfile(odir_tf_f + 'model_matched_input_data.joblib') and os.path.isfile(odir_tf_m + 'model_matched_input_data.joblib'):
        print('loading in X/y females...')
        tt_temp = joblib.load(odir_tf_f + 'model_matched_input_data.joblib')
        train_personid_matched_F = tt_temp['train_personid_matched']
        test_personid_matched_F = tt_temp['test_personid_matched']
        X_train_F = tt_temp['X_train']; X_test_F = tt_temp['X_test']
        y_train_F = tt_temp['y_train']; y_test_F = tt_temp['y_test']
        varthresh_F = tt_temp['varthresh']
        feature_names_var_F = feature_names[varthresh_F.get_support(1)]

        print('loading in X/y males...')
        tt_temp = joblib.load(odir_tf_m + 'model_matched_input_data.joblib')
        train_personid_matched_M = tt_temp['train_personid_matched']
        test_personid_matched_M = tt_temp['test_personid_matched']
        X_train_M = tt_temp['X_train']; X_test_M = tt_temp['X_test']
        y_train_M = tt_temp['y_train']; y_test_M = tt_temp['y_test']
        varthresh_M = tt_temp['varthresh']
        feature_names_var_M = feature_names[varthresh_M.get_support(1)]
        del tt_temp

        N_FEATURES_F = varthresh_F.get_support().sum(); N_FEATURES_M = varthresh_M.get_support().sum()
        X_train_F2 = varthresh_F.transform(X_train_F); X_train_M2 = varthresh_M.transform(X_train_M)
        X_test_F2 = X_test_F[feature_names_var_F].to_numpy(); X_test_M2 = X_test_M[feature_names_var_M].to_numpy()
        X_test_mF = X_test.loc[test_personid_matched_F, feature_names_var_F].fillna(0)
        X_test_mM = X_test.loc[test_personid_matched_M, feature_names_var_M].fillna(0)
        X_test_mF2 = X_test_mF.to_numpy(); X_test_mM2 = X_test_mM.to_numpy()

        # The saved files hold no labels for the matched test patients, but the
        # model evaluation later in this loop needs y_test_mF / y_test_mM. Look them
        # up in the patient table by the saved matched test ids so they are defined
        # (and belong to this time filter) on the cached path as well.
        # NOTE(review): assumes every matched test id is an index label of allpt_tf,
        # as the other allpt_tf.loc[...] label look-ups in this loop do — confirm.
        y_test_mF = allpt_tf.loc[test_personid_matched_F][dxgroup].to_numpy()
        y_test_mM = allpt_tf.loc[test_personid_matched_M][dxgroup].to_numpy()
    else: 
        ########
        # X and y TRAIN MATCHED 
        ########
        train_personid_matched_F = allpt_tf_matched_F.index; train_personid_matched_M = allpt_tf_matched_M.index
        X_train_F = X_train.loc[train_personid_matched_F]; X_train_M = X_train.loc[train_personid_matched_M]
        y_train_F = allpt_tf_matched_F[dxgroup].to_numpy(); y_train_M = allpt_tf_matched_M[dxgroup].to_numpy()
        print('shape of X_train: {}, X_train_F: {}, X_train_M: {}'.format(X_train.shape, X_train_F.shape, X_train_M.shape))
        print('len of y_train_F: {}, y_train_M: {}'.format(len(y_train_F), len(y_train_M)))

        # Refit one variance threshold per sex on the matched training rows.
        varthresh_F = VarianceThreshold().fit(X_train_F); varthresh_M = VarianceThreshold().fit(X_train_M)
        print('variance thresholded n features F:{}, M:{}'.format(varthresh_F.get_support(1).shape, varthresh_M.get_support(1).shape))
        feature_names_var_F = feature_names[varthresh_F.get_support(1)]; feature_names_var_M = feature_names[varthresh_M.get_support(1)]

        N_FEATURES_F = varthresh_F.get_support().sum(); N_FEATURES_M = varthresh_M.get_support().sum()
        X_train_F2 = varthresh_F.transform(X_train_F); X_train_M2 = varthresh_M.transform(X_train_M)

        ########
        # X and y TEST with new variance thresholds
        ########
        X_test_F = X_test.loc[test_personid_F]; X_test_M = X_test.loc[test_personid_M]
        X_test_F2 = X_test_F[feature_names_var_F].to_numpy(); X_test_M2 = X_test_M[feature_names_var_M].to_numpy()
        print('shape of X_test: {}, X_test_F: {}, X_test_M: {}'.format(X_test.shape, X_test_F2.shape, X_test_M2.shape))

        # NOTE(review): allpt_tf_test_matched_F / allpt_tf_test_matched_M are not
        # created in any visible cell (only the training patients are matched
        # in this loop) — confirm where the matched test tables come from, otherwise
        # this branch raises NameError on a fresh kernel.
        test_personid_matched_F = allpt_tf_test_matched_F.index; test_personid_matched_M = allpt_tf_test_matched_M.index
        X_test_mF = X_test.loc[test_personid_matched_F, feature_names_var_F].fillna(0)
        X_test_mM = X_test.loc[test_personid_matched_M, feature_names_var_M].fillna(0)
        X_test_mF2 = X_test_mF.to_numpy(); X_test_mM2 = X_test_mM.to_numpy()
        print('shape of X_test_mF (matched): {}, X_test_mM (matched): {}'.format(X_test_mF.shape, X_test_mM.shape))

        y_test_F = allpt_tf.loc[test_personid_F][dxgroup].to_numpy(); y_test_M = allpt_tf.loc[test_personid_M][dxgroup].to_numpy()
        y_test_mF = allpt_tf_test_matched_F[dxgroup].to_numpy(); y_test_mM = allpt_tf_test_matched_M[dxgroup].to_numpy()

        # save
        joblib.dump({'X_train':X_train_F, 'X_test':X_test_F, 'y_train':y_train_F, 'y_test':y_test_F, 'feature_names':feature_names,
                 'train_personid_matched':train_personid_matched_F,'test_personid_matched':test_personid_matched_F, 'varthresh':varthresh_F},
                 odir_tf_f + 'model_matched_input_data.joblib')
        joblib.dump({'X_train':X_train_M, 'X_test':X_test_M, 'y_train':y_train_M, 'y_test':y_test_M, 'feature_names':feature_names,
                 'train_personid_matched':train_personid_matched_M,'test_personid_matched':test_personid_matched_M, 'varthresh':varthresh_M},
                 odir_tf_m + 'model_matched_input_data.joblib')
        
    ### Random Forest Models
    # For each sex: reuse the saved matched model when its file exists, otherwise
    # train one; then evaluate the model on the matched test set.
    # NOTE(review): rf_model, feature_context, analyze_clf and `comparison` are not
    # imported / assigned in any visible cell — confirm their source.
    # FEMALES
    rf_matched_file_F = odir_tf_f+'rf_matched_model.joblib'
    if os.path.isfile(rf_matched_file_F):
        print('reading in saved model...')
        # Load the model file that was checked for; at this point `fname` refers to
        # the male matched-patient file, not to a model.
        rf_matched_dict_F  = joblib.load(rf_matched_file_F)
        rf_feat_import_matched_F = feature_context(rf_matched_dict_F['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_matched')
    else:  
        rf_matched_dict_F = rf_model(X_train_F2, y_train_F,
                     X_test_F2, y_test_F, feature_names_var_F, options, 
                     odir_tf = odir_tf_f, modelsuffix = '_matched',
                     n_its = 30, do_gridsearch = False)
        rf_feat_import_matched_F = feature_context(rf_matched_dict_F['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_matched', odir_tf = odir_tf_f)
    rf_metrics2_F = analyze_clf(rf_matched_dict_F['model'], X_train_F2, X_test_mF2, 
                    y_train_F, y_test_mF, odir_tf_f+'rf_matched_testmatched', 
                    dxgroup, comparison, show_fig = True)
    print(rf_metrics2_F)
    
    # MALES
    # Same cache check as for females; no `redo` flag is defined in the visible
    # cells, so the branch depends on the saved file only.
    rf_matched_file_M = odir_tf_m+'rf_matched_model.joblib'
    if os.path.isfile(rf_matched_file_M):
        print('reading in saved model...')
        rf_matched_dict_M  = joblib.load(rf_matched_file_M)
        rf_feat_import_matched_M = feature_context(rf_matched_dict_M['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_matched')
    else:  
        rf_matched_dict_M = rf_model(X_train_M2, y_train_M,
                     X_test_M2, y_test_M, feature_names_var_M, options, odir_tf = odir_tf_m, 
                     modelsuffix = '_matched', n_its = 30, do_gridsearch = False)
        rf_feat_import_matched_M = feature_context(rf_matched_dict_M['feat_import'], feature_name_info, 
                            import_col = 'rf_import', modelkind = 'rf_matched', odir_tf = odir_tf_m)

    rf_metrics2_M = analyze_clf(rf_matched_dict_M['model'], X_train_M2, X_test_mM2, 
                    y_train_M, y_test_mM, odir_tf_m+'rf_matched_testmatched', 
                    dxgroup, comparison, show_fig = True)
    print(rf_metrics2_M)