## T2D disease prediction  

In [2]:
import numpy as np
import pandas as pd 
import os

disease = "T2D" 
# feature_string = 'K_' or 'gi_'
def loadData(feature_string , label_string , label_dict) :
    #read file
    
    filename = "./data/abundance_" + disease + ".txt"  
    if os.path.isfile(filename) :
        rawdata = pd.read_csv(filename , sep = '\t' , index_col=0 , header=None) 
    else :
        print("FileNotFoundError: File {} does not exist".format(filename))
        exit()

    # select rows having feature index identifier string  
    X = rawdata.loc[rawdata.index.str.contains(feature_string, regex=False)].astype('float64')

    # get class labels
    Y = rawdata.loc[label_string] #'disease'
    Y = Y.replace(label_dict).astype('int')
     
    return X , Y 

# def prepare_data(config) :
feature_string = 'k__'
label_string = 'disease'
label_dict = {
    # Controls
    'n': 0,
    # Cirrhosis
    'cirrhosis': 1, 
    # T2D and WT2D
    't2d': 1,
    # Obesity
    'leaness': 0, 'obesity': 1,
}

Raw_X_data , labels = loadData(feature_string , label_string , label_dict )
Raw_X_data = Raw_X_data.transpose() 
labels = labels.values
def filter_data(x , y , filter_thresh) :
    
    classes = np.unique(y) 
    index = x.index.values  

    num_counts = {} 
    for c in classes :
        sub_x = x[y == c]
        num_samples = len(sub_x) 
        # sub_x[sub_x > 0].count()  
        num_counts[str(c)] = sub_x[sub_x > 0].count() / float(num_samples)

    core = pd.DataFrame(index=index)
    for feature in x.columns.values:
        for c in classes : 
            if(num_counts[str(c)].loc[feature] >= filter_thresh) :
                #core[feature] = x[feature].copy()
                core = pd.concat([core , x[feature]] , axis=1)
                break 
    return core 

def get_feature_df(features):
    kingdom, phylum, cl, order, family, genus, species  = [], [], [], [], [], [], []
    for f in features:

        name = f.split("k__")[1].split("|p__")[0].replace(".","")
        if "_unclassified" in name:
            name = 'unclassified_' + name.split("_unclassified")[0]
        kingdom.append(name)

        if "p__" in f:
            name =f.split("p__")[1].split("|c__")[0].replace(".","")
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                phylum.append(name)
            else:
                phylum.append("NA")
        else:
            phylum.append("NA")
            
        if "c__" in f:
            name = f.split("c__")[1].split("|o__")[0].replace(".","")
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                cl.append(name)
            else:
                cl.append("NA")
        else:
            cl.append("NA")
            
        if "o__" in f:
            name = f.split("o__")[1].split("|f__")[0].replace(".","")
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                order.append(name)
            else:
                order.append("NA")
        else:
            order.append("NA")
            
        if "f__" in f:
            name = f.split("f__")[1].split("|g__")[0].replace(".","")
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                family.append(name)
            else:
                family.append("NA")
        else:
            family.append("NA")
            
        if "g__" in f:
            name = f.split("g__")[1].split("|s__")[0].replace(".","")
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                genus.append(name)
            else:
                genus.append("NA")
        else:
            genus.append("NA")
            
        if "s__" in f:
            name = f.split("s__")[1]
            if "_unclassified" in name:
                name = 'unclassified_' + name.split("_unclassified")[0]
            if name != "":
                species.append(name)
            else:
                species.append("NA")
        else:
            species.append("NA")
            
    if len(species) == 0:
        d = {'kingdom': kingdom, 'phylum': phylum, 'class':cl,
            'order':order, 'family':family, 'genus':genus}
        feature_df = pd.DataFrame(data=d)
        feature_df.index = feature_df['genus']
    else:
        d = {'kingdom': kingdom, 'phylum': phylum, 'class':cl,
            'order':order, 'family':family, 'genus':genus, 'species': species}
        feature_df = pd.DataFrame(data=d)
        feature_df.index = feature_df['species']
    return feature_df
 
filter_X_data = filter_data(Raw_X_data , labels , 0.2)
features = list(filter_X_data.columns.values)
features_df = get_feature_df(features)  
print("samples are %d , Raw features are %d ..." % (Raw_X_data.shape[0] ,  Raw_X_data.shape[1]))  
print("filter data after samples are %d , filter Raw features are %d ..." % (filter_X_data.shape[0] ,  filter_X_data.shape[1])) 

samples are 344 , Raw features are 572 ...
filter data after samples are 344 , filter Raw features are 170 ...


In [40]:
from graph import Graph
import pickle
from joblib import Parallel, delayed
import multiprocessing
from copy import deepcopy

def generate_dense_maps(x, g, f, p=-1):
    id = multiprocessing.Process()._identity
    temp_g = deepcopy(g)
    temp_g.populate_graph(f, x)
    map = temp_g.get_dense_map()
    vector = temp_g.graph_vector_features()
    del(temp_g)
    return np.array(map)

print("Contsructing tree..")
g = Graph()
g.build_graph()
g.prune_graph(features_df)
print("Populating trees...")	 
results2 = Parallel(n_jobs=4)(delayed(generate_dense_maps)(x,g,features_df) for x in filter_X_data.values)
x_data_dense_maps = np.array(results2) 
filter_x_data = filter_X_data.values 
print(x_data_dense_maps.shape)
print(filter_x_data.shape)

Contsructing tree..
Pruning Tree...
Populating trees...
(344, 5, 120)
(344, 170)


In [47]:
from sklearn.metrics import roc_auc_score ,accuracy_score , matthews_corrcoef, precision_score, recall_score, f1_score
from torch.utils.data import Dataset , DataLoader
from sklearn.model_selection import StratifiedKFold 
from sklearn.preprocessing import MinMaxScaler
from new_Micro_Permutator import Micro_P  
import torch 
import shap

#just for reaptable results
seed = 100
np.random.seed(seed)
np.random.shuffle(filter_x_data) 
np.random.seed(seed)
np.random.shuffle(x_data_dense_maps)  
np.random.seed(seed)
np.random.shuffle(labels)

cv_list = ["Run_" + "_CV_" + str(y) for y in range(10)]
MicroP_stat_df = pd.DataFrame(index=["AUC", "ACC" , "MCC", "Precision", "Recall", "F1" , "repeat_seed"], columns=cv_list) 
Shap_df_cv = pd.DataFrame(index=features_df.index , columns=cv_list)

In [50]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
skf_index = skf.split(x_data_dense_maps, labels) 
for j , idx in enumerate(skf_index) : 
    fold_num = "fold_%s" % str(j).zfill(2)
    print('#'*50 + ' %s ' % (fold_num) + '#'*50 )

    train_index, test_index = idx    
    train_x, test_x = x_data_dense_maps[train_index], x_data_dense_maps[test_index] 
    train_raw_x , test_raw_x = filter_x_data[train_index] , filter_x_data[test_index]
    train_y, test_y = labels[train_index], labels[test_index]
    
    class DatasetLoarder(Dataset) :
        def __init__(self , X_train , y_train) :
            self.len = X_train.shape[0]
            self.x_data = torch.from_numpy(X_train).type(torch.FloatTensor)
            self.y_data = torch.from_numpy(y_train).type(torch.LongTensor)
            
        def __getitem__(self , index) :
            return self.x_data[index] , self.y_data[index]

        def __len__(self) :
            return self.len 

    train_dataset  = DatasetLoarder(train_x , train_y)  
    train_loader = DataLoader(dataset=train_dataset , batch_size=32 , num_workers=0)  
    test_dataset = DatasetLoarder(test_x , test_y)
    test_loader = DataLoader(dataset=test_dataset , batch_size=32 , num_workers=0 )  
            
            
    def eval(model , test_loader): # test
        model.eval()
        true_label = [] 
        y_prob = []
        with torch.no_grad():

            for step , batch in enumerate(test_loader):
                x, label = batch
                val_output = model(x)
                true_label  = true_label + label.tolist() 
                y_prob = y_prob + val_output.tolist()
                
        y_prob = np.array(y_prob)
        true_label = np.array(true_label)
        return true_label , y_prob 

    def train(model , train_loader , test_loader , learn_rate , epoch ) :
        
        # loss function
        criterion = torch.nn.CrossEntropyLoss()
        # optimizer
        optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate ) # 2e-4 
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5 , gamma=0.5)
        
        for i in range(epoch):
            model.train()
            # one epoch
            for step, batch in enumerate(train_loader):
                x, label = batch
                output = model(x)
                loss = criterion(output, label)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            scheduler.step()
            # y_true , y_prob  = eval(model=model , test_loader=test_loader) 
            # y_pred = np.argmax(y_prob , axis=1)
            # #print("method = {}  test Epoch:{} ,   eval_acc:{} , eval_auc :{} ".format(method , i, round(accuracy_score(y_true, y_pred), 3) , round(roc_auc_score(y_true, y_prob, multi_class='ovo'), 3)))
            # print("method = {}  test Epoch:{} ,  eval_acc:{} ".format("method" , i, round(accuracy_score(y_true, y_pred), 3)))

                
    model = Micro_P(
        image_h= train_x.shape[1],
        image_w= train_x.shape[2],
        segments = 8,
        patch_h = train_x.shape[1] ,
        patch_w = 3 ,
        dim = 48 ,
        depth = 1,
        num_classes = 2 ,
        expansion_factor = 1, 
    )
    print("training........ Micro Permutator model ................. ")

    train(model  , train_loader , test_loader , learn_rate=5e-4 , epoch=20)
    
    y_true , y_prob  = eval(model=model , test_loader=test_loader) 
    y_pred = np.argmax(y_prob , axis=1)
    
    
    metrics = {   
        "ACC" : round(accuracy_score(y_true, y_pred), 3),
        "Recall" : round(recall_score(y_true, y_pred , average='weighted') , 3 ) ,
        "Precision" : round(precision_score(y_true, y_pred, average='weighted') , 3) ,
        "F1"    : round(f1_score(y_true, y_pred, average='weighted') , 3) , 
        "MCC" : round(matthews_corrcoef(y_true, y_pred), 3),
        "AUC" : round(roc_auc_score(y_true, y_prob[:, 1]) , 3) ,
        "method" : "Micro_P",
    } 
    
    print("MicroP  fold {} , metrics {}".format(j , metrics))

    MicroP_stat_df.loc["AUC"]["Run_" + "_CV_" + str(j)] = metrics["AUC"]
    MicroP_stat_df.loc["ACC"]["Run_" + "_CV_" + str(j)] = metrics["ACC"]
    MicroP_stat_df.loc["MCC"]["Run_" + "_CV_" + str(j)] = metrics["MCC"]
    MicroP_stat_df.loc["Precision"]["Run_" + "_CV_" + str(j)] =  metrics["Precision"]
    MicroP_stat_df.loc["Recall"]["Run_" + "_CV_" + str(j)]= metrics["Recall"]
    MicroP_stat_df.loc["F1"]["Run_" + "_CV_" + str(j)] =  metrics["F1"] 

    # explainer model
    
    model.eval() 
    def predict(input_data):
        X_data = []
        for x in input_data : 
            map = generate_dense_maps(x , g , features_df)
            X_data.append(map)
        X_data = np.array(X_data)
        output = model(torch.tensor(X_data).type(torch.FloatTensor)) 
        output_np = output.detach().numpy()
        return output_np
    
    # gmin = train_raw_x.min().min()
    # backgroud_data =  np.full(shape=(1, train_raw_x.shape[1]), fill_value = gmin)
    backgroud_data = shap.kmeans(train_raw_x, 2) 
    explainer = shap.KernelExplainer(predict, backgroud_data)
    shap_values = explainer.shap_values(test_raw_x)

    mean_shap_values = np.mean(shap_values[0], axis=0)  
    Shap_df_cv["Run_" + "_CV_" + str(j)] = mean_shap_values
    if j == 0 : 
        Shap_df_cv.to_csv('Shap_df_cv_0.csv', index=False)
    
    del(model) 
    
    #break
                

################################################## fold_00 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 0 , metrics {'ACC': 0.829, 'Recall': 0.829, 'Precision': 0.846, 'F1': 0.826, 'MCC': 0.673, 'AUC': 0.912, 'method': 'Micro_P'}


  0%|          | 0/35 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_01 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 1 , metrics {'ACC': 0.629, 'Recall': 0.629, 'Precision': 0.634, 'F1': 0.627, 'MCC': 0.264, 'AUC': 0.709, 'method': 'Micro_P'}


  0%|          | 0/35 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_02 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 2 , metrics {'ACC': 0.829, 'Recall': 0.829, 'Precision': 0.833, 'F1': 0.828, 'MCC': 0.662, 'AUC': 0.886, 'method': 'Micro_P'}


  0%|          | 0/35 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_03 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 3 , metrics {'ACC': 0.686, 'Recall': 0.686, 'Precision': 0.686, 'F1': 0.685, 'MCC': 0.37, 'AUC': 0.742, 'method': 'Micro_P'}


  0%|          | 0/35 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_04 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 4 , metrics {'ACC': 0.794, 'Recall': 0.794, 'Precision': 0.795, 'F1': 0.794, 'MCC': 0.589, 'AUC': 0.913, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_05 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 5 , metrics {'ACC': 0.647, 'Recall': 0.647, 'Precision': 0.656, 'F1': 0.642, 'MCC': 0.303, 'AUC': 0.692, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_06 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 6 , metrics {'ACC': 0.588, 'Recall': 0.588, 'Precision': 0.593, 'F1': 0.582, 'MCC': 0.182, 'AUC': 0.74, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_07 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 7 , metrics {'ACC': 0.735, 'Recall': 0.735, 'Precision': 0.736, 'F1': 0.735, 'MCC': 0.471, 'AUC': 0.83, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_08 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 8 , metrics {'ACC': 0.559, 'Recall': 0.559, 'Precision': 0.561, 'F1': 0.555, 'MCC': 0.12, 'AUC': 0.661, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

################################################## fold_09 ##################################################
training........ Micro Permutator model ................. 
MicroP  fold 9 , metrics {'ACC': 0.706, 'Recall': 0.706, 'Precision': 0.735, 'F1': 0.696, 'MCC': 0.44, 'AUC': 0.734, 'method': 'Micro_P'}


  0%|          | 0/34 [00:00<?, ?it/s]The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLarsIC()

In [3]:
features_name = features_df.index.tolist()
for idx in range(len(features_name)) : 
    if features_name[idx] == 'NA' :  
        features_name[idx] = features_df.iloc[idx]['genus']
    if 'unclassified' in features_name[idx] :
        features_name[idx] = features_name[idx][13:]  + ' spp.'

Shap_df_cv = pd.read_csv('Shap_df_cv_T2D_mean_10.csv' , index_col=0)

 
# mean_shap_values = Shap_df_cv.mean(axis=1) 
SHAP_mean_df = pd.DataFrame(Shap_df_cv.mean(axis=1 , numeric_only=True).abs(), columns=['SHAP'])
 
label_dfy = pd.DataFrame(labels, columns=['group'])
tmp_data_df = filter_X_data.join(label_dfy)
abundance_mean_df = tmp_data_df.groupby('group').mean().transpose()
abundance_mean_df.columns = ["health" , "disease"]
# print(features_name)
# print(abundance_mean_df.index)
# print(Shap_df_cv.index)
abundance_mean_df.index = features_name
SHAP_mean_df.index = features_name  
SHAP_Abundance_mean_df = SHAP_mean_df.join(abundance_mean_df) 
plot_data_df = SHAP_Abundance_mean_df.sort_values(by='SHAP', ascending=False)
# plot_data_df = data_sort_df[0:20]
print(plot_data_df.head(20))

                                       SHAP    health   disease
Streptococcus_anginosus            0.247920  0.024938  0.032322
Lactobacillus_mucosae              0.225894  0.019058  0.140583
Olsenella spp.                     0.208105  0.000387  0.042497
Megasphaera spp.                   0.199254  0.802202  1.682669
Megamonas_hypermegale              0.186778  0.472121  0.360724
Streptococcus_vestibularis         0.159089  0.044622  0.030209
Streptococcus_salivarius           0.157892  0.469609  0.258079
Coprobacillus spp.                 0.139338  0.063908  0.187530
Oscillibacter spp.                 0.111010  0.354273  0.414373
Peptostreptococcaceae_noname spp.  0.106945  0.202410  0.066780
Veillonella spp.                   0.091690  0.256550  0.282311
Adlercreutzia_equolifaciens        0.090044  0.038135  0.044166
Megamonas_funiformis               0.089438  0.092486  0.132385
Eubacterium_biforme                0.084292  0.214202  0.250333
Clostridium_nexile                 0.083