In [13]:
import pandas as pd
import os
output_folder = '../Output/'


In [87]:
auc_df = pd.read_csv(os.path.join(output_folder, 'mean_roc_auc.csv'), index_col = 0)
fi_df = pd.read_csv(os.path.join(output_folder, 'mean_feature_importance.csv'), index_col = 0)
auc_df.columns = ['Mean_AUC']

In [27]:
import pandas as pd

# Example data
data = {
    "filename": [
        "results_NeuralNet_2_HNSC_lr0.01_epochs50.csv",
        "results_XGBoost_LUAD_lr0.1_nest100.csv",
        "results_LogisticRegression_COAD.csv",
        "results_RandomForest_SKCM_nest100_maxdepth6.csv"
    ]
}

# Create DataFrame
df = auc_df.copy()
df['filename'] = df.index
# Define models and cancer types
models = ["LogisticRegression", "RandomForest", "SVM", "XGBoost"] + [f"NeuralNet_{i+1}" for i in range(3)]
cancer_types = ['COAD', 'SKCM', 'PAAD', 'LUAD', 'STAD', 'HNSC', 'BLCA']

# Extract model, cancer type, and hyperparameters
def parse_filename(filename):
    # Remove "results_" and ".csv"
    base = filename.replace("results_", "").replace(".csv", "")
    
    # Split by underscores
    parts = base.split("_")
    
    # Extract model
    model = next((m for m in models if "_".join(parts[:len(m.split("_"))]) == m), None)
    if model:
        parts = parts[len(model.split("_")):]  # Remove the model part from parts
    
    # Extract cancer type
    cancer_type = next((ct for ct in cancer_types if parts and parts[0] == ct), "all")
    if cancer_type in cancer_types:
        parts = parts[1:]  # Remove the cancer type part from parts
    
    # Remaining parts are hyperparameters
    hyperparameters = "_".join(parts)
    
    return model, cancer_type, hyperparameters

# Apply parsing function
df[['model', 'cancer_type', 'hyperparameters']] = df['filename'].apply(parse_filename).apply(pd.Series)

# Further process hyperparameters into key-value pairs
def parse_hyperparameters(hyperparam_string):
    if not hyperparam_string:
        return {}
    try:
        return {k: v for param in hyperparam_string.split("_") for k, v in [param.split("0.")]}
    except ValueError:
        return {}

# Parse hyperparameters into a dictionary
df['hyperparameters'] = df['hyperparameters'].str.replace('All_', '')
# Display the DataFrame
df

Unnamed: 0,Mean_AUC,model,cancer_type,hyperparameters,hyperparameters_dict,filename
results_NeuralNet_2_HNSC_lr0.01_epochs50.csv,0.582214,NeuralNet_2,HNSC,lr0.01_epochs50,{},results_NeuralNet_2_HNSC_lr0.01_epochs50.csv
results_NeuralNet_2_All_lr0.01_epochs50.csv,0.585203,NeuralNet_2,all,lr0.01_epochs50,{},results_NeuralNet_2_All_lr0.01_epochs50.csv
results_NeuralNet_2_BLCA_lr0.01_epochs50.csv,0.643591,NeuralNet_2,BLCA,lr0.01_epochs50,{},results_NeuralNet_2_BLCA_lr0.01_epochs50.csv
results_NeuralNet_2_All_lr0.01_epochs20.csv,0.644926,NeuralNet_2,all,lr0.01_epochs20,{},results_NeuralNet_2_All_lr0.01_epochs20.csv
results_NeuralNet_2_HNSC_lr0.01_epochs20.csv,0.650277,NeuralNet_2,HNSC,lr0.01_epochs20,{},results_NeuralNet_2_HNSC_lr0.01_epochs20.csv
...,...,...,...,...,...,...
results_XGBoost_COAD_lr0.1_nest100_md6.csv,0.912916,XGBoost,COAD,lr0.1_nest100_md6,{},results_XGBoost_COAD_lr0.1_nest100_md6.csv
results_XGBoost_COAD_.csv,0.912916,XGBoost,COAD,,{},results_XGBoost_COAD_.csv
results_XGBoost_COAD_lr0.1_nest50_md9.csv,0.913288,XGBoost,COAD,lr0.1_nest50_md9,{},results_XGBoost_COAD_lr0.1_nest50_md9.csv
results_XGBoost_SKCM_lr0.1_nest100_md9.csv,0.913525,XGBoost,SKCM,lr0.1_nest100_md9,{},results_XGBoost_SKCM_lr0.1_nest100_md9.csv


In [29]:
df[['Mean_AUC', 'cancer_type']].groupby('cancer_type').max()

Unnamed: 0_level_0,Mean_AUC
cancer_type,Unnamed: 1_level_1
BLCA,0.871857
COAD,0.91467
HNSC,0.888789
LUAD,0.900509
PAAD,0.908294
SKCM,0.913525
STAD,0.902533
all,0.906102


In [39]:
best_parameters_auc = pd.DataFrame({'parameters': df[['Mean_AUC', 'cancer_type']].groupby('cancer_type').idxmax()['Mean_AUC'], 'AUC':df[['Mean_AUC', 'cancer_type']].groupby('cancer_type').max()['Mean_AUC']})
best_parameters_auc

Unnamed: 0_level_0,parameters,AUC
cancer_type,Unnamed: 1_level_1,Unnamed: 2_level_1
BLCA,results_XGBoost_BLCA_lr0.1_nest100_md9.csv,0.871857
COAD,results_XGBoost_COAD_lr0.1_nest100_md9.csv,0.91467
HNSC,results_NeuralNet_2_HNSC_lr0.001_epochs50.csv,0.888789
LUAD,results_XGBoost_LUAD_lr0.1_nest100_md9.csv,0.900509
PAAD,results_NeuralNet_2_PAAD_lr0.001_epochs50.csv,0.908294
SKCM,results_XGBoost_SKCM_lr0.1_nest100_md9.csv,0.913525
STAD,results_XGBoost_STAD_lr0.1_nest100_md9.csv,0.902533
all,results_RandomForest_All_.csv,0.906102


In [44]:
df[(df['hyperparameters'] == '')]

Unnamed: 0,Mean_AUC,model,cancer_type,hyperparameters,hyperparameters_dict,filename
results_LogisticRegression_PAAD_.csv,0.720801,LogisticRegression,PAAD,,{},results_LogisticRegression_PAAD_.csv
results_LogisticRegression_HNSC_.csv,0.725152,LogisticRegression,HNSC,,{},results_LogisticRegression_HNSC_.csv
results_LogisticRegression_BLCA_.csv,0.777854,LogisticRegression,BLCA,,{},results_LogisticRegression_BLCA_.csv
results_LogisticRegression_STAD_.csv,0.781195,LogisticRegression,STAD,,{},results_LogisticRegression_STAD_.csv
results_NeuralNet_2_All_.csv,0.820646,NeuralNet_2,all,,{},results_NeuralNet_2_All_.csv
results_LogisticRegression_COAD_.csv,0.826493,LogisticRegression,COAD,,{},results_LogisticRegression_COAD_.csv
results_NeuralNet_2_BLCA_.csv,0.83736,NeuralNet_2,BLCA,,{},results_NeuralNet_2_BLCA_.csv
results_NeuralNet_2_LUAD_.csv,0.843366,NeuralNet_2,LUAD,,{},results_NeuralNet_2_LUAD_.csv
results_NeuralNet_2_SKCM_.csv,0.849749,NeuralNet_2,SKCM,,{},results_NeuralNet_2_SKCM_.csv
results_NeuralNet_3_BLCA_.csv,0.85811,NeuralNet_3,BLCA,,{},results_NeuralNet_3_BLCA_.csv


In [54]:
df[['Mean_AUC', 'model', 'cancer_type']].groupby(['model', 'cancer_type']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean_AUC
model,cancer_type,Unnamed: 2_level_1
LogisticRegression,BLCA,0.777854
LogisticRegression,COAD,0.826493
LogisticRegression,HNSC,0.725152
LogisticRegression,PAAD,0.720801
LogisticRegression,STAD,0.781195
NeuralNet_1,BLCA,0.86597
NeuralNet_1,COAD,0.908903
NeuralNet_1,HNSC,0.882219
NeuralNet_1,LUAD,0.891021
NeuralNet_1,PAAD,0.886416


In [53]:
df[['Mean_AUC', 'model', 'cancer_type']].groupby(['model', 'cancer_type']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean_AUC
model,cancer_type,Unnamed: 2_level_1
LogisticRegression,BLCA,0.777854
LogisticRegression,COAD,0.826493
LogisticRegression,HNSC,0.725152
LogisticRegression,PAAD,0.720801
LogisticRegression,STAD,0.781195
NeuralNet_1,BLCA,0.857663
NeuralNet_1,COAD,0.903382
NeuralNet_1,HNSC,0.87176
NeuralNet_1,LUAD,0.883133
NeuralNet_1,PAAD,0.867734


In [51]:
df[(df['hyperparameters'] == '')][['Mean_AUC', 'model', 'cancer_type']].groupby(['model', 'cancer_type']).max()

Unnamed: 0_level_0,Unnamed: 1_level_0,Mean_AUC
model,cancer_type,Unnamed: 2_level_1
LogisticRegression,BLCA,0.777854
LogisticRegression,COAD,0.826493
LogisticRegression,HNSC,0.725152
LogisticRegression,PAAD,0.720801
LogisticRegression,STAD,0.781195
NeuralNet_1,BLCA,0.858608
NeuralNet_1,COAD,0.904127
NeuralNet_1,HNSC,0.873197
NeuralNet_1,LUAD,0.884134
NeuralNet_1,PAAD,0.873182


In [121]:
fi_df = pd.read_csv(os.path.join(output_folder, 'mean_feature_importance.csv'), index_col = 0)


In [58]:
best_file_names = df[['Mean_AUC', 'model', 'cancer_type']].groupby(['model', 'cancer_type']).idxmax()['Mean_AUC'].values

In [73]:
temp_df = df[df.index.str.replace('results_', '').isin(fi_df.index.str.replace('feature_importances_', ''))]
best_file_names = temp_df[['Mean_AUC', 'model', 'cancer_type']].groupby(['model', 'cancer_type']).idxmax()['Mean_AUC'].str.replace('results_', 'feature_importances_').values

In [94]:
fi_df = fi_df[fi_df.index.isin(best_file_names)]
fi_df.loc['mean_importance'] = fi_df.mean()
fi_df.loc['abs_mean_importance'] = fi_df.loc['mean_importance'].abs()
fi_df.sort_values('abs_mean_importance', axis = 1)

Unnamed: 0,gastrointestinal_tract_,Chromosome_DNA_12,Cancer_type_HNSC,Cancer_type_LUAD,Variant_Classification_DNA_Start_Codon_SNP,Chromosome_DNA_11,Cancer_type_PAAD,Cancer_type_BLCA,Cancer_type_SKCM,Reference_Allele_DNA_T,...,vulva,skin,vagina,fallopian_tube,placenta,genital_tract,thyroid,lung,upper_aerodigestive_tract,peritoneum
feature_importances_RandomForest_All_.csv,0.0,0.002405,0.002574,0.003849,7.3e-05,0.002434,0.000447,0.003843,0.006609,0.00295,...,1.3e-05,0.018421,5.908944e-06,1e-06,5e-06,2.6e-05,0.000334,0.038364,0.009912,3.7e-05
feature_importances_LogisticRegression_PAAD_.csv,0.0,-0.049381,0.0,0.0,0.017656,0.01141,0.0,0.0,0.0,-0.000656,...,0.089193,-2.001285,0.07779609,0.077796,0.077796,0.352233,1.80418,-6.476537,-0.400895,0.404819
feature_importances_RandomForest_PAAD_.csv,0.0,0.002959,0.0,0.0,5.7e-05,0.002845,0.0,0.0,0.0,0.003388,...,0.00019,0.017383,0.0002011915,2.8e-05,3.1e-05,0.000734,0.003265,0.042056,0.012634,7e-05
feature_importances_RandomForest_HNSC_.csv,0.0,0.002777,0.0,0.0,7.7e-05,0.002886,0.0,0.0,0.0,0.00343,...,0.000125,0.01821,5.062442e-05,6e-06,6e-06,5.2e-05,0.00038,0.042264,0.012115,2.6e-05
feature_importances_RandomForest_LUAD_.csv,0.0,0.002536,0.0,0.0,0.000105,0.002797,0.0,0.0,0.0,0.003778,...,1.5e-05,0.017744,3.073674e-05,2e-06,1.2e-05,5.6e-05,0.000484,0.0469,0.011255,7.1e-05
feature_importances_XGBoost_PAAD_lr0.1_nest100_md3.csv,0.0,0.000202,0.0,0.0,0.0,0.00151,0.0,0.0,0.0,0.003111,...,0.0,0.010824,0.0,0.0,0.0,0.0,0.0,0.048517,0.006204,0.0
feature_importances_XGBoost_LUAD_lr0.1_nest100_md9.csv,0.0,0.008576,0.0,0.0,0.001748,0.008658,0.0,0.0,0.0,0.006053,...,0.001649,0.010724,0.0,0.0,0.0,0.0,0.010015,0.04253,0.008803,0.000697
feature_importances_XGBoost_STAD_lr0.1_nest100_md9.csv,0.0,0.008456,0.0,0.0,0.0,0.008204,0.0,0.0,0.0,0.007013,...,0.0,0.011795,0.0,0.0,0.0,0.0,0.016309,0.03498,0.009406,0.0
feature_importances_RandomForest_STAD_.csv,0.0,0.002505,0.0,0.0,5.9e-05,0.002655,0.0,0.0,0.0,0.003677,...,3.6e-05,0.018306,1.037567e-05,2e-06,1e-06,1.5e-05,0.000353,0.045092,0.011875,2.3e-05
feature_importances_XGBoost_BLCA_lr0.1_nest100_md9.csv,0.0,0.009349,0.0,0.0,0.0,0.006682,0.0,0.0,0.0,0.006017,...,0.0,0.010325,0.0,0.0,0.0,0.0,0.018458,0.033128,0.008406,0.0


In [96]:
fi_df.sort_values('abs_mean_importance', axis =1)

Unnamed: 0,gastrointestinal_tract_,Chromosome_DNA_12,Cancer_type_HNSC,Cancer_type_LUAD,Variant_Classification_DNA_Start_Codon_SNP,Chromosome_DNA_11,Cancer_type_PAAD,Cancer_type_BLCA,Cancer_type_SKCM,Reference_Allele_DNA_T,...,vulva,skin,vagina,fallopian_tube,placenta,genital_tract,thyroid,lung,upper_aerodigestive_tract,peritoneum
feature_importances_RandomForest_All_.csv,0.0,0.002405,0.002574,0.003849,7.3e-05,0.002434,0.000447,0.003843,0.006609,0.00295,...,1.3e-05,0.018421,5.908944e-06,1e-06,5e-06,2.6e-05,0.000334,0.038364,0.009912,3.7e-05
feature_importances_LogisticRegression_PAAD_.csv,0.0,-0.049381,0.0,0.0,0.017656,0.01141,0.0,0.0,0.0,-0.000656,...,0.089193,-2.001285,0.07779609,0.077796,0.077796,0.352233,1.80418,-6.476537,-0.400895,0.404819
feature_importances_RandomForest_PAAD_.csv,0.0,0.002959,0.0,0.0,5.7e-05,0.002845,0.0,0.0,0.0,0.003388,...,0.00019,0.017383,0.0002011915,2.8e-05,3.1e-05,0.000734,0.003265,0.042056,0.012634,7e-05
feature_importances_RandomForest_HNSC_.csv,0.0,0.002777,0.0,0.0,7.7e-05,0.002886,0.0,0.0,0.0,0.00343,...,0.000125,0.01821,5.062442e-05,6e-06,6e-06,5.2e-05,0.00038,0.042264,0.012115,2.6e-05
feature_importances_RandomForest_LUAD_.csv,0.0,0.002536,0.0,0.0,0.000105,0.002797,0.0,0.0,0.0,0.003778,...,1.5e-05,0.017744,3.073674e-05,2e-06,1.2e-05,5.6e-05,0.000484,0.0469,0.011255,7.1e-05
feature_importances_XGBoost_PAAD_lr0.1_nest100_md3.csv,0.0,0.000202,0.0,0.0,0.0,0.00151,0.0,0.0,0.0,0.003111,...,0.0,0.010824,0.0,0.0,0.0,0.0,0.0,0.048517,0.006204,0.0
feature_importances_XGBoost_LUAD_lr0.1_nest100_md9.csv,0.0,0.008576,0.0,0.0,0.001748,0.008658,0.0,0.0,0.0,0.006053,...,0.001649,0.010724,0.0,0.0,0.0,0.0,0.010015,0.04253,0.008803,0.000697
feature_importances_XGBoost_STAD_lr0.1_nest100_md9.csv,0.0,0.008456,0.0,0.0,0.0,0.008204,0.0,0.0,0.0,0.007013,...,0.0,0.011795,0.0,0.0,0.0,0.0,0.016309,0.03498,0.009406,0.0
feature_importances_RandomForest_STAD_.csv,0.0,0.002505,0.0,0.0,5.9e-05,0.002655,0.0,0.0,0.0,0.003677,...,3.6e-05,0.018306,1.037567e-05,2e-06,1e-06,1.5e-05,0.000353,0.045092,0.011875,2.3e-05
feature_importances_XGBoost_BLCA_lr0.1_nest100_md9.csv,0.0,0.009349,0.0,0.0,0.0,0.006682,0.0,0.0,0.0,0.006017,...,0.0,0.010325,0.0,0.0,0.0,0.0,0.018458,0.033128,0.008406,0.0


In [132]:
temp_df = df[df.index.str.contains('XGB') | df.index.str.contains('Forest') | df.index.str.contains('Regre')]
temp_df = temp_df[temp_df['hyperparameters'] == '']
best_file_names = temp_df.index.str.replace("results", "feature_importances")

In [137]:
temp_fi_df = fi_df[fi_df.index.isin(best_file_names)]

In [139]:
temp_fi_df['Cancer_type'] = temp_fi_df.index.str.split('_').str[-2]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_fi_df['Cancer_type'] = temp_fi_df.index.str.split('_').str[-2]


In [143]:
temp_fi_df

Unnamed: 0,COSMIC_total_alterations_in_gene_DNA,Cancer_type_BLCA,Cancer_type_COAD,Cancer_type_HNSC,Cancer_type_LUAD,Cancer_type_PAAD,Cancer_type_SKCM,Cancer_type_STAD,Chromosome_DNA_1,Chromosome_DNA_10,...,t_alt_count_DNA,t_ref_count_DNA,testis,thymus,thyroid,upper_aerodigestive_tract,urinary_tract,vagina,vulva,Cancer_type
feature_importances_RandomForest_All_.csv,0.031037,0.003843,0.003106,0.002574,0.003849,0.000447,0.006609,0.005203,0.00315,0.001864,...,0.028686,0.033492,0.000275,2.9e-05,0.000334,0.009912,0.008472,5.908944e-06,1.3e-05,All
feature_importances_XGBoost_COAD_.csv,0.024226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.006307,0.002671,...,0.004261,0.005094,0.0,0.001375,0.011175,0.007828,0.006721,0.0,0.0,COAD
feature_importances_LogisticRegression_PAAD_.csv,-0.345835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000309,0.031086,...,-0.53626,0.652294,2.099043,0.522042,1.80418,-0.400895,0.260129,0.07779609,0.089193,PAAD
feature_importances_RandomForest_PAAD_.csv,0.03941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003565,0.002281,...,0.036995,0.037618,0.001967,0.000254,0.003265,0.012634,0.011084,0.0002011915,0.00019,PAAD
feature_importances_XGBoost_LUAD_.csv,0.017045,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.005379,0.004928,...,0.003647,0.003673,0.005184,0.0,0.008131,0.007711,0.006185,0.0,0.000935,LUAD
feature_importances_RandomForest_HNSC_.csv,0.034956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003498,0.002033,...,0.028798,0.033365,0.000115,7e-05,0.00038,0.012115,0.010305,5.062442e-05,0.000125,HNSC
feature_importances_XGBoost_BLCA_.csv,0.032937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003482,0.003841,...,0.004294,0.003934,0.0,0.0,0.007565,0.006697,0.006548,0.0,0.0,BLCA
feature_importances_RandomForest_LUAD_.csv,0.03604,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.003602,0.002148,...,0.028821,0.034323,0.000673,0.000119,0.000484,0.011255,0.009567,3.073674e-05,1.5e-05,LUAD
feature_importances_XGBoost_All_.csv,0.022051,0.006321,0.027889,0.004682,0.002689,0.00536,0.005573,0.016397,0.002316,0.001265,...,0.003179,0.001309,0.000738,0.0,0.02226,0.00516,0.003942,0.0,0.0,All
feature_importances_XGBoost_PAAD_.csv,0.015702,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012026,0.008779,...,0.011399,0.009904,0.0,0.0,0.009296,0.010309,0.011739,0.0,0.0,PAAD


In [147]:
temp_grouped = temp_fi_df.groupby('Cancer_type').mean()

In [149]:
temp_grouped.loc['Mean_Importance'] = temp_grouped.mean()
temp_grouped.loc['Abs_Mean_Importance'] = temp_grouped.loc['Mean_Importance'].abs()

In [153]:
temp_grouped.sort_values('Abs_Mean_Importance', axis =1, ascending=False)

Unnamed: 0_level_0,peritoneum,upper_aerodigestive_tract,lung,thyroid,genital_tract,placenta,fallopian_tube,vagina,skin,vulva,...,Cancer_type_BLCA,Right_flank_base_G,Cancer_type_HNSC,Cancer_type_LUAD,Variant_Classification_DNA_Nonstop_Mutation,Cancer_type_PAAD,Chromosome_DNA_11,Variant_Classification_DNA_Start_Codon_SNP,Reference_Allele_DNA_T,gastrointestinal_tract_
Cancer_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
All,1.8e-05,0.007536,0.038646,0.011297,1.3e-05,2.279516e-06,5.438282e-07,2.954472e-06,0.017674,7e-06,...,0.005082,0.00363,0.003628,0.003269,0.00021,0.002904,0.002805,3.7e-05,0.002835,0.0
BLCA,5.798299,-1.56101,-1.072851,0.598635,-0.33152,-1.72338,-1.723373,-1.723376,-0.394036,0.249331,...,0.0,-0.004893,0.0,0.0,0.005394,0.0,-0.003116,0.000575,-0.005376,0.0
COAD,6.103823,-2.01034,-0.782704,1.158368,-2.295575,-0.960349,-0.9603486,-0.9603497,-0.690226,-0.696952,...,0.0,0.007052,0.0,0.0,0.000814,0.0,0.000699,-0.003806,-0.001972,0.0
HNSC,4.571423,-1.758411,-0.414345,1.503929,-0.584404,-0.7902531,-0.790253,-0.7902381,-1.546319,-0.811317,...,0.0,-0.002528,0.0,0.0,0.000919,0.0,-0.007538,-0.001374,0.003851,0.0
LUAD,0.000421,0.009483,0.048191,0.004308,2.8e-05,6.033797e-06,1.07913e-06,1.536837e-05,0.015431,0.000475,...,0.0,0.005209,0.0,0.0,0.000166,0.0,0.004049,5.2e-05,0.004441,0.0
PAAD,0.134963,-0.125984,-2.135086,0.60558,0.117656,0.02594224,0.02594141,0.0259991,-0.656881,0.029794,...,0.0,-0.028349,0.0,0.0,-0.004926,0.0,0.007512,0.005904,0.004725,0.0
SKCM,2e-06,0.008395,0.033819,0.002176,2.4e-05,5.487284e-07,8.771894e-07,4.482629e-07,0.02353,7e-06,...,0.0,0.00637,0.0,0.0,0.000361,0.0,0.002384,3.1e-05,0.005321,0.0
STAD,8.942331,-2.226257,-1.269072,1.586357,-2.266339,-1.32631,-1.32631,-1.326307,-1.292378,-2.212995,...,0.0,0.009762,0.0,0.0,0.000173,0.0,-0.008599,-2.2e-05,-0.013222,0.0
Mean_Importance,3.19391,-0.957074,-0.694175,0.683831,-0.670015,-0.5967927,-0.5967926,-0.5967816,-0.565401,-0.430206,...,0.000635,-0.000468,0.000453,0.000409,0.000389,0.000363,-0.000225,0.000175,7.5e-05,0.0
Abs_Mean_Importance,3.19391,0.957074,0.694175,0.683831,0.670015,0.5967927,0.5967926,0.5967816,0.565401,0.430206,...,0.000635,0.000468,0.000453,0.000409,0.000389,0.000363,0.000225,0.000175,7.5e-05,0.0


In [154]:
temp_grouped['Mean_Expression']

Cancer_type
All                    0.270780
BLCA                   0.474809
COAD                   0.818040
HNSC                   0.359724
LUAD                   0.224456
PAAD                   0.139201
SKCM                   0.274952
STAD                   0.438708
Mean_Importance        0.375084
Abs_Mean_Importance    0.375084
Name: Mean_Expression, dtype: float64