In [1]:
import pandas as pd

# Make functions to extract best model score

Because the model-selection score files have different columns depending on whether the model is single-label or multi-label, define two separate functions. Test each one on an example to make sure they produce comparable results.

In [2]:
def get_best_model_single(modelname, file_path_prefix, k_range):
    """Return the best hyperparameter combination for a single-label model.

    Reads the per-fold model-selection results from
    ``f'{file_path_prefix}{k}.csv'`` for ``k`` in ``range(k_range)``, averages
    each metric over the folds for every hyperparameter combination, and keeps
    the combination with the highest mean ``F1``.

    Parameters
    ----------
    modelname : str
        Used as the index label of the returned row.
    file_path_prefix : str
        Path of the fold files up to (not including) the fold number.
    k_range : int
        Number of fold files to read (folds ``0 .. k_range - 1``).

    Returns
    -------
    pandas.DataFrame
        A single row indexed by ``modelname`` holding the mean ``F1``,
        ``ROC AUC``, ``precision``, ``recall`` and ``accuracy`` followed by the
        hyperparameter columns. A hyperparameter that is missing in the files
        is reported as ``-1``.

    Raises
    ------
    ValueError
        If ``k_range`` is smaller than 1, so there is no fold file to read.
    """
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    metrics = ['F1', 'ROC AUC', 'precision', 'recall', 'accuracy']

    if k_range < 1:
        raise ValueError(f'k_range must be at least 1, got {k_range}')

    inner_scores = pd.concat(
        [pd.read_csv(f'{file_path_prefix}{k}.csv') for k in range(k_range)],
        ignore_index=True,
    )

    # groupby drops rows whose key is NaN, so missing hyperparameters get the
    # sentinel -1. Metrics are deliberately left as NaN: 'mean' skips them,
    # whereas a -1 sentinel would be averaged in as if it were a real score.
    inner_scores = inner_scores.fillna({p: -1 for p in params})

    best_model = (inner_scores
                  .groupby(params)
                  .agg({metric: 'mean' for metric in metrics})
                  .sort_values('F1', ascending=False)
                  .reset_index()
                  .to_dict('records')[0])
    best_model = pd.DataFrame(best_model, index=[modelname])

    # metrics first, hyperparameters at the end
    return best_model[metrics + params]

In [5]:
# Try the single-label function on one model (reads fold files 0, 1 and 2)
test1 = get_best_model_single(
    'climate_mitigation',
    '/home/dveytia/ORO-map-relevance/outputs/model_selection/climate_mitigation_model_selection_',
    3,
)
test1

Unnamed: 0,F1,ROC AUC,precision,recall,accuracy,batch_size,weight_decay,learning_rate,num_epochs,class_weight
climate_mitigation,0.802477,0.938158,0.726099,0.897321,0.879555,16,0.0,1e-05,2,-1


In [3]:
def get_best_model_multi(modelname, file_path_prefix, k_range): 
    """Return the best hyperparameter combinations for a multi-label model.

    Reads the per-fold model-selection results from
    ``f'{file_path_prefix}{k}.csv'`` for ``k`` in ``range(k_range)`` and
    averages the metrics over the folds for every hyperparameter combination.

    The first row of the result is the combination with the highest mean
    ``F1 macro``. It is followed by one row per label (every column named
    ``'F1 - <label>'`` defines a label) holding the combination with the
    highest mean F1 for that label, so label rows may use different
    hyperparameters than the first row.

    Parameters
    ----------
    modelname : str
        Used as the index label of every returned row; a ``'<modelname>.'``
        prefix is stripped from the label names.
    file_path_prefix : str
        Path of the fold files up to (not including) the fold number.
    k_range : int
        Number of fold files to read (folds ``0 .. k_range - 1``).

    Returns
    -------
    pandas.DataFrame
        Macro metric columns, ``'label name'``, the generic per-label metric
        columns (``'F1 - label'``, ``'ROC AUC - label'``, ...) and finally the
        hyperparameter columns. Cells that do not apply to a row are NaN; a
        hyperparameter that is missing in the files is reported as ``-1``.

    Raises
    ------
    ValueError
        If ``k_range`` is smaller than 1, so there is no fold file to read.
    """
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    macro_metrics = ['F1 macro', 'F1 micro', 'F1 weighted',
                     'ROC AUC macro', 'precision macro', 'recall macro']
    label_metrics = ['F1', 'ROC AUC', 'precision', 'recall', 'accuracy']
    label_prefix = 'F1 - '

    if k_range < 1:
        raise ValueError(f'k_range must be at least 1, got {k_range}')

    inner_scores = pd.concat(
        [pd.read_csv(f'{file_path_prefix}{k}.csv') for k in range(k_range)],
        ignore_index=True,
    )

    # groupby drops rows whose key is NaN, so missing hyperparameters get the
    # sentinel -1. Metrics are deliberately left as NaN: 'mean' skips them,
    # whereas a -1 sentinel would be averaged in as if it were a real score.
    inner_scores = inner_scores.fillna({p: -1 for p in params})

    def _best_record(metric_columns):
        # Fold-mean of metric_columns per hyperparameter combination; return
        # the combination that ranks highest on the first of those columns.
        return (inner_scores
                .groupby(params)
                .agg({column: 'mean' for column in metric_columns})
                .sort_values(metric_columns[0], ascending=False)
                .reset_index()
                .to_dict('records')[0])

    rows = [pd.DataFrame(_best_record(macro_metrics), index=[modelname])]

    # A label is whatever follows 'F1 - ' in a column name. Selecting by
    # prefix skips the aggregate columns ('F1 macro', 'F1 samples', ...)
    # whether or not they are present in the files.
    allLabels = [column[len(label_prefix):]
                 for column in inner_scores.columns
                 if column.startswith(label_prefix)]

    for label in allLabels:
        specific = [f'{metric} - {label}' for metric in label_metrics]
        generic = [f'{metric} - label' for metric in label_metrics]

        label_row = pd.DataFrame(_best_record(specific), index=[modelname])

        # Rename only the exact metric columns, so a label whose text also
        # occurs inside another column name cannot corrupt that column.
        label_row = label_row.rename(columns=dict(zip(specific, generic)))

        # add a column saying which label the row belongs to
        label_row.insert(0, 'label name', label.replace(modelname + '.', ''))
        rows.append(label_row)

    best_model = pd.concat(rows)

    # re-arrange so hyperparameters are at the end
    return best_model[[c for c in best_model if c not in params]
                      + [c for c in params if c in best_model]]

In [4]:
# Try the multi-label function on one model (reads fold files 0, 1 and 2)
test2 = get_best_model_multi(
    'adapt_to_threat',
    '/home/dveytia/ORO-map-relevance/outputs/model_selection/adapt_to_threat_model_selection_',
    3,
)
test2

Unnamed: 0,F1 macro,F1 micro,F1 weighted,ROC AUC macro,precision macro,recall macro,label name,F1 - label,ROC AUC - label,precision - label,recall - label,accuracy - label,batch_size,weight_decay,learning_rate,num_epochs,class_weight
adapt_to_threat,0.470982,0.528632,0.542695,0.835704,0.540875,0.494483,,,,,,,32,0.0,5e-05,4,"{0: 2.116788321167883, 1: 12.774193548387096, ..."
adapt_to_threat,,,,,,,Human,0.786876,0.924077,0.746748,0.833221,0.854892,16,0.0,5e-05,3,-1
adapt_to_threat,,,,,,,Natural,0.463664,0.830197,0.37153,0.683333,0.884352,32,0.0,5e-05,3,"{0: 2.116788321167883, 1: 12.774193548387096, ..."
adapt_to_threat,,,,,,,Both,0.335922,0.736343,0.489107,0.26874,0.854521,32,0.0,5e-05,4,"{0: 2.116788321167883, 1: 12.774193548387096, ..."


In [8]:
# Stack the two example results to preview the layout of the combined table
params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
test3 = pd.concat([test2, test1])

# column order: label name first, then the scores, hyperparameters last
score_cols = [c for c in test3 if c not in params and c != "label name"]
param_cols = [c for c in params if c in test3]
test3 = test3[["label name"] + score_cols + param_cols]
print(test3)

                   label name  F1 macro  F1 micro  F1 weighted  ROC AUC macro  \
adapt_to_threat           NaN  0.470982  0.528632     0.542695       0.835704   
adapt_to_threat         Human       NaN       NaN          NaN            NaN   
adapt_to_threat       Natural       NaN       NaN          NaN            NaN   
adapt_to_threat          Both       NaN       NaN          NaN            NaN   
climate_mitigation        NaN       NaN       NaN          NaN            NaN   

                    precision macro  recall macro  F1 - label  \
adapt_to_threat            0.540875      0.494483         NaN   
adapt_to_threat                 NaN           NaN    0.786876   
adapt_to_threat                 NaN           NaN    0.463664   
adapt_to_threat                 NaN           NaN    0.335922   
climate_mitigation              NaN           NaN         NaN   

                    ROC AUC - label  precision - label  ...        F1  \
adapt_to_threat                 NaN              

# Get all model scores for all single label models

In [6]:
# Screening model: this one reads five fold files (0 to 4)
screenModelScores = get_best_model_single(
    'screen',
    '/home/dveytia/ORO-map-relevance/outputs/model_selection/screen_model_selection_',
    5,
)

print(screenModelScores)

              F1   ROC AUC  precision    recall  accuracy  batch_size  \
screen  0.702171  0.911164   0.663529  0.760859  0.868857          16   

        weight_decay  learning_rate  num_epochs  class_weight  
screen           0.0        0.00001           4            -1  


In [7]:
# create a list of all the single label models to loop through
singleModels = ['climate_mitigation','Forecast','impact_ncp.Any','impact_nature','blue_carbon',
                'biodiversity_metric', 'restoration', 'safe_fish', 'safe_space','societal_implemented'] 

# Collect one best-model row per model and concatenate once at the end,
# instead of re-copying the growing frame on every iteration and seeding it
# with a "is this the first model?" check.
single_rows = []
for model in singleModels:
    print(model)
    single_rows.append(
        get_best_model_single(model, f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    )
singleModelScores = pd.concat(single_rows)

print(singleModelScores.head(2))
print(singleModelScores.tail(2))

climate_mitigation
Forecast
impact_ncp.Any
impact_nature
blue_carbon
biodiversity_metric
restoration
safe_fish
safe_space
societal_implemented
                          F1   ROC AUC  precision    recall  accuracy  \
climate_mitigation  0.802477  0.938158   0.726099  0.897321  0.879555   
Forecast            0.559951  0.849199   0.466964  0.730504  0.863921   

                    batch_size  weight_decay  learning_rate  num_epochs  \
climate_mitigation          16           0.0        0.00001           2   
Forecast                    32           0.0        0.00005           4   

                                    class_weight  
climate_mitigation                            -1  
Forecast            {0: 1, 1: 7.714285714285714}  
                            F1   ROC AUC  precision    recall  accuracy  \
safe_space            0.834123  0.933927   0.768850  0.912925  0.873261   
societal_implemented  0.642857  0.583932   0.539927  0.852941  0.533662   

                      batch_size

# Get model scores for all multi label models

In [5]:
# create a list of all the multi label models to loop through
multiModels = ['oro_branch','oro_any_mitigation','oro_any_nature', 'oro_any_societal',
               'data_type',
               'adapt_to_threat','adapt_to_threat_simplified','adapt_to_threat_simplified2', 
               'climate_threat','climate_threat_simplified', 
               'ecosystem_type','ecosystem_type_simplified','ecosystem_type_simplified2',
               'impact_ncp_nested',
              'm_co2_ocean_storage', 'm_co2_removal',
               'marine_system', 
               'method_type', 'method_type_nested','method_type_simplified',
               'oro_development_stage','oro_development_stage_mitigation','oro_development_stage_nature', 'oro_development_stage_societal',
              'scientific_discipline'] # add data

# Collect the rows of every model and concatenate once at the end, instead
# of re-copying the growing frame on every iteration and seeding it with a
# "is this the first model?" check.
multi_rows = []
for model in multiModels:
    print(model)
    multi_rows.append(
        get_best_model_multi(model, f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    )
multiModelScores = pd.concat(multi_rows)

print(multiModelScores.head(2))

oro_branch
oro_any_mitigation
oro_any_nature
oro_any_societal
data_type
adapt_to_threat
adapt_to_threat_simplified
adapt_to_threat_simplified2
climate_threat
climate_threat_simplified
ecosystem_type
ecosystem_type_simplified
ecosystem_type_simplified2
impact_ncp_nested
m_co2_ocean_storage
m_co2_removal
marine_system
method_type
method_type_nested
method_type_simplified
oro_development_stage
oro_development_stage_mitigation
oro_development_stage_nature
oro_development_stage_societal
scientific_discipline
            F1 macro  F1 micro  F1 weighted  ROC AUC macro  precision macro  \
oro_branch  0.827799  0.863254     0.858565       0.938578         0.851071   
oro_branch       NaN       NaN          NaN            NaN              NaN   

            recall macro  label name  F1 - label  ROC AUC - label  \
oro_branch      0.816377         NaN         NaN              NaN   
oro_branch           NaN  Mitigation    0.932975         0.983207   

            precision - label  recall - label

# Join and write all model scores to a csv

In [8]:
params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']

# Stack the screening, single-label and multi-label summaries
best_models_all = pd.concat([screenModelScores, singleModelScores, multiModelScores])

# Column order: label name first, then every score column in its current
# order, hyperparameters last
score_cols = [c for c in best_models_all.columns
              if c not in params and c != 'label name']
param_cols = [c for c in params if c in best_models_all.columns]

# .copy() gives an independent frame, so adding a column below does not
# trigger pandas' SettingWithCopyWarning
best_models_all = best_models_all[['label name'] + score_cols + param_cols].copy()

# make the index (model name) its own column and place in position 0
best_models_all.insert(0, 'model', best_models_all.index)

# check all the column names
list(best_models_all.columns)

['model',
 'label name',
 'F1',
 'ROC AUC',
 'precision',
 'recall',
 'accuracy',
 'F1 macro',
 'F1 micro',
 'F1 weighted',
 'ROC AUC macro',
 'precision macro',
 'recall macro',
 'F1 - label',
 'ROC AUC - label',
 'precision - label',
 'recall - label',
 'accuracy - label',
 'batch_size',
 'weight_decay',
 'learning_rate',
 'num_epochs',
 'class_weight']

In [9]:
# Write the combined summary; the row index is not written to the file
best_models_all.to_csv(
    '/home/dveytia/ORO-map-relevance/outputs/summary_model_scores.csv',
    index=False,
)

# No longer needed: Calculate the scores for each label (for multi-label models)

In the functions above, only the macro scores are reported on the summary row of a multi-label model. If a separate dataframe containing the scores for every label is desired, use the following (note that this code is not complete).

In [14]:
def get_best_model_labels(modelname, file_path_prefix, k_range): 
    """Return the best hyperparameter combination for each label of a model.

    Reads the per-fold model-selection results from
    ``f'{file_path_prefix}{k}.csv'`` for ``k`` in ``range(k_range)``. Every
    column named ``'F1 - <label>'`` defines a label. For each label the
    per-label metrics are averaged over the folds for every hyperparameter
    combination, and the combination with the highest mean F1 is kept.

    Parameters
    ----------
    modelname : str
        Used as the index label of every returned row.
    file_path_prefix : str
        Path of the fold files up to (not including) the fold number.
    k_range : int
        Number of fold files to read (folds ``0 .. k_range - 1``).

    Returns
    -------
    pandas.DataFrame
        One row per label with the columns ``'label'`` (the label exactly as
        it appears in the file), the hyperparameters, and ``F1``, ``ROC AUC``,
        ``precision``, ``recall`` and ``accuracy``. A hyperparameter that is
        missing in the files is reported as ``-1``. The frame is empty when
        the files contain no per-label columns.

    Raises
    ------
    ValueError
        If ``k_range`` is smaller than 1, so there is no fold file to read.
    """
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    label_metrics = ['F1', 'ROC AUC', 'precision', 'recall', 'accuracy']
    label_prefix = 'F1 - '

    if k_range < 1:
        raise ValueError(f'k_range must be at least 1, got {k_range}')

    inner_scores = pd.concat(
        [pd.read_csv(f'{file_path_prefix}{k}.csv') for k in range(k_range)],
        ignore_index=True,
    )

    # groupby drops rows whose key is NaN, so missing hyperparameters get the
    # sentinel -1. Metrics are deliberately left as NaN: 'mean' skips them,
    # whereas a -1 sentinel would be averaged in as if it were a real score.
    inner_scores = inner_scores.fillna({p: -1 for p in params})

    # get the names of the different labels: whatever follows 'F1 - ' in a
    # column name. Selecting by prefix skips the aggregate columns
    # ('F1 macro', 'F1 samples', ...) whether or not they are present.
    allLabels = [column[len(label_prefix):]
                 for column in inner_scores.columns
                 if column.startswith(label_prefix)]

    rows = []
    for label in allLabels:
        specific = [f'{metric} - {label}' for metric in label_metrics]

        # get the best model for this label
        best_record = (inner_scores
                       .groupby(params)
                       .agg({column: 'mean' for column in specific})
                       .sort_values(specific[0], ascending=False)
                       .reset_index()
                       .to_dict('records')[0])
        label_row = pd.DataFrame(best_record, index=[modelname])

        # take the label name out of the metric column names so that every
        # row shares the same generic columns
        label_row = label_row.rename(columns=dict(zip(specific, label_metrics)))

        # add a column saying which label the row belongs to
        label_row.insert(0, 'label', label)
        rows.append(label_row)

    if not rows:
        # no per-label columns in the files: return an empty, well-formed frame
        return pd.DataFrame(columns=['label'] + params + label_metrics)

    return pd.concat(rows)

In [15]:
# Collect the per-label rows of every multi-label model and concatenate once
# at the end, instead of re-copying the growing frame on every iteration and
# seeding it with a "is this the first model?" check.
label_rows = []
for model in multiModels:
    print(model)
    label_rows.append(
        get_best_model_labels(model, f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    )
multiModelLabelScores = pd.concat(label_rows)

# the index holds the model name, so it is written to the file
multiModelLabelScores.to_csv('/home/dveytia/ORO-map-relevance/outputs/summary_model_label_scores.csv', index=True)


data_type
adapt_to_threat
adapt_to_threat_simplified
adapt_to_threat_simplified2
climate_threat
ecosystem_type
ecosystem_type_simplified
impact_ncp_nested
m_co2_ocean_storage
m_co2_removal
marine_system
method_type
method_type_nested
oro_development_stage
oro_development_stage_mitigation
oro_development_stage_nature
oro_development_stage_societal
scientific_discipline
