In [1]:
import pandas as pd

# Make functions to extract best model score

Because the model scores have different numbers of columns depending on if it is single label or multi label, format two different types of functions. Test them out on an example to make sure they produce similar results

In [2]:
def get_best_model_single(modelname, file_path_prefix, k_range):
    inner_scores = []
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    
    for k in range(k_range): #Change to 5 if you are using the binary
        inner_df = pd.read_csv(f'{file_path_prefix}{k}.csv')
        inner_df = inner_df.sort_values('F1',ascending=False).reset_index(drop=True)
        inner_scores += inner_df.to_dict('records')
    
    inner_scores = pd.DataFrame.from_dict(inner_scores).fillna(-1)
    best_model = (inner_scores
                  .groupby(params).agg({
                      'F1':'mean',
                      'ROC AUC':'mean',
                      'precision':'mean',
                      'recall':'mean',
                      'accuracy':'mean'
                      }).sort_values('F1',ascending=False).reset_index()).to_dict('records')[0]
    best_model = pd.DataFrame(best_model,index=[modelname])
    del inner_scores, inner_df
    return best_model

In [3]:
test2=get_best_model_single('climate_mitigation',f'/home/dveytia/ORO-map-relevance/outputs/model_selection/climate_mitigation_model_selection_', 3)
test2

Unnamed: 0,batch_size,weight_decay,learning_rate,num_epochs,class_weight,F1,ROC AUC,precision,recall,accuracy
climate_mitigation,16,0.0,1e-05,2,-1,0.802477,0.938158,0.726099,0.897321,0.879555


In [3]:
def get_best_model_multi(modelname, file_path_prefix, k_range): 
    inner_scores = []
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    
    for k in range(k_range): 
        inner_df = pd.read_csv(f'{file_path_prefix}{k}.csv')
        inner_df = inner_df.sort_values('F1 macro', ascending=False).reset_index(drop=True)
        inner_scores += inner_df.to_dict('records')
    
    inner_scores = pd.DataFrame.from_dict(inner_scores).fillna(-1)
    
    if 'accuracy macro' not in list(inner_scores.columns): # if there is no accuracy macro column set to dummy value
        inner_scores['accuracy macro'] = -999 
        
    best_model = (inner_scores
                  .groupby(params).agg({
                      'F1 macro':'mean',
                      'ROC AUC macro':'mean',
                      'precision macro':'mean',
                      'recall macro':'mean',
                      'accuracy macro':'mean'
                      }).sort_values('F1 macro',ascending=False).reset_index()).to_dict('records')[0]
    
    best_model = pd.DataFrame(best_model,index=[modelname])
    best_model.rename(columns={'F1 macro': 'F1', 'ROC AUC macro': 'ROC AUC', 'precision macro':'precision', 'recall macro':'recall', 'accuracy macro':'accuracy'}, inplace=True)
    
    
    ## Get label information
    allLabels = [x for x in inner_scores.columns if 'F1' in x] 
    allLabels.remove('F1 macro')
    allLabels.remove('F1 micro')
    allLabels.remove('F1 weighted')
    allLabels.remove('F1 samples')
    allLabels = [s.replace('F1 - ', '') for s in allLabels]
    
    best_model['label_names'] = "temp"
    best_model.at[modelname,'label_names'] = [s.replace(modelname + '.','') for s in allLabels]
    
    
    ## Get label F1s
    labelScores = []
    for label in allLabels:
        best_model_temp = (inner_scores
                  .groupby(params).agg({
                      'F1'+' - ' + label:'mean'
                      }).sort_values('F1'+' - ' + label,ascending=False).reset_index()).to_dict('records')[0]
        best_model_temp = pd.DataFrame(best_model_temp, index=[modelname])
        labelScores.append(best_model_temp['F1 - '+label])
        
    best_model['label_F1s'] = "temp"
    best_model.at[modelname,'label_F1s'] = [ '%.2f' % elem for elem in labelScores]
    
    
    del inner_scores, inner_df
    return best_model

In [23]:
test1=get_best_model_multi('adapt_to_threat',f'/home/dveytia/ORO-map-relevance/outputs/model_selection/adapt_to_threat_model_selection_', 3)
test1

Unnamed: 0,batch_size,weight_decay,learning_rate,num_epochs,class_weight,F1,ROC AUC,precision,recall,accuracy,label_names,label_F1s
adapt_to_threat,32,0.0,5e-05,4,"{0: 2.116788321167883, 1: 12.774193548387096, ...",0.470982,0.835704,0.540875,0.494483,-999.0,"[Human, Natural, Both]","[0.79, 0.46, 0.34]"


In [24]:
pd.concat([test2, test1])

Unnamed: 0,batch_size,weight_decay,learning_rate,num_epochs,class_weight,F1,ROC AUC,precision,recall,accuracy,label_names,label_F1s
climate_mitigation,16,0.0,1e-05,2,-1,0.802477,0.938158,0.726099,0.897321,0.879555,,
adapt_to_threat,32,0.0,5e-05,4,"{0: 2.116788321167883, 1: 12.774193548387096, ...",0.470982,0.835704,0.540875,0.494483,-999.0,"[Human, Natural, Both]","[0.79, 0.46, 0.34]"


# Get all model scores for all single label models

In [9]:
# create a list of all the single label models to loop through
singleModels = ['climate_mitigation','Forecast','impact_ncp.Any','blue_carbon'] 

for model  in singleModels:
    print(model)
    temp = get_best_model_single(model,f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    if model == singleModels[0]:
        singleModelScores = temp
    else:
        singleModelScores = pd.concat([singleModelScores, temp])

print(singleModelScores.head)

climate_mitigation
Forecast
impact_ncp.Any
blue_carbon
<bound method NDFrame.head of                     batch_size  weight_decay  learning_rate  num_epochs  \
climate_mitigation          16           0.0        0.00001           2   
Forecast                    32           0.0        0.00005           4   
impact_ncp.Any              16           0.0        0.00001           4   
blue_carbon                 16           0.0        0.00005           4   

                                    class_weight        F1   ROC AUC  \
climate_mitigation                            -1  0.802477  0.938158   
Forecast            {0: 1, 1: 7.714285714285714}  0.559951  0.849199   
impact_ncp.Any                                -1  0.464714  0.783744   
blue_carbon                                   -1  0.894561  0.988171   

                    precision    recall  accuracy  
climate_mitigation   0.726099  0.897321  0.879555  
Forecast             0.466964  0.730504  0.863921  
impact_ncp.Any       0

# Get model scores for all multi label models

In [8]:
# create a list of all the multi label models to loop through
multiModels = ['data_type','adapt_to_threat','adapt_to_threat_simplified','adapt_to_threat_simplified2', 
               'climate_threat', 'ecosystem_type','ecosystem_type_simplified',
               'impact_ncp_nested',
              'm_co2_ocean_storage', 'm_co2_removal',
               'marine_system', 'method_type', 'method_type_nested','oro_development_stage',
               'oro_development_stage_mitigation','oro_development_stage_nature', 'oro_development_stage_societal',
              'scientific_discipline'] # add data

for model  in multiModels:
    print(model)
    temp = get_best_model_multi(model,f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    if model == multiModels[0]:
        multiModelScores = temp
    else:
        multiModelScores = pd.concat([multiModelScores, temp])

print(multiModelScores.head)

data_type
adapt_to_threat
adapt_to_threat_simplified
adapt_to_threat_simplified2
climate_threat
ecosystem_type
ecosystem_type_simplified
impact_ncp_nested
m_co2_ocean_storage
m_co2_removal
marine_system
method_type
method_type_nested
oro_development_stage
oro_development_stage_mitigation
oro_development_stage_nature
oro_development_stage_societal
scientific_discipline
<bound method NDFrame.head of                                   batch_size  weight_decay  learning_rate  \
data_type                                 32           0.0        0.00001   
adapt_to_threat                           32           0.0        0.00005   
adapt_to_threat_simplified                16           0.0        0.00005   
adapt_to_threat_simplified2               32           0.0        0.00005   
climate_threat                            16           0.0        0.00005   
ecosystem_type                            16           0.0        0.00005   
ecosystem_type_simplified                 16           0.0  

# Join and write all model scores to a csv

In [10]:
best_models_all=pd.concat([singleModelScores, multiModelScores])

In [12]:
best_models_all.to_csv(f'/home/dveytia/ORO-map-relevance/outputs/summary_model_scores.csv', index=True)

# Calcualte the scores for each label (for multi-label models)

In the above functions, for a multi label model only the macro scores are reported. If a dataframe is desired that includes the scores for every label, use the following (note that code is not complete)

In [14]:
def get_best_model_labels(modelname, file_path_prefix, k_range): 
    inner_scores = []
    params = ['batch_size','weight_decay','learning_rate','num_epochs','class_weight']
    
    for k in range(k_range): 
        inner_df = pd.read_csv(f'{file_path_prefix}{k}.csv')
        inner_df = inner_df.sort_values('F1 macro', ascending=False).reset_index(drop=True)
        inner_scores += inner_df.to_dict('records')
    
    inner_scores = pd.DataFrame.from_dict(inner_scores).fillna(-1)
    
    if 'accuracy macro' not in list(inner_scores.columns): # if there is no accuracy macro column set to dummy value
        inner_scores['accuracy macro'] = -999 
    
    # Loop across the different labels to get the best scores
    
    # get the sames of the different labels
    allLabels = [x for x in inner_scores.columns if 'F1' in x] 
    allLabels.remove('F1 macro')
    allLabels.remove('F1 micro')
    allLabels.remove('F1 weighted')
    allLabels.remove('F1 samples')
    allLabels = [s.replace('F1 - ', '') for s in allLabels]
    
    # start loop
    for label in allLabels:
        
        # get the best model
        best_model_temp = (inner_scores
                  .groupby(params).agg({
                      'F1'+' - ' + label:'mean',
                      'ROC AUC'+' - ' + label:'mean',
                      'precision'+' - ' + label:'mean',
                      'recall'+' - ' + label:'mean',
                      'accuracy'+' - ' + label:'mean'
                      }).sort_values('F1'+' - ' + label,ascending=False).reset_index()).to_dict('records')[0]
        best_model_temp = pd.DataFrame(best_model_temp, index=[modelname])

        # take label name out of column names so that everything is general 
        best_model_temp.columns = [s.replace(' - ' + label, '') for s in list(best_model_temp.columns)] 

        # add a column name saying the label
        best_model_temp.insert(0, 'label', label) 
        
        # join all together
        if label == allLabels[0]:
            labelScores = best_model_temp
        else:
            labelScores = pd.concat([labelScores, best_model_temp])
    del inner_scores, inner_df, allLabels, best_model_temp
    return labelScores

In [15]:
for model  in multiModels:
    print(model)
    temp = get_best_model_labels(model,f'/home/dveytia/ORO-map-relevance/outputs/model_selection/{model}_model_selection_', 3)
    if model == multiModels[0]:
        multiModelLabelScores = temp
    else:
        multiModelLabelScores = pd.concat([multiModelLabelScores, temp])

multiModelLabelScores.to_csv(f'/home/dveytia/ORO-map-relevance/outputs/summary_model_label_scores.csv', index=True)


data_type
adapt_to_threat
adapt_to_threat_simplified
adapt_to_threat_simplified2
climate_threat
ecosystem_type
ecosystem_type_simplified
impact_ncp_nested
m_co2_ocean_storage
m_co2_removal
marine_system
method_type
method_type_nested
oro_development_stage
oro_development_stage_mitigation
oro_development_stage_nature
oro_development_stage_societal
scientific_discipline
