In [30]:
import pandas as pd
import wandb
import os, shutil

In [2]:
# Identify the W&B project, open a public-API client and request its runs.
entity, project = "pratik24111991", "TFBS_Finetuned_Models_Work"
api = wandb.Api()
runs = api.runs(f"{entity}/{project}")

In [3]:
# Gather, in one pass over the runs, the four columns of the runs table.
summary_list, config_list, name_list, tag_list = [], [], [], []
for run in runs:
    # Output keys/values of the run (metrics such as accuracy); per the
    # original note, ._json_dict is used to omit large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters, skipping special entries whose key starts with "_".
    public_config = {}
    for key, value in run.config.items():
        if key.startswith("_"):
            continue
        public_config[key] = value
    config_list.append(public_config)

    # Human-readable run name and the tags attached to the run.
    name_list.append(run.name)
    tag_list.append(run.tags)

runs_df = pd.DataFrame(
    dict(summary=summary_list, config=config_list, name=name_list, tags=tag_list)
)

In [4]:
# Preview the assembled table: one row per run, with its raw summary dict,
# config dict, name and tag list.
runs_df

Unnamed: 0,summary,config,name,tags
0,{'_wandb': {'runtime': 63}},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_29_55_logstep=...,"[TFBS_NonTFBS, ZNF785, e-3]"
1,"{'_step': 126, 'train_loss': 0.701253188507897...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_25_06_logstep=...,"[PHF21A, TFBS_NonTFBS, e-3]"
2,"{'eval_f1': 0.6248965880072537, 'eval_auc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_46_logstep=...,"[TFBS_NonTFBS, ZBED1, e-6]"
3,"{'eval_acc': 0.8145533532446375, 'train_loss':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_18_logstep=...,"[PHF21A, TFBS_NonTFBS, e-3]"
4,"{'eval_auc': 0.3969597065276369, 'eval_recall'...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_15_31_logstep=...,"[PHF21A, TFBS_NonTFBS, e-3]"
...,...,...,...,...
3370,"{'eval_Validation loss': 0.6931307792663575, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,"[SAP30, TFBS_NonTFBS, e-3]"
3371,"{'eval_auc': 0.8798126755400077, 'eval_Validat...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,"[SAP30, TFBS_NonTFBS, e-6]"
3372,"{'Confusion Matrix': {'size': 59356, '_type': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,"[SAP30, TFBS_NonTFBS, e-5]"
3373,"{'_runtime': 604.1279644966125, 'train_loss': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,"[SAP30, TFBS_NonTFBS, e-4]"


In [5]:
def filter_tags(tags):
    """Return the tags that contain neither a hyphen nor the text 'TFBS'.

    Args:
        tags: iterable of tag strings attached to a run.

    Returns:
        A new list, in the original order, of the tags that survive.  Note
        that *any* tag containing '-' or 'TFBS' is dropped, not only tags
        such as 'e-3' or 'TFBS_NonTFBS'.
    """
    kept = []
    for tag in tags:
        if '-' in tag or 'TFBS' in tag:
            continue
        kept.append(tag)
    return kept

In [6]:
# Replace each run's tag list with its filtered version.
cleaned_tags = runs_df['tags'].apply(filter_tags)
runs_df['tags'] = cleaned_tags

In [7]:
# Preview the table again to check the 'tags' column after filtering.
runs_df

Unnamed: 0,summary,config,name,tags
0,{'_wandb': {'runtime': 63}},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_29_55_logstep=...,[ZNF785]
1,"{'_step': 126, 'train_loss': 0.701253188507897...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_25_06_logstep=...,[PHF21A]
2,"{'eval_f1': 0.6248965880072537, 'eval_auc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_46_logstep=...,[ZBED1]
3,"{'eval_acc': 0.8145533532446375, 'train_loss':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_18_logstep=...,[PHF21A]
4,"{'eval_auc': 0.3969597065276369, 'eval_recall'...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_15_31_logstep=...,[PHF21A]
...,...,...,...,...
3370,"{'eval_Validation loss': 0.6931307792663575, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,[SAP30]
3371,"{'eval_auc': 0.8798126755400077, 'eval_Validat...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,[SAP30]
3372,"{'Confusion Matrix': {'size': 59356, '_type': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,[SAP30]
3373,"{'_runtime': 604.1279644966125, 'train_loss': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,[SAP30]


In [8]:
# Flatten every summary dict into its own columns (nested keys become
# dot-separated column names).
summaries_df = pd.json_normalize(runs_df['summary'])

# Put the flattened summary columns next to the remaining run columns; the
# original 'summary' column is left out of the result.
run_metadata = runs_df.drop(columns='summary')
expanded_runs_df = pd.concat([run_metadata, summaries_df], axis=1)

# Collapse each tag list to its first element, or None when the list is empty.
expanded_runs_df['tags'] = expanded_runs_df['tags'].apply(
    lambda kept: kept[0] if kept else None
)

In [9]:
# Preview the flattened table: run columns followed by one column per
# summary key.
expanded_runs_df

Unnamed: 0,config,name,tags,_wandb.runtime,_step,train_loss,eval_recall,learning_rate,eval_precision,_timestamp,...,eval_auc,eval_mcc,eval_Validation loss,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type
0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_29_55_logstep=...,ZNF785,63,,,,,,,...,,,,,,,,,,
1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_25_06_logstep=...,PHF21A,278,126.0,0.701253,0.500000,5.333333e-04,0.250204,1.711525e+09,...,0.447602,0.000000,0.693607,1200.0,png,1200.0,9588f8f016a2943f043f3d407aae6d2d72d871dbde408f...,media/images/Confusion Matrix_84_9588f8f016a29...,51357.0,image-file
2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_46_logstep=...,ZBED1,591,282.0,0.654189,0.625171,1.319444e-07,0.625522,1.711525e+09,...,0.683296,0.250692,0.649277,1200.0,png,1200.0,6991426f0b9205b465cc644ad1c78d4efc619fb9758700...,media/images/Confusion Matrix_188_6991426f0b92...,57489.0,image-file
3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_18_logstep=...,PHF21A,278,126.0,0.341377,0.814529,1.777778e-04,0.815707,1.711524e+09,...,0.897842,0.630234,0.461001,1200.0,png,1200.0,47fb48fea6738c4408c4904d877c20acb32664b5446f97...,media/images/Confusion Matrix_84_47fb48fea6738...,55686.0,image-file
4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_15_31_logstep=...,PHF21A,276,126.0,0.696598,0.500000,5.333333e-04,0.250204,1.711524e+09,...,0.396960,0.000000,0.701757,1200.0,png,1200.0,9588f8f016a2943f043f3d407aae6d2d72d871dbde408f...,media/images/Confusion Matrix_84_9588f8f016a29...,51357.0,image-file
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3370,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,692,327.0,0.697234,0.500000,3.873874e-04,0.251934,1.709079e+09,...,0.505400,0.000000,0.693131,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file
3371,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,693,324.0,0.547333,0.798311,1.381381e-07,0.799096,1.709079e+09,...,0.879813,0.597406,0.537199,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file
3372,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,700,324.0,0.242748,0.899412,1.381381e-06,0.899874,1.709079e+09,...,0.962615,0.799286,0.245715,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file
3373,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,696,324.0,0.171405,0.887280,1.381381e-05,0.889443,1.709079e+09,...,0.951327,0.776720,0.341513,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file


In [10]:
# Keep only the runs in which every column is populated, and preserve each
# surviving row's original position in an 'index' column.
expanded_runs_df = expanded_runs_df.dropna()

# reset_index() is guarded so this cell can be re-executed safely: because the
# frame is reassigned to the same name, a second unguarded call would add a
# stray 'level_0' column and a third would raise ValueError ('level_0' already
# exists).  On the first execution the result is unchanged.
if 'index' not in expanded_runs_df.columns:
    expanded_runs_df = expanded_runs_df.reset_index()
expanded_runs_df

Unnamed: 0,index,config,name,tags,_wandb.runtime,_step,train_loss,eval_recall,learning_rate,eval_precision,...,eval_auc,eval_mcc,eval_Validation loss,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type
0,1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_25_06_logstep=...,PHF21A,278,126.0,0.701253,0.500000,5.333333e-04,0.250204,...,0.447602,0.000000,0.693607,1200.0,png,1200.0,9588f8f016a2943f043f3d407aae6d2d72d871dbde408f...,media/images/Confusion Matrix_84_9588f8f016a29...,51357.0,image-file
1,2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_46_logstep=...,ZBED1,591,282.0,0.654189,0.625171,1.319444e-07,0.625522,...,0.683296,0.250692,0.649277,1200.0,png,1200.0,6991426f0b9205b465cc644ad1c78d4efc619fb9758700...,media/images/Confusion Matrix_188_6991426f0b92...,57489.0,image-file
2,3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_20_18_logstep=...,PHF21A,278,126.0,0.341377,0.814529,1.777778e-04,0.815707,...,0.897842,0.630234,0.461001,1200.0,png,1200.0,47fb48fea6738c4408c4904d877c20acb32664b5446f97...,media/images/Confusion Matrix_84_47fb48fea6738...,55686.0,image-file
3,4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_15_31_logstep=...,PHF21A,276,126.0,0.696598,0.500000,5.333333e-04,0.250204,...,0.396960,0.000000,0.701757,1200.0,png,1200.0,9588f8f016a2943f043f3d407aae6d2d72d871dbde408f...,media/images/Confusion Matrix_84_9588f8f016a29...,51357.0,image-file
4,5,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_03_13_00_logstep=...,ZNF579,454,210.0,0.568148,0.712468,4.166667e-07,0.714082,...,0.791798,0.426546,0.566603,1200.0,png,1200.0,ff2ba4e07c917facd2715b83969450a6a1912a513c8244...,media/images/Confusion Matrix_140_ff2ba4e07c91...,58748.0,image-file
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3286,3370,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,692,327.0,0.697234,0.500000,3.873874e-04,0.251934,...,0.505400,0.000000,0.693131,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file
3287,3371,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,693,324.0,0.547333,0.798311,1.381381e-07,0.799096,...,0.879813,0.597406,0.537199,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file
3288,3372,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,700,324.0,0.242748,0.899412,1.381381e-06,0.899874,...,0.962615,0.799286,0.245715,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file
3289,3373,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,696,324.0,0.171405,0.887280,1.381381e-05,0.889443,...,0.951327,0.776720,0.341513,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file


In [11]:
# For every tag, look up the row label of the run with the highest 'eval_acc'.
idx = expanded_runs_df['eval_acc'].groupby(expanded_runs_df['tags']).idxmax()

# Pull those rows out of the full table and renumber them from zero.
best_rows = expanded_runs_df.loc[idx]
best_acc_df = best_rows.reset_index(drop=True)
best_acc_df

Unnamed: 0,index,config,name,tags,_wandb.runtime,_step,train_loss,eval_recall,learning_rate,eval_precision,...,eval_auc,eval_mcc,eval_Validation loss,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type
0,1134,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_07_48_56_logstep=...,AGO2,1362,633.0,0.264199,0.895822,0.000004,0.898803,...,0.936196,0.794619,0.298919,1200.0,png,1200.0,958cd42b536bc3994165310ae2cbd90fbd514c41e93d89...,media/images/Confusion Matrix_422_958cd42b536b...,60413.0,image-file
1,498,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_03_02_22_logstep=...,ARHGAP35,154,66.0,0.333016,0.821965,0.000019,0.823799,...,0.901186,0.645762,0.402552,1200.0,png,1200.0,05bda9ec50bc339493dca11b5d394511f04b407205e0ee...,media/images/Confusion Matrix_44_05bda9ec50bc3...,57200.0,image-file
2,2085,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_03th_2024_07_58_34_logstep=...,ARID3A,1699,795.0,0.256148,0.764785,0.000036,0.768812,...,0.837500,0.533582,0.765750,1200.0,png,1200.0,9790b8d093b46b5410432abe0ae5c05698c8f6338f0c86...,media/images/Confusion Matrix_530_9790b8d093b4...,59950.0,image-file
3,1322,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_10th_2024_18_51_55_logstep=...,ARNT,1831,873.0,0.404111,0.750620,0.000121,0.750809,...,0.826269,0.501429,0.593425,1200.0,png,1200.0,4654e6e9f4b65a33e5232fe1c4e4f3ad0d152246fc91aa...,media/images/Confusion Matrix_582_4654e6e9f4b6...,58394.0,image-file
4,2735,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_29th_2024_20_07_02_logst...,BLC3,7580,3555.0,0.111454,0.908947,0.000034,0.909569,...,0.960377,0.818516,0.361423,1200.0,png,1200.0,62feb30361869d99e534374c07accb544ecca05e54c11e...,media/images/Confusion Matrix_2370_62feb303618...,62161.0,image-file
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,352,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_16_27_48_logstep=...,ZNF785,219,99.0,0.223840,0.875766,0.000019,0.875901,...,0.949737,0.751668,0.293426,1200.0,png,1200.0,774761deed86c76bcda8a44b4a3b2a90bb747b702d742e...,media/images/Confusion Matrix_66_774761deed86c...,56456.0,image-file
147,1636,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_09th_2024_03_13_54_logstep=...,ZNF792,862,408.0,0.263192,0.808337,0.000126,0.812972,...,0.887947,0.621292,0.530071,1200.0,png,1200.0,0a705a0c192a9c707044695c28c4b400f4158812f32126...,media/images/Confusion Matrix_272_0a705a0c192a...,59776.0,image-file
148,246,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_24th_2024_11_52_13_logstep=...,ZSCAN23,275,126.0,0.371474,0.776157,0.000018,0.781929,...,0.858684,0.558057,0.493020,1200.0,png,1200.0,f1f3edfd44a11d0b78090ba0a1cd55c3cd31b4e702be1c...,media/images/Confusion Matrix_84_f1f3edfd44a11...,56203.0,image-file
149,2708,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_01th_2024_02_09_02_logstep=...,ZSCAN9,601,282.0,0.307224,0.812420,0.000132,0.812749,...,0.890887,0.625169,0.483263,1200.0,png,1200.0,e5c3db86ea9e90d0c522f3bbd8795316ce7d735f6f33ba...,media/images/Confusion Matrix_188_e5c3db86ea9e...,54036.0,image-file


In [13]:
# Inspect the config dict stored for the second row of the best-run table.
best_acc_df['config'].iloc[1]

{'epochs': 10,
 'Dropout': 0.1,
 'batch_size': 1040,
 'Weight Decay': 0.005,
 'architecture': 'TFBS_Finetuned_Models',
 'learning_rate': 0.0001,
 'warm up percentage': 0.1,
 'Maximum sequence length': 100}

In [46]:
# Root directory under which the model folders are looked up; the cleanup
# cell joins it with each row's tag.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
base_path= "/data/private/pdutta/DNABERT_output/TFBS/Finetuned_models"

In [47]:
def _remove_sibling_dirs(parent_dir, keep_path, label):
    """Delete every real sub-directory of ``parent_dir`` except ``keep_path``.

    Args:
        parent_dir: directory whose entries are examined.
        keep_path: full path (``parent_dir`` joined with the entry name) of
            the one entry that must survive.
        label: text used in the "Deleting <label>: <path>" progress message.

    Regular files and symbolic links are reported and left in place:
    ``shutil.rmtree`` raises on both, which would otherwise abort the whole
    cleanup half-way through.
    """
    for entry in os.listdir(parent_dir):
        candidate = os.path.join(parent_dir, entry)
        if candidate == keep_path:
            continue
        if os.path.islink(candidate) or not os.path.isdir(candidate):
            print(f"Skipping non-directory entry: {candidate}")
            continue
        print(f"Deleting {label}: {candidate}")
        shutil.rmtree(candidate)


# WARNING: this cell permanently deletes directories below ``base_path``.
# For every row of ``best_acc_df`` it keeps only the model directory that
# matches the row's hyperparameters and removes the other model directories
# and the other learning-rate directories of that tag.
for _row_label, row in best_acc_df.iterrows():
    config = row['config']
    tag = row['tags']

    # Learning-rate folder name, e.g. 0.0001 -> '1.0e-04' -> '4' -> 'e-4'.
    learning_rate_str = format(config['learning_rate'], '.1e').split('e-')[-1].lstrip('0')
    learning_rate_folder = f"e-{learning_rate_str}"

    # Substring that identifies the best model's directory name.
    best_model_dir_pattern = f"_bs={config['batch_size']}_lr={config['learning_rate']}_wp={config['warm up percentage']}_dp={config['Dropout']}_wd={config['Weight Decay']}_len={config['Maximum sequence length']}_epoch={config['epochs']}.0"

    tag_directory = os.path.join(base_path, tag)
    learning_rate_directory = os.path.join(tag_directory, learning_rate_folder)

    # Tags without the expected learning-rate directory are skipped silently.
    # isdir (not exists) so a stray file of that name cannot break listdir.
    if not os.path.isdir(learning_rate_directory):
        continue

    # First directory whose name contains the pattern, or "" when none does.
    best_model_path = next(
        (
            os.path.join(learning_rate_directory, model_dir)
            for model_dir in os.listdir(learning_rate_directory)
            if best_model_dir_pattern in model_dir
        ),
        "",
    )

    if not best_model_path:
        # A failed match is the only sign that the derived folder or pattern
        # may be wrong, so nothing is deleted for this tag; removing the
        # sibling learning-rate directories here could destroy the actual
        # best checkpoint.
        print("No best model match found in the learning rate directory.")
        continue

    # Delete other models in the learning rate directory
    _remove_sibling_dirs(learning_rate_directory, best_model_path, "model")

    # Now, delete other learning rate directories under the tag
    _remove_sibling_dirs(tag_directory, learning_rate_directory, "learning rate directory")

Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-5/logstep=211_bs=1040_lr=3e-05_wp=0.1_dp=0.1_wd=0.001_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-5/logstep=211_bs=1040_lr=3e-05_wp=0.1_dp=0.1_wd=0.005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-5/logstep=211_bs=1040_lr=1e-05_wp=0.1_dp=0.1_wd=0.001_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-5/logstep=211_bs=1040_lr=1e-05_wp=0.1_dp=0.1_wd=0.0005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-5/logstep=211_bs=1040_lr=1e-05_wp=0.1_dp=0.1_wd=0.005_len=100_epoch=10.0
Deleting learning rate directory: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-3
Deleting learning rate directory: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AGO2/e-4
Deleting learning rate dire

In [12]:
# Show tag and accuracy only, best accuracy first.
best_acc_df[['tags', 'eval_acc']].sort_values(by="eval_acc", ascending=False)

Unnamed: 0,tags,eval_acc
62,RBM14,0.968908
72,SPI1,0.957358
42,MAFF,0.939551
78,THAP1,0.937964
14,E2F4,0.918800
...,...,...
35,ILF3,0.715873
10,CHD7,0.709527
58,PTRF,0.705040
79,THRAP3,0.683565


In [13]:
# Write the per-tag accuracy ranking (best first) to a tab-separated file.
accuracy_ranking = best_acc_df.sort_values(by="eval_acc", ascending=False)[['tags','eval_acc']]
accuracy_ranking.to_csv("TFBS_accuracy_Stat.tsv", sep="\t")

In [None]:
# Display the table holding the highest-'eval_acc' run of every tag.
best_acc_df