In [1]:
import pandas as pd
import wandb
import os, shutil

In [2]:
api = wandb.Api()
entity, project = "pratik24111991", "TFBS_Finetuned_Models_Work"
runs = api.runs(entity + "/" + project)

In [3]:
summary_list, config_list, name_list, tag_list = [], [], [], []
for run in runs:
    # .summary contains output keys/values for
    # metrics such as accuracy.
    #  We call ._json_dict to omit large files
    summary_list.append(run.summary._json_dict)

    # .config contains the hyperparameters.
    #  We remove special values that start with _.
    config_list.append({k: v for k, v in run.config.items() if not k.startswith("_")})

    # .name is the human-readable name of the run.
    name_list.append(run.name)
    tag_list.append(run.tags)

runs_df = pd.DataFrame(
    {"summary": summary_list, "config": config_list, "name": name_list, "tags":tag_list}
)

In [4]:
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'eval_auc': 0.9879245179290536, 'eval_mcc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
1,"{'eval_Validation loss': 0.6963582038879395, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
2,"{'learning_rate': 0, '_wandb': {'runtime': 82}...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
3,"{'eval_Validation loss': 0.6931957205136617, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,"[AGO1, TFBS_NonTFBS, e-3]"
4,"{'_wandb': {'runtime': 237}, 'eval_precision':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,"[AGO1, TFBS_NonTFBS, e-3]"
...,...,...,...,...
5186,"{'eval_recall': 0.5, 'eval_Validation loss': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,"[SAP30, TFBS_NonTFBS, e-3]"
5187,"{'eval_mcc': 0.5974061882261203, '_timestamp':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,"[SAP30, TFBS_NonTFBS, e-6]"
5188,"{'_step': 324, 'eval_f1': 0.8994774307199206, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,"[SAP30, TFBS_NonTFBS, e-5]"
5189,"{'eval_recall': 0.8872796669859276, 'learning_...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,"[SAP30, TFBS_NonTFBS, e-4]"


In [5]:
# Define a function to filter tags
def filter_tags(tags):
    return [tag for tag in tags if '-' not in tag and 'TFBS' not in tag]

In [6]:
runs_df['tags'] = runs_df['tags'].apply(filter_tags)

In [7]:
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'eval_auc': 0.9879245179290536, 'eval_mcc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,[SAFB]
1,"{'eval_Validation loss': 0.6963582038879395, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,[SAFB]
2,"{'learning_rate': 0, '_wandb': {'runtime': 82}...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,[SAFB]
3,"{'eval_Validation loss': 0.6931957205136617, '...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,[AGO1]
4,"{'_wandb': {'runtime': 237}, 'eval_precision':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,[AGO1]
...,...,...,...,...
5186,"{'eval_recall': 0.5, 'eval_Validation loss': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,[SAP30]
5187,"{'eval_mcc': 0.5974061882261203, '_timestamp':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,[SAP30]
5188,"{'_step': 324, 'eval_f1': 0.8994774307199206, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,[SAP30]
5189,"{'eval_recall': 0.8872796669859276, 'learning_...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,[SAP30]


In [8]:
# Normalize the summary column to create a DataFrame of summaries
summaries_df = pd.json_normalize(runs_df['summary'])

# Concatenate the new summary columns with the original DataFrame (minus the old 'summary' column)
expanded_runs_df = pd.concat([runs_df.drop('summary', axis=1), summaries_df], axis=1)
# Assuming expanded_runs_df is your DataFrame
expanded_runs_df['tags'] = expanded_runs_df['tags'].apply(lambda x: x[0] if x else None)

In [9]:
expanded_runs_df

Unnamed: 0,config,name,tags,eval_auc,eval_mcc,train_loss,_step,eval_recall,learning_rate,eval_Validation loss,...,eval_acc,eval_precision,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,_wandb.runtime
0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,SAFB,0.987925,0.935781,0.112886,30.0,0.967931,0.000000e+00,0.113446,...,0.967675,0.967850,5d7bf47ec9113c87ade58bd4b1ac511ccf426ff6e91d9c...,media/images/Confusion Matrix_27_5d7bf47ec9113...,55532.0,image-file,1200.0,png,1200.0,74.0
1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,SAFB,0.606054,0.000000,0.698205,30.0,0.500000,0.000000e+00,0.696358,...,0.491090,0.245545,7d6b603b312f8bdda3e14dc028964f8d2e244322b393a7...,media/images/Confusion Matrix_27_7d6b603b312f8...,50723.0,image-file,1200.0,png,1200.0,76.0
2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,SAFB,0.986670,0.936586,0.097331,30.0,0.968338,0.000000e+00,0.112461,...,0.968090,0.968248,813516ac47ebf073631d951424352c57d4d1daab428a3b...,media/images/Confusion Matrix_27_813516ac47ebf...,55238.0,image-file,1200.0,png,1200.0,82.0
3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,AGO1,0.505238,0.000000,0.697180,91.0,0.500000,3.000000e-04,0.693196,...,0.498625,0.249312,6e0d20a11b4a297143c1f4e331fc80d2c4579d6dbbdb8b...,media/images/Confusion Matrix_78_6e0d20a11b4a2...,51178.0,image-file,1200.0,png,1200.0,237.0
4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,AGO1,0.964254,0.868696,0.192743,91.0,0.934146,1.000000e-04,0.209951,...,0.934185,0.934551,8f3382966f137a480fcbbd26570131b803e1df06814214...,media/images/Confusion Matrix_78_8f3382966f137...,55549.0,image-file,1200.0,png,1200.0,237.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5186,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,0.505400,0.000000,0.697234,327.0,0.500000,3.873874e-04,0.693131,...,0.503869,0.251934,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file,1200.0,png,1200.0,692.0
5187,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,0.879813,0.597406,0.547333,324.0,0.798311,1.381381e-07,0.537199,...,0.798092,0.799096,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file,1200.0,png,1200.0,693.0
5188,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,0.962615,0.799286,0.242748,324.0,0.899412,1.381381e-06,0.245715,...,0.899523,0.899874,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file,1200.0,png,1200.0,700.0
5189,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,0.951327,0.776720,0.171405,324.0,0.887280,1.381381e-05,0.341513,...,0.887546,0.889443,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file,1200.0,png,1200.0,696.0


In [14]:
# Remove rows with any NaN values and keep the original index
expanded_runs_df = expanded_runs_df.dropna().reset_index()
expanded_runs_df

Unnamed: 0,level_0,index,config,name,tags,eval_auc,eval_mcc,train_loss,_step,eval_recall,...,eval_acc,eval_precision,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,_wandb.runtime
0,0,0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,SAFB,0.987925,0.935781,0.112886,30.0,0.967931,...,0.967675,0.967850,5d7bf47ec9113c87ade58bd4b1ac511ccf426ff6e91d9c...,media/images/Confusion Matrix_27_5d7bf47ec9113...,55532.0,image-file,1200.0,png,1200.0,74.0
1,1,1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,SAFB,0.606054,0.000000,0.698205,30.0,0.500000,...,0.491090,0.245545,7d6b603b312f8bdda3e14dc028964f8d2e244322b393a7...,media/images/Confusion Matrix_27_7d6b603b312f8...,50723.0,image-file,1200.0,png,1200.0,76.0
2,2,2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,SAFB,0.986670,0.936586,0.097331,30.0,0.968338,...,0.968090,0.968248,813516ac47ebf073631d951424352c57d4d1daab428a3b...,media/images/Confusion Matrix_27_813516ac47ebf...,55238.0,image-file,1200.0,png,1200.0,82.0
3,3,3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,AGO1,0.505238,0.000000,0.697180,91.0,0.500000,...,0.498625,0.249312,6e0d20a11b4a297143c1f4e331fc80d2c4579d6dbbdb8b...,media/images/Confusion Matrix_78_6e0d20a11b4a2...,51178.0,image-file,1200.0,png,1200.0,237.0
4,4,4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,AGO1,0.964254,0.868696,0.192743,91.0,0.934146,...,0.934185,0.934551,8f3382966f137a480fcbbd26570131b803e1df06814214...,media/images/Confusion Matrix_78_8f3382966f137...,55549.0,image-file,1200.0,png,1200.0,237.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4886,4886,5186,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,0.505400,0.000000,0.697234,327.0,0.500000,...,0.503869,0.251934,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file,1200.0,png,1200.0,692.0
4887,4887,5187,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,0.879813,0.597406,0.547333,324.0,0.798311,...,0.798092,0.799096,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file,1200.0,png,1200.0,693.0
4888,4888,5188,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,0.962615,0.799286,0.242748,324.0,0.899412,...,0.899523,0.899874,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file,1200.0,png,1200.0,700.0
4889,4889,5189,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,0.951327,0.776720,0.171405,324.0,0.887280,...,0.887546,0.889443,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file,1200.0,png,1200.0,696.0


In [15]:
# Group by 'tags' and find the index of the max 'eval_acc' for each group
idx = expanded_runs_df.groupby('tags')['eval_acc'].idxmax()

# Use the indices to select rows from the original DataFrame
best_acc_df = expanded_runs_df.loc[idx].reset_index(drop=True)
best_acc_df

Unnamed: 0,level_0,index,config,name,tags,eval_auc,eval_mcc,train_loss,_step,eval_recall,...,eval_acc,eval_precision,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,_wandb.runtime
0,1004,1130,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_02th_2024_01_59_24_logstep=...,ADNP,0.777999,0.396992,0.491164,84.0,0.698145,...,0.697791,0.698848,4cd68014477cf9a253946fb3a8e5731064dca356908b5e...,media/images/Confusion Matrix_72_4cd68014477cf...,59260.0,image-file,1200.0,png,1200.0,210.0
1,1206,1350,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_01th_2024_00_56_04_logstep=...,AFF1,0.856851,0.543392,0.412298,112.0,0.771234,...,0.771503,0.772159,57538da06cb056c9ea737cee9856bb0904daea3344d1ba...,media/images/Confusion Matrix_96_57538da06cb05...,56987.0,image-file,1200.0,png,1200.0,272.0
2,6,6,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_51_03_logstep=...,AGO1,0.966290,0.869502,0.173443,91.0,0.934538,...,0.934578,0.934964,eb9f9661213cd7aa22f6c54b1df7910ca13684deb9f6da...,media/images/Confusion Matrix_78_eb9f9661213cd...,54813.0,image-file,1200.0,png,1200.0,237.0
3,2651,2950,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_07_48_56_logstep=...,AGO2,0.936196,0.794619,0.264199,633.0,0.895822,...,0.895999,0.898803,958cd42b536bc3994165310ae2cbd90fbd514c41e93d89...,media/images/Confusion Matrix_422_958cd42b536b...,60413.0,image-file,1200.0,png,1200.0,1362.0
4,2085,2314,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_03_02_22_logstep=...,ARHGAP35,0.901186,0.645762,0.333016,66.0,0.821965,...,0.821501,0.823799,05bda9ec50bc339493dca11b5d394511f04b407205e0ee...,media/images/Confusion Matrix_44_05bda9ec50bc3...,57200.0,image-file,1200.0,png,1200.0,154.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416,1239,1394,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_March_29th_2024_22_13_39_logstep=...,ZSCAN4,0.924647,0.751729,0.235353,105.0,0.872456,...,0.872801,0.879305,8c84502b2c48ab7e127586b4b2de9ef4de9a25c6b57cda...,media/images/Confusion Matrix_90_8c84502b2c48a...,55101.0,image-file,1200.0,png,1200.0,255.0
417,1192,1336,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_01th_2024_01_31_02_logstep=...,ZSCAN5A,0.939999,0.743027,0.300181,40.0,0.868192,...,0.868922,0.874865,15e3928e27a81126a8756b7a2874f005f41fb2d662555a...,media/images/Confusion Matrix_36_15e3928e27a81...,58513.0,image-file,1200.0,png,1200.0,98.0
418,1511,1702,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_March_28th_2024_00_35_12_logstep=...,ZSCAN5C,0.863172,0.564038,0.422188,80.0,0.781980,...,0.781928,0.782058,93e14e1228775373425ea1ed716add7bee037c49401919...,media/images/Confusion Matrix_70_93e14e1228775...,55333.0,image-file,1200.0,png,1200.0,192.0
419,4225,4524,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_01th_2024_02_09_02_logstep=...,ZSCAN9,0.890887,0.625169,0.307224,282.0,0.812420,...,0.812469,0.812749,e5c3db86ea9e90d0c522f3bbd8795316ce7d735f6f33ba...,media/images/Confusion Matrix_188_e5c3db86ea9e...,54036.0,image-file,1200.0,png,1200.0,601.0


In [16]:
best_acc_df.iloc[1]['config']

{'epochs': 10,
 'Dropout': 0.1,
 'batch_size': 4096,
 'Weight Decay': 0.005,
 'architecture': 'TFBS_Finetuned_Models',
 'learning_rate': 0.001,
 'warm up percentage': 0.1,
 'Maximum sequence length': 100}

In [17]:
base_path= "/data/private/pdutta/DNABERT_output/TFBS/Finetuned_models"

In [18]:
for index, row in best_acc_df.iterrows():
    config = row['config']
    
    # Format the learning rate string for the folder name
    learning_rate_str = format(config['learning_rate'], '.1e').split('e-')[-1].lstrip('0')
    learning_rate_folder = f"e-{learning_rate_str}"

    tag = row['tags']
    
    # Construct the pattern to match the best model directory
    best_model_dir_pattern = f"_bs={config['batch_size']}_lr={config['learning_rate']}_wp={config['warm up percentage']}_dp={config['Dropout']}_wd={config['Weight Decay']}_len={config['Maximum sequence length']}_epoch={config['epochs']}.0"

    tag_directory = os.path.join(base_path, tag)
    learning_rate_directory = os.path.join(tag_directory, learning_rate_folder)
    
    if os.path.exists(learning_rate_directory):
        # Find the best model in the learning rate directory
        best_model_path = ""
        for model_dir in os.listdir(learning_rate_directory):
            if best_model_dir_pattern in model_dir:
                best_model_path = os.path.join(learning_rate_directory, model_dir)
                break

        # Delete other models in the learning rate directory
        if best_model_path:
            for model_dir in os.listdir(learning_rate_directory):
                full_model_dir = os.path.join(learning_rate_directory, model_dir)
                if full_model_dir != best_model_path:
                    print(f"Deleting model: {full_model_dir}")
                    shutil.rmtree(full_model_dir)
        else:
            print("No best model match found in the learning rate directory.")

        # Now, delete other learning rate directories under the tag
        for lr_dir in os.listdir(tag_directory):
            full_lr_dir_path = os.path.join(tag_directory, lr_dir)
            if full_lr_dir_path != learning_rate_directory:
                print(f"Deleting learning rate directory: {full_lr_dir_path}")
                shutil.rmtree(full_lr_dir_path)
    else:
        #print(f"Learning rate directory does not exist: {learning_rate_directory}")
        continue

Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/ADNP/e-3/logstep=12_bs=4096_lr=0.001_wp=0.1_dp=0.1_wd=0.0005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/ADNP/e-3/logstep=12_bs=4096_lr=0.003_wp=0.1_dp=0.1_wd=0.001_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/ADNP/e-3/logstep=12_bs=4096_lr=0.003_wp=0.1_dp=0.1_wd=0.0005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/ADNP/e-3/logstep=12_bs=4096_lr=0.003_wp=0.1_dp=0.1_wd=0.005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/ADNP/e-3/logstep=12_bs=4096_lr=0.001_wp=0.1_dp=0.1_wd=0.005_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AFF1/e-3/logstep=16_bs=4096_lr=0.003_wp=0.1_dp=0.1_wd=0.001_len=100_epoch=10.0
Deleting model: /data/private/pdutta/DNABERT_output/TFBS/Finetuned_models/AFF1/e

In [19]:
best_acc_df.sort_values(by="eval_acc", ascending=False)[['tags','eval_acc']]

Unnamed: 0,tags,eval_acc
217,RBM14,0.968908
230,SAFB,0.968090
265,TAF15,0.959936
254,SPI1,0.957358
290,USF1,0.951848
...,...,...
209,PRPF4,0.500579
94,H3K9me3,0.499966
88,H3K27ac,0.499856
85,H3F3A,0.499651


In [20]:
best_acc_df.sort_values(by="eval_acc", ascending=False)[['tags','eval_acc']].to_csv("TFBS_accuracy_Stat.tsv", sep="\t")

In [None]:
best_acc_df