In [1]:
import pandas as pd
import wandb

In [2]:
# Open a W&B public-API client and request the runs of one project,
# addressed as "<entity>/<project>".
api = wandb.Api()
entity = "pratik24111991"
project = "TFBS_Finetuned_Models_Work"
runs = api.runs(f"{entity}/{project}")

In [3]:
# Collect one (summary, config, name, tags) record per run in a single pass.
#   - summary: output metrics such as accuracy; ._json_dict is used, per the
#     original note, to leave out large files.
#   - config:  hyperparameters, minus the special keys that start with "_".
#   - name:    the human-readable run name.
#   - tags:    the run's tag list.
run_records = [
    (
        run.summary._json_dict,
        {key: value for key, value in run.config.items() if not key.startswith("_")},
        run.name,
        run.tags,
    )
    for run in runs
]

# Transpose the records into the four parallel lists (all empty when there are no runs).
if run_records:
    summary_list, config_list, name_list, tag_list = (
        list(column) for column in zip(*run_records)
    )
else:
    summary_list, config_list, name_list, tag_list = [], [], [], []

# One row per run; the summary and config dicts stay nested in their own columns.
runs_df = pd.DataFrame(
    {
        "summary": summary_list,
        "config": config_list,
        "name": name_list,
        "tags": tag_list,
    }
)

In [4]:
# Show the assembled table: one row per run with summary, config, name and tags columns.
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'eval_acc': 0.8141840209238058, 'eval_auc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_06_55_43_logstep=...,"[H4K8ac, TFBS_NonTFBS, e-6]"
1,"{'eval_Validation loss': 0.31290883438972134, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_02_00_36_logstep=...,"[POLR2AphosphoS5, TFBS_NonTFBS, e-5]"
2,"{'_step': 11223, '_wandb': {'runtime': 23864},...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_17_48_logstep=...,"[H4K8ac, TFBS_NonTFBS, e-6]"
3,"{'_timestamp': 1710908223.2083795, 'train_loss...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_14_05_logstep=...,"[CHD7, TFBS_NonTFBS, e-6]"
4,"{'eval_precision': 0.4991053469271709, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_10_22_logstep=...,"[CHD7, TFBS_NonTFBS, e-6]"
...,...,...,...,...
2808,"{'_timestamp': 1709079329.9662905, 'train_loss...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,"[SAP30, TFBS_NonTFBS, e-3]"
2809,"{'_wandb': {'runtime': 693}, '_timestamp': 170...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,"[SAP30, TFBS_NonTFBS, e-6]"
2810,"{'_runtime': 608.8705170154572, 'eval_acc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,"[SAP30, TFBS_NonTFBS, e-5]"
2811,"{'eval_precision': 0.8894432333097333, 'eval_V...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,"[SAP30, TFBS_NonTFBS, e-4]"


In [5]:
def filter_tags(tags):
    """Return the tags that contain neither a hyphen nor the substring 'TFBS'.

    Args:
        tags: Iterable of tag strings attached to a run.

    Returns:
        A new list holding the surviving tags in their original order.
    """
    # NOTE(review): any tag containing '-' is discarded, so a hyphenated
    # target name would be removed as well — confirm no such tags exist.
    kept = []
    for tag in tags:
        if '-' in tag or 'TFBS' in tag:
            continue
        kept.append(tag)
    return kept

In [6]:
# Replace each run's tag list with its filtered version (element-wise over the column).
runs_df['tags'] = runs_df['tags'].map(filter_tags)

In [7]:
# Re-display the table to check the 'tags' column after filter_tags was applied.
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'eval_acc': 0.8141840209238058, 'eval_auc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_06_55_43_logstep=...,[H4K8ac]
1,"{'eval_Validation loss': 0.31290883438972134, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_02_00_36_logstep=...,[POLR2AphosphoS5]
2,"{'_step': 11223, '_wandb': {'runtime': 23864},...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_17_48_logstep=...,[H4K8ac]
3,"{'_timestamp': 1710908223.2083795, 'train_loss...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_14_05_logstep=...,[CHD7]
4,"{'eval_precision': 0.4991053469271709, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_10_22_logstep=...,[CHD7]
...,...,...,...,...
2808,"{'_timestamp': 1709079329.9662905, 'train_loss...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,[SAP30]
2809,"{'_wandb': {'runtime': 693}, '_timestamp': 170...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,[SAP30]
2810,"{'_runtime': 608.8705170154572, 'eval_acc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,[SAP30]
2811,"{'eval_precision': 0.8894432333097333, 'eval_V...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,[SAP30]


In [8]:
# Flatten every run's summary dict into its own columns; nested keys come out
# as dotted column names.
summaries_df = pd.json_normalize(runs_df['summary'])

# Put the flattened summary columns side by side with the remaining run columns,
# leaving out the original nested 'summary' column.
expanded_runs_df = pd.concat(
    [runs_df.drop(columns='summary'), summaries_df],
    axis='columns',
)

# Collapse each tag list to its first entry; an empty list becomes None.
expanded_runs_df['tags'] = expanded_runs_df['tags'].map(
    lambda tag_list: tag_list[0] if tag_list else None
)

In [9]:
# Inspect the widened frame: summary keys are now columns and 'tags' holds a single value (or None).
expanded_runs_df

Unnamed: 0,config,name,tags,eval_acc,eval_auc,eval_mcc,_timestamp,learning_rate,_step,eval_f1,...,eval_precision,eval_Validation loss,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,_wandb.runtime
0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_06_55_43_logstep=...,H4K8ac,0.814184,0.892406,0.629269,1.710939e+09,2.334135e-06,3741.0,0.814082,...,0.815019,0.410164,58115.0,image-file,1200.0,png,1200.0,17b466f0785c15729ab2e248d4fd97251dc86efa904fbc...,media/images/Confusion Matrix_0_17b466f0785c15...,
1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_02_00_36_logstep=...,POLR2AphosphoS5,0.863051,0.940814,0.727201,1.710932e+09,2.333550e-05,9214.0,0.862936,...,0.864185,0.312909,62762.0,image-file,1200.0,png,1200.0,895ffc6bdfe4cfa48cf5adc862bbdc468d488a289f1405...,media/images/Confusion Matrix_0_895ffc6bdfe4cf...,
2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_17_48_logstep=...,H4K8ac,0.813573,0.891846,0.627656,1.710930e+09,1.119124e-07,11223.0,0.813521,...,0.814033,0.412207,57742.0,image-file,1200.0,png,1200.0,6af5f36b11d4c94a47b1cf586d94d261751d89b13434b4...,media/images/Confusion Matrix_7482_6af5f36b11d...,23864.0
3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_14_05_logstep=...,CHD7,0.552650,0.576073,0.105400,1.710908e+09,5.151515e-07,93.0,0.548396,...,0.553621,0.683100,57583.0,image-file,1200.0,png,1200.0,a17f6a75d4f749fd7182fb9b3e59c03f8275f222c2f47e...,media/images/Confusion Matrix_62_a17f6a75d4f74...,212.0
4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_10_22_logstep=...,CHD7,0.500000,0.496717,-0.001758,1.710908e+09,1.717172e-07,93.0,0.495209,...,0.499105,0.701082,54928.0,image-file,1200.0,png,1200.0,ec7ff63fb1d36f8036c462810568f858029fcb5b983c11...,media/images/Confusion Matrix_62_ec7ff63fb1d36...,211.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2808,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,0.503869,0.505400,0.000000,1.709079e+09,3.873874e-04,327.0,0.335048,...,0.251934,0.693131,49619.0,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,692.0
2809,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,0.798092,0.879813,0.597406,1.709079e+09,1.381381e-07,324.0,0.797994,...,0.799096,0.537199,57947.0,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,693.0
2810,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,0.899523,0.962615,0.799286,1.709079e+09,1.381381e-06,324.0,0.899477,...,0.899874,0.245715,59356.0,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,700.0
2811,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,0.887546,0.951327,0.776720,1.709079e+09,1.381381e-05,324.0,0.887354,...,0.889443,0.341513,58933.0,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,696.0


In [10]:
# Keep only the rows that have a value in every column, then renumber them
# from 0; the previous row labels are preserved in a new 'index' column.
# NOTE(review): the frame is reassigned from itself, so this cell is meant to
# run once per fresh kernel — re-running it operates on its own output.
expanded_runs_df = (
    expanded_runs_df
    .dropna(axis="index", how="any")
    .reset_index(drop=False)
)
expanded_runs_df

Unnamed: 0,index,config,name,tags,eval_acc,eval_auc,eval_mcc,_timestamp,learning_rate,_step,...,eval_precision,eval_Validation loss,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,_wandb.runtime
0,2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_17_48_logstep=...,H4K8ac,0.813573,0.891846,0.627656,1.710930e+09,1.119124e-07,11223.0,...,0.814033,0.412207,57742.0,image-file,1200.0,png,1200.0,6af5f36b11d4c94a47b1cf586d94d261751d89b13434b4...,media/images/Confusion Matrix_7482_6af5f36b11d...,23864.0
1,3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_14_05_logstep=...,CHD7,0.552650,0.576073,0.105400,1.710908e+09,5.151515e-07,93.0,...,0.553621,0.683100,57583.0,image-file,1200.0,png,1200.0,a17f6a75d4f749fd7182fb9b3e59c03f8275f222c2f47e...,media/images/Confusion Matrix_62_a17f6a75d4f74...,212.0
2,4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_10_22_logstep=...,CHD7,0.500000,0.496717,-0.001758,1.710908e+09,1.717172e-07,93.0,...,0.499105,0.701082,54928.0,image-file,1200.0,png,1200.0,ec7ff63fb1d36f8036c462810568f858029fcb5b983c11...,media/images/Confusion Matrix_62_ec7ff63fb1d36...,211.0
3,5,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_06_39_logstep=...,CHD7,0.552650,0.576073,0.105400,1.710908e+09,5.151515e-07,93.0,...,0.553621,0.683100,57583.0,image-file,1200.0,png,1200.0,a17f6a75d4f749fd7182fb9b3e59c03f8275f222c2f47e...,media/images/Confusion Matrix_62_a17f6a75d4f74...,212.0
4,6,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_20th_2024_00_02_53_logstep=...,CHD7,0.500000,0.496717,-0.001758,1.710908e+09,1.717172e-07,93.0,...,0.499105,0.701082,54928.0,image-file,1200.0,png,1200.0,ec7ff63fb1d36f8036c462810568f858029fcb5b983c11...,media/images/Confusion Matrix_62_ec7ff63fb1d36...,212.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2744,2808,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,0.503869,0.505400,0.000000,1.709079e+09,3.873874e-04,327.0,...,0.251934,0.693131,49619.0,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,692.0
2745,2809,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,0.798092,0.879813,0.597406,1.709079e+09,1.381381e-07,324.0,...,0.799096,0.537199,57947.0,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,693.0
2746,2810,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,0.899523,0.962615,0.799286,1.709079e+09,1.381381e-06,324.0,...,0.899874,0.245715,59356.0,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,700.0
2747,2811,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,0.887546,0.951327,0.776720,1.709079e+09,1.381381e-05,324.0,...,0.889443,0.341513,58933.0,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,696.0


In [11]:
# For every tag, find the row label whose 'eval_acc' is the largest in that group.
idx = (
    expanded_runs_df
    .groupby(by='tags')['eval_acc']
    .idxmax()
)

# Pull those rows out of the full frame and renumber them from 0,
# discarding the old row labels.
best_acc_df = expanded_runs_df.loc[idx]
best_acc_df = best_acc_df.reset_index(drop=True)
best_acc_df

Unnamed: 0,index,config,name,tags,eval_acc,eval_auc,eval_mcc,_timestamp,learning_rate,_step,...,eval_precision,eval_Validation loss,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,_wandb.runtime
0,572,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_07_48_56_logstep=...,AGO2,0.895999,0.936196,0.794619,1.710159e+09,0.000004,633.0,...,0.898803,0.298919,60413.0,image-file,1200.0,png,1200.0,958cd42b536bc3994165310ae2cbd90fbd514c41e93d89...,media/images/Confusion Matrix_422_958cd42b536b...,1362.0
1,1523,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_03th_2024_07_58_34_logstep=...,ARID3A,0.764071,0.837500,0.533582,1.709472e+09,0.000036,795.0,...,0.768812,0.765750,59950.0,image-file,1200.0,png,1200.0,9790b8d093b46b5410432abe0ae5c05698c8f6338f0c86...,media/images/Confusion Matrix_530_9790b8d093b4...,1699.0
2,760,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_10th_2024_18_51_55_logstep=...,ARNT,0.750443,0.826269,0.501429,1.710113e+09,0.000121,873.0,...,0.750809,0.593425,58394.0,image-file,1200.0,png,1200.0,4654e6e9f4b65a33e5232fe1c4e4f3ad0d152246fc91aa...,media/images/Confusion Matrix_582_4654e6e9f4b6...,1831.0
3,2173,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_29th_2024_20_07_02_logst...,BLC3,0.908906,0.960377,0.818516,1.709262e+09,0.000034,3555.0,...,0.909569,0.361423,62161.0,image-file,1200.0,png,1200.0,62feb30361869d99e534374c07accb544ecca05e54c11e...,media/images/Confusion Matrix_2370_62feb303618...,7580.0
4,1748,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_02th_2024_07_41_10_logstep=...,BRD9,0.738697,0.827697,0.478249,1.709384e+09,0.000004,243.0,...,0.740089,0.512739,58948.0,image-file,1200.0,png,1200.0,0b36807bba0d5ceda023762edbaf45d0b463b29bbec878...,media/images/Confusion Matrix_162_0b36807bba0d...,534.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,613,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_06_11_19_logstep=...,ZNF692,0.832101,0.909846,0.664753,1.710153e+09,0.000129,486.0,...,0.832817,0.425019,55171.0,image-file,1200.0,png,1200.0,d0d6ce66dba2ae38fd584c6e040cfc876601c15f1f4a5a...,media/images/Confusion Matrix_324_d0d6ce66dba2...,1025.0
114,122,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_15th_2024_05_47_54_logstep=...,ZNF777,0.912411,0.957537,0.826734,1.710496e+09,0.000005,102.0,...,0.914701,0.250427,56452.0,image-file,1200.0,png,1200.0,447a6308503993f992c6b96caab6b4a97b21c256298d66...,media/images/Confusion Matrix_68_447a630850399...,237.0
115,1074,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_09th_2024_03_13_54_logstep=...,ZNF792,0.809028,0.887947,0.621292,1.709973e+09,0.000126,408.0,...,0.812972,0.530071,59776.0,image-file,1200.0,png,1200.0,0a705a0c192a9c707044695c28c4b400f4158812f32126...,media/images/Confusion Matrix_272_0a705a0c192a...,862.0
116,2146,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_01th_2024_02_09_02_logstep=...,ZSCAN9,0.812469,0.890887,0.625169,1.709277e+09,0.000132,282.0,...,0.812749,0.483263,54036.0,image-file,1200.0,png,1200.0,e5c3db86ea9e90d0c522f3bbd8795316ce7d735f6f33ba...,media/images/Confusion Matrix_188_e5c3db86ea9e...,601.0


In [12]:
# Rank the per-tag best runs from highest to lowest accuracy, showing only tag and score.
best_acc_df[['tags', 'eval_acc']].sort_values("eval_acc", ascending=False)

Unnamed: 0,tags,eval_acc
62,RBM14,0.968908
72,SPI1,0.957358
42,MAFF,0.939551
78,THAP1,0.937964
14,E2F4,0.918800
...,...,...
35,ILF3,0.715873
10,CHD7,0.709527
58,PTRF,0.705040
79,THRAP3,0.683565


In [13]:
# Write the accuracy ranking (tag and score, best first) to a tab-separated file.
# The frame's row labels are written too, as the leading column.
(
    best_acc_df
    .loc[:, ['tags', 'eval_acc']]
    .sort_values("eval_acc", ascending=False)
    .to_csv("TFBS_accuracy_Stat.tsv", sep="\t")
)

In [None]:
# Display the per-tag best-accuracy rows.
best_acc_df