In [1]:
import pandas as pd
import wandb

In [2]:
# Identify the W&B project whose runs are analysed in this notebook.
entity = "pratik24111991"
project = "TFBS_Finetuned_Models_Work"

# Open the W&B public API and request the project's runs by "<entity>/<project>" path.
api = wandb.Api()
runs = api.runs(f"{entity}/{project}")

In [3]:
# Gather one entry per run for each of the four columns of the table.
run_columns = {"summary": [], "config": [], "name": [], "tags": []}
for run in runs:
    # run.summary holds the logged output metrics (accuracy etc.);
    # ._json_dict is used to leave out large files.
    run_columns["summary"].append(run.summary._json_dict)

    # run.config holds the hyperparameters; keys starting with "_" are
    # special values and are left out.
    visible_config = {}
    for key, value in run.config.items():
        if not key.startswith("_"):
            visible_config[key] = value
    run_columns["config"].append(visible_config)

    # run.name is the human-readable name of the run.
    run_columns["name"].append(run.name)
    run_columns["tags"].append(run.tags)

# One row per run; the summary and config cells hold dicts, the tags cells hold lists.
runs_df = pd.DataFrame(run_columns)

In [4]:
# Display the per-run table: one row per run with its summary, config, name and tags.
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'_step': 30, '_wandb': {'runtime': 74}, 'eval...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
1,"{'Confusion Matrix': {'width': 1200, 'format':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
2,"{'eval_f1': 0.9680891643716922, 'eval_auc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,"[SAFB, TFBS_NonTFBS, e-3]"
3,"{'train_loss': 0.6971799089358404, 'eval_recal...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,"[AGO1, TFBS_NonTFBS, e-3]"
4,"{'eval_precision': 0.9345507090854328, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,"[AGO1, TFBS_NonTFBS, e-3]"
...,...,...,...,...
5186,"{'_runtime': 606.5656895637512, 'eval_acc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,"[SAP30, TFBS_NonTFBS, e-3]"
5187,"{'eval_auc': 0.8798126755400077, 'learning_rat...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,"[SAP30, TFBS_NonTFBS, e-6]"
5188,"{'eval_acc': 0.8995230524642289, 'eval_mcc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,"[SAP30, TFBS_NonTFBS, e-5]"
5189,"{'_wandb': {'runtime': 696}, '_timestamp': 170...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,"[SAP30, TFBS_NonTFBS, e-4]"


In [5]:
def filter_tags(tags):
    """Return, in order, the tags that contain neither '-' nor 'TFBS'.

    Args:
        tags: iterable of tag strings attached to a run.

    Returns:
        A new list with every tag that has a hyphen or the substring
        'TFBS' anywhere in it removed. Note that this also removes any
        name that happens to contain a hyphen itself.
    """
    kept = []
    for label in tags:
        if '-' in label or 'TFBS' in label:
            continue
        kept.append(label)
    return kept

In [6]:
# Run filter_tags over every run's tag list and store the reduced lists back in the 'tags' column.
runs_df['tags'] = runs_df['tags'].map(filter_tags)

In [7]:
# Display runs_df again to inspect the 'tags' column after filter_tags was applied.
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'_step': 30, '_wandb': {'runtime': 74}, 'eval...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,[SAFB]
1,"{'Confusion Matrix': {'width': 1200, 'format':...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,[SAFB]
2,"{'eval_f1': 0.9680891643716922, 'eval_auc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,[SAFB]
3,"{'train_loss': 0.6971799089358404, 'eval_recal...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,[AGO1]
4,"{'eval_precision': 0.9345507090854328, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,[AGO1]
...,...,...,...,...
5186,"{'_runtime': 606.5656895637512, 'eval_acc': 0....","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,[SAP30]
5187,"{'eval_auc': 0.8798126755400077, 'learning_rat...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,[SAP30]
5188,"{'eval_acc': 0.8995230524642289, 'eval_mcc': 0...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,[SAP30]
5189,"{'_wandb': {'runtime': 696}, '_timestamp': 170...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,[SAP30]


In [8]:
def _first_or_none(tag_names):
    """Return the first element of tag_names, or None when tag_names is empty."""
    return tag_names[0] if tag_names else None


# Flatten every run's summary dict into its own row of columns.
summaries_df = pd.json_normalize(runs_df['summary'])

# Place the flattened summary columns beside the remaining run columns
# (everything except the raw 'summary' dict column).
expanded_runs_df = pd.concat(
    [runs_df.drop(columns='summary'), summaries_df],
    axis=1,
)

# Collapse each tag list to its first entry (None for an empty list).
expanded_runs_df['tags'] = expanded_runs_df['tags'].map(_first_or_none)

In [9]:
# Display the expanded table, in which each flattened summary key has its own column.
expanded_runs_df

Unnamed: 0,config,name,tags,_step,eval_auc,eval_mcc,train_loss,_runtime,learning_rate,eval_f1,...,_timestamp,eval_recall,_wandb.runtime,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256
0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,SAFB,30.0,0.987925,0.935781,0.112886,74.733844,0.000000e+00,0.967675,...,1.712586e+09,0.967931,74.0,media/images/Confusion Matrix_27_5d7bf47ec9113...,55532.0,image-file,1200.0,png,1200.0,5d7bf47ec9113c87ade58bd4b1ac511ccf426ff6e91d9c...
1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,SAFB,30.0,0.606054,0.000000,0.698205,72.861082,0.000000e+00,0.329350,...,1.712585e+09,0.500000,76.0,media/images/Confusion Matrix_27_7d6b603b312f8...,50723.0,image-file,1200.0,png,1200.0,7d6b603b312f8bdda3e14dc028964f8d2e244322b393a7...
2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,SAFB,30.0,0.986670,0.936586,0.097331,78.674067,0.000000e+00,0.968089,...,1.712585e+09,0.968338,82.0,media/images/Confusion Matrix_27_813516ac47ebf...,55238.0,image-file,1200.0,png,1200.0,813516ac47ebf073631d951424352c57d4d1daab428a3b...
3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,AGO1,91.0,0.505238,0.000000,0.697180,214.198024,3.000000e-04,0.332722,...,1.712585e+09,0.500000,237.0,media/images/Confusion Matrix_78_6e0d20a11b4a2...,51178.0,image-file,1200.0,png,1200.0,6e0d20a11b4a297143c1f4e331fc80d2c4579d6dbbdb8b...
4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,AGO1,91.0,0.964254,0.868696,0.192743,214.822578,1.000000e-04,0.934166,...,1.712585e+09,0.934146,237.0,media/images/Confusion Matrix_78_8f3382966f137...,55549.0,image-file,1200.0,png,1200.0,8f3382966f137a480fcbbd26570131b803e1df06814214...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5186,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,327.0,0.505400,0.000000,0.697234,606.565690,3.873874e-04,0.335048,...,1.709079e+09,0.500000,692.0,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...
5187,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,324.0,0.879813,0.597406,0.547333,603.007237,1.381381e-07,0.797994,...,1.709079e+09,0.798311,693.0,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...
5188,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,324.0,0.962615,0.799286,0.242748,608.870517,1.381381e-06,0.899477,...,1.709079e+09,0.899412,700.0,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...
5189,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,324.0,0.951327,0.776720,0.171405,604.127964,1.381381e-05,0.887354,...,1.709079e+09,0.887280,696.0,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...


In [10]:
# Keep only the runs that have a value in every column. reset_index() then
# renumbers the rows from 0 and stores the pre-filter row labels in a new
# 'index' column.
# Note: expanded_runs_df is reassigned from itself, so re-running this cell on
# its own applies reset_index() a second time.
complete_rows = expanded_runs_df.dropna(axis=0, how='any')
expanded_runs_df = complete_rows.reset_index(drop=False)
expanded_runs_df

Unnamed: 0,index,config,name,tags,_step,eval_auc,eval_mcc,train_loss,_runtime,learning_rate,...,_timestamp,eval_recall,_wandb.runtime,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256
0,0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_10_33_logstep=...,SAFB,30.0,0.987925,0.935781,0.112886,74.733844,0.000000e+00,...,1.712586e+09,0.967931,74.0,media/images/Confusion Matrix_27_5d7bf47ec9113...,55532.0,image-file,1200.0,png,1200.0,5d7bf47ec9113c87ade58bd4b1ac511ccf426ff6e91d9c...
1,1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_09_05_logstep=...,SAFB,30.0,0.606054,0.000000,0.698205,72.861082,0.000000e+00,...,1.712585e+09,0.500000,76.0,media/images/Confusion Matrix_27_7d6b603b312f8...,50723.0,image-file,1200.0,png,1200.0,7d6b603b312f8bdda3e14dc028964f8d2e244322b393a7...
2,2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_07_33_logstep=...,SAFB,30.0,0.986670,0.936586,0.097331,78.674067,0.000000e+00,...,1.712585e+09,0.968338,82.0,media/images/Confusion Matrix_27_813516ac47ebf...,55238.0,image-file,1200.0,png,1200.0,813516ac47ebf073631d951424352c57d4d1daab428a3b...
3,3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_10_03_26_logstep=...,AGO1,91.0,0.505238,0.000000,0.697180,214.198024,3.000000e-04,...,1.712585e+09,0.500000,237.0,media/images/Confusion Matrix_78_6e0d20a11b4a2...,51178.0,image-file,1200.0,png,1200.0,6e0d20a11b4a297143c1f4e331fc80d2c4579d6dbbdb8b...
4,4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_59_19_logstep=...,AGO1,91.0,0.964254,0.868696,0.192743,214.822578,1.000000e-04,...,1.712585e+09,0.934146,237.0,media/images/Confusion Matrix_78_8f3382966f137...,55549.0,image-file,1200.0,png,1200.0,8f3382966f137a480fcbbd26570131b803e1df06814214...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4886,5186,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,327.0,0.505400,0.000000,0.697234,606.565690,3.873874e-04,...,1.709079e+09,0.500000,692.0,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...
4887,5187,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,324.0,0.879813,0.597406,0.547333,603.007237,1.381381e-07,...,1.709079e+09,0.798311,693.0,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...
4888,5188,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,324.0,0.962615,0.799286,0.242748,608.870517,1.381381e-06,...,1.709079e+09,0.899412,700.0,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...
4889,5189,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,324.0,0.951327,0.776720,0.171405,604.127964,1.381381e-05,...,1.709079e+09,0.887280,696.0,media/images/Confusion Matrix_216_7375fb841757...,58933.0,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...


In [11]:
# For every tag, look up the row label of the run with the highest 'eval_acc'.
accuracy_by_tag = expanded_runs_df.groupby('tags')['eval_acc']
idx = accuracy_by_tag.idxmax()

# Pull those rows out of the full table, then renumber them from 0.
best_acc_df = expanded_runs_df.loc[idx]
best_acc_df = best_acc_df.reset_index(drop=True)
best_acc_df

Unnamed: 0,index,config,name,tags,_step,eval_auc,eval_mcc,train_loss,_runtime,learning_rate,...,_timestamp,eval_recall,_wandb.runtime,Confusion Matrix.path,Confusion Matrix.size,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256
0,1130,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_02th_2024_01_59_24_logstep=...,ADNP,84.0,0.777999,0.396992,0.491164,195.097749,0.000074,...,1.712038e+09,0.698145,210.0,media/images/Confusion Matrix_72_4cd68014477cf...,59260.0,image-file,1200.0,png,1200.0,4cd68014477cf9a253946fb3a8e5731064dca356908b5e...
1,1350,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_01th_2024_00_56_04_logstep=...,AFF1,112.0,0.856851,0.543392,0.412298,252.408261,0.000074,...,1.711948e+09,0.771234,272.0,media/images/Confusion Matrix_96_57538da06cb05...,56987.0,image-file,1200.0,png,1200.0,57538da06cb056c9ea737cee9856bb0904daea3344d1ba...
2,6,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_08th_2024_09_51_03_logstep=...,AGO1,91.0,0.966290,0.869502,0.173443,214.870189,0.000100,...,1.712584e+09,0.934538,237.0,media/images/Confusion Matrix_78_eb9f9661213cd...,54813.0,image-file,1200.0,png,1200.0,eb9f9661213cd7aa22f6c54b1df7910ca13684deb9f6da...
3,2950,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_07_48_56_logstep=...,AGO2,633.0,0.936196,0.794619,0.264199,1204.465479,0.000004,...,1.710159e+09,0.895822,1362.0,media/images/Confusion Matrix_422_958cd42b536b...,60413.0,image-file,1200.0,png,1200.0,958cd42b536bc3994165310ae2cbd90fbd514c41e93d89...
4,2314,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_03_02_22_logstep=...,ARHGAP35,66.0,0.901186,0.645762,0.333016,125.135130,0.000019,...,1.711091e+09,0.821965,154.0,media/images/Confusion Matrix_44_05bda9ec50bc3...,57200.0,image-file,1200.0,png,1200.0,05bda9ec50bc339493dca11b5d394511f04b407205e0ee...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416,1394,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_March_29th_2024_22_13_39_logstep=...,ZSCAN4,105.0,0.924647,0.751729,0.235353,241.394078,0.000051,...,1.711765e+09,0.872456,255.0,media/images/Confusion Matrix_90_8c84502b2c48a...,55101.0,image-file,1200.0,png,1200.0,8c84502b2c48ab7e127586b4b2de9ef4de9a25c6b57cda...
417,1336,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_April_01th_2024_01_31_02_logstep=...,ZSCAN5A,40.0,0.939999,0.743027,0.300181,94.731095,0.000000,...,1.711950e+09,0.868192,98.0,media/images/Confusion Matrix_36_15e3928e27a81...,58513.0,image-file,1200.0,png,1200.0,15e3928e27a81126a8756b7a2874f005f41fb2d662555a...
418,1702,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 4...",TFBS_NonTFBS_March_28th_2024_00_35_12_logstep=...,ZSCAN5C,80.0,0.863172,0.564038,0.422188,187.623710,0.000000,...,1.711601e+09,0.781980,192.0,media/images/Confusion Matrix_70_93e14e1228775...,55333.0,image-file,1200.0,png,1200.0,93e14e1228775373425ea1ed716add7bee037c49401919...
419,4524,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_01th_2024_02_09_02_logstep=...,ZSCAN9,282.0,0.890887,0.625169,0.307224,525.080328,0.000132,...,1.709277e+09,0.812420,601.0,media/images/Confusion Matrix_188_e5c3db86ea9e...,54036.0,image-file,1200.0,png,1200.0,e5c3db86ea9e90d0c522f3bbd8795316ce7d735f6f33ba...


In [12]:
# Save the best run per tag as a tab-separated file; the row index is written as the first column.
best_acc_df.to_csv(path_or_buf="All_TFBS_Models_stat.tsv", sep="\t", index=True)

In [13]:
# Show each tag with its best 'eval_acc', ordered from highest to lowest accuracy.
best_acc_df[['tags', 'eval_acc']].sort_values(by="eval_acc", ascending=False)

Unnamed: 0,tags,eval_acc
217,RBM14,0.968908
230,SAFB,0.968090
265,TAF15,0.959936
254,SPI1,0.957358
290,USF1,0.951848
...,...,...
209,PRPF4,0.500579
94,H3K9me3,0.499966
88,H3K27ac,0.499856
85,H3F3A,0.499651


In [14]:
# Build the tag/accuracy ranking (highest 'eval_acc' first) and save it as a tab-separated file.
accuracy_ranking = best_acc_df[['tags', 'eval_acc']].sort_values(by="eval_acc", ascending=False)
accuracy_ranking.to_csv("TFBS_accuracy_Stat.tsv", sep="\t")

In [None]:
# Display best_acc_df: for each tag, the row with the highest 'eval_acc'.
best_acc_df