In [2]:
import pandas as pd
import wandb

In [3]:
# Create a W&B API client and look up the runs stored under "<entity>/<project>".
api = wandb.Api()
entity = "pratik24111991"
project = "TFBS_Finetuned_Models_Work"
runs = api.runs(f"{entity}/{project}")

In [4]:
# Four parallel lists, one entry per run, that become the columns of runs_df.
summary_list, config_list, name_list, tag_list = [], [], [], []
for run in runs:
    # Output keys/values (metrics such as accuracy); ._json_dict is used
    # to omit large files.
    summary_list.append(run.summary._json_dict)

    # Hyperparameters, skipping the special keys whose name starts with "_".
    visible_config = {}
    for key, value in run.config.items():
        if key.startswith("_"):
            continue
        visible_config[key] = value
    config_list.append(visible_config)

    # Human-readable run name and the tags attached to the run.
    name_list.append(run.name)
    tag_list.append(run.tags)

# One row per run; each cell holds the raw dict / string / list collected above.
runs_df = pd.DataFrame(
    dict(summary=summary_list, config=config_list, name=name_list, tags=tag_list)
)

In [5]:
# Preview the per-run table (columns: summary, config, name, tags).
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'learning_rate': 0.00023768115942028985, 'eva...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_17_59_logstep=...,"[HNF1A, TFBS_NonTFBS, e-4]"
1,{},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_16_06_logstep=...,"[TFBS_NonTFBS, ZFHX2, e-6]"
2,"{'eval_precision': 0.8039987134197402, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_37_logstep=...,"[HNF1A, TFBS_NonTFBS, e-4]"
3,{},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_12_logstep=...,"[RAD21M, TFBS_NonTFBS, e-3]"
4,"{'eval_f1': 0.8037560482288753, '_runtime': 36...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_03_15_logstep=...,"[HNF1A, TFBS_NonTFBS, e-4]"
...,...,...,...,...
3317,"{'_step': 327, 'eval_f1': 0.33504827683416727,...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,"[SAP30, TFBS_NonTFBS, e-3]"
3318,"{'eval_recall': 0.79831056939694, 'Confusion M...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,"[SAP30, TFBS_NonTFBS, e-6]"
3319,"{'eval_recall': 0.8994120648308664, '_wandb': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,"[SAP30, TFBS_NonTFBS, e-5]"
3320,"{'_step': 324, 'eval_f1': 0.8873539005565894, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,"[SAP30, TFBS_NonTFBS, e-4]"


In [6]:
# Tag filter used to strip the non-identifying tags from each run
def filter_tags(tags):
    """Return the tags that contain neither '-' nor 'TFBS', in their original order.

    Args:
        tags: iterable of tag strings.

    Returns:
        A new list holding only the tags that pass both checks.
    """
    kept = []
    for tag in tags:
        # Skip anything with a dash (e.g. 'e-4') or mentioning 'TFBS'.
        if '-' in tag or 'TFBS' in tag:
            continue
        kept.append(tag)
    return kept

In [7]:
# Replace each run's tag list with its filtered version (element-wise).
runs_df['tags'] = runs_df['tags'].map(filter_tags)

In [8]:
# Show the per-run table again to inspect the current contents of the 'tags' column.
runs_df

Unnamed: 0,summary,config,name,tags
0,"{'learning_rate': 0.00023768115942028985, 'eva...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_17_59_logstep=...,[HNF1A]
1,{},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_16_06_logstep=...,[ZFHX2]
2,"{'eval_precision': 0.8039987134197402, 'Confus...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_37_logstep=...,[HNF1A]
3,{},"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_12_logstep=...,[RAD21M]
4,"{'eval_f1': 0.8037560482288753, '_runtime': 36...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_03_15_logstep=...,[HNF1A]
...,...,...,...,...
3317,"{'_step': 327, 'eval_f1': 0.33504827683416727,...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,[SAP30]
3318,"{'eval_recall': 0.79831056939694, 'Confusion M...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,[SAP30]
3319,"{'eval_recall': 0.8994120648308664, '_wandb': ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,[SAP30]
3320,"{'_step': 324, 'eval_f1': 0.8873539005565894, ...","{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,[SAP30]


In [9]:
# Flatten every run's summary dict into a table with one column per summary key
summaries_df = pd.json_normalize(runs_df['summary'])

# Put the flattened summary columns next to the remaining run columns,
# leaving out the original dict-valued 'summary' column
expanded_runs_df = pd.concat([runs_df.drop(columns='summary'), summaries_df], axis=1)
# Reduce each tag list to its first entry; an empty list becomes None
expanded_runs_df['tags'] = expanded_runs_df['tags'].map(
    lambda labels: labels[0] if labels else None
)

In [10]:
# Preview the expanded per-run table.
expanded_runs_df

Unnamed: 0,config,name,tags,learning_rate,eval_precision,eval_f1,_runtime,eval_acc,_timestamp,train_loss,...,eval_mcc,eval_recall,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,_wandb.runtime
0,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_17_59_logstep=...,HNF1A,2.376812e-04,0.793641,0.793529,128.442950,0.793529,1.711513e+09,0.540744,...,0.587287,0.793646,image-file,1200.0,png,1200.0,193b495d7a59fac62a5b690af057b57cdcb4340f31ff10...,media/images/Confusion Matrix_0_193b495d7a59fa...,60387.0,
1,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_16_06_logstep=...,ZFHX2,,,,,,,,...,,,,,,,,,,
2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_37_logstep=...,HNF1A,1.545894e-05,0.803999,0.800516,367.176893,0.800835,1.711513e+09,0.359064,...,0.605445,0.801451,image-file,1200.0,png,1200.0,08cdbeba1ce37fa6a7d834b8d1082dcfec19493407cebb...,media/images/Confusion Matrix_132_08cdbeba1ce3...,57333.0,430.0
3,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_12_logstep=...,RAD21M,,,,,,,,...,,,,,,,,,,
4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_03_15_logstep=...,HNF1A,4.637681e-05,0.806298,0.803756,366.898376,0.803966,1.711513e+09,0.260668,...,0.610788,0.804492,image-file,1200.0,png,1200.0,0695be53a7b1600795f4fb41793dec6c3f2a4fbe2e16ea...,media/images/Confusion Matrix_132_0695be53a7b1...,58316.0,430.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3317,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,3.873874e-04,0.251934,0.335048,606.565690,0.503869,1.709079e+09,0.697234,...,0.000000,0.500000,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,692.0
3318,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,1.381381e-07,0.799096,0.797994,603.007237,0.798092,1.709079e+09,0.547333,...,0.597406,0.798311,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,693.0
3319,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,1.381381e-06,0.899874,0.899477,608.870517,0.899523,1.709079e+09,0.242748,...,0.799286,0.899412,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,700.0
3320,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,1.381381e-05,0.889443,0.887354,604.127964,0.887546,1.709079e+09,0.171405,...,0.776720,0.887280,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,696.0


In [11]:
# Drop every row that has a NaN in any column, then move the old row labels
# into a regular 'index' column so the original positions stay visible.
# NOTE(review): this rebinds expanded_runs_df, so re-running only this cell
# operates on its own previous output rather than on the unfiltered frame.
expanded_runs_df = (
    expanded_runs_df
    .dropna(axis=0, how='any')
    .reset_index(drop=False)
)
expanded_runs_df

Unnamed: 0,index,config,name,tags,learning_rate,eval_precision,eval_f1,_runtime,eval_acc,_timestamp,...,eval_mcc,eval_recall,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,_wandb.runtime
0,2,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_10_37_logstep=...,HNF1A,1.545894e-05,0.803999,0.800516,367.176893,0.800835,1.711513e+09,...,0.605445,0.801451,image-file,1200.0,png,1200.0,08cdbeba1ce37fa6a7d834b8d1082dcfec19493407cebb...,media/images/Confusion Matrix_132_08cdbeba1ce3...,57333.0,430.0
1,4,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_03_15_logstep=...,HNF1A,4.637681e-05,0.806298,0.803756,366.898376,0.803966,1.711513e+09,...,0.610788,0.804492,image-file,1200.0,png,1200.0,0695be53a7b1600795f4fb41793dec6c3f2a4fbe2e16ea...,media/images/Confusion Matrix_132_0695be53a7b1...,58316.0,430.0
2,5,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_27th_2024_00_00_49_logstep=...,DDX20,4.333333e-04,0.250334,0.333630,475.843682,0.500669,1.711513e+09,...,0.000000,0.500000,image-file,1200.0,png,1200.0,d259a3ccfab252f0fb2e561554cd9cb4005eb0772c375b...,media/images/Confusion Matrix_174_d259a3ccfab2...,52073.0,551.0
3,6,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_26th_2024_23_55_54_logstep=...,HNF1A,1.545894e-05,0.802273,0.797387,367.348636,0.797878,1.711512e+09,...,0.600870,0.798608,image-file,1200.0,png,1200.0,6c4b1cf21a2040ecf497a2f175a83ddd5304214f8d5b07...,media/images/Confusion Matrix_132_6c4b1cf21a20...,58753.0,429.0
4,7,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_26th_2024_23_51_27_logstep=...,DDX20,1.444444e-04,0.250334,0.333630,475.562395,0.500669,1.711512e+09,...,0.000000,0.500000,image-file,1200.0,png,1200.0,d259a3ccfab252f0fb2e561554cd9cb4005eb0772c375b...,media/images/Confusion Matrix_174_d259a3ccfab2...,52073.0,551.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3231,3317,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_19_05_22_logst...,SAP30,3.873874e-04,0.251934,0.335048,606.565690,0.503869,1.709079e+09,...,0.000000,0.500000,image-file,1200.0,png,1200.0,8bb7708fa5de7feeb1d06e439d1d08483f9f17af9479c6...,media/images/Confusion Matrix_218_8bb7708fa5de...,49619.0,692.0
3232,3318,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_56_57_logst...,SAP30,1.381381e-07,0.799096,0.797994,603.007237,0.798092,1.709079e+09,...,0.597406,0.798311,image-file,1200.0,png,1200.0,5a78816cab0e1fa1c6a6f02ac5e67d8fb0dd137cfec7c4...,media/images/Confusion Matrix_216_5a78816cab0e...,57947.0,693.0
3233,3319,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_50_logst...,SAP30,1.381381e-06,0.899874,0.899477,608.870517,0.899523,1.709079e+09,...,0.799286,0.899412,image-file,1200.0,png,1200.0,265f5743eafafb5593d85b3f5b25a8bde9f25d156c0678...,media/images/Confusion Matrix_216_265f5743eafa...,59356.0,700.0
3234,3320,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_27th_2024_18_55_00_logst...,SAP30,1.381381e-05,0.889443,0.887354,604.127964,0.887546,1.709079e+09,...,0.776720,0.887280,image-file,1200.0,png,1200.0,7375fb8417576ec4fc3d46996dd222308076be3b318ac5...,media/images/Confusion Matrix_216_7375fb841757...,58933.0,696.0


In [12]:
# For each distinct 'tags' value, find the row label holding the largest 'eval_acc'
idx = expanded_runs_df['eval_acc'].groupby(expanded_runs_df['tags']).idxmax()

# Pick those rows out of the frame, then renumber them from zero
best_acc_df = expanded_runs_df.loc[idx]
best_acc_df = best_acc_df.reset_index(drop=True)
best_acc_df

Unnamed: 0,index,config,name,tags,learning_rate,eval_precision,eval_f1,_runtime,eval_acc,_timestamp,...,eval_mcc,eval_recall,Confusion Matrix._type,Confusion Matrix.width,Confusion Matrix.format,Confusion Matrix.height,Confusion Matrix.sha256,Confusion Matrix.path,Confusion Matrix.size,_wandb.runtime
0,1081,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_11th_2024_07_48_56_logstep=...,AGO2,0.000004,0.898803,0.895784,1204.465479,0.895999,1.710159e+09,...,0.794619,0.895822,image-file,1200.0,png,1200.0,958cd42b536bc3994165310ae2cbd90fbd514c41e93d89...,media/images/Confusion Matrix_422_958cd42b536b...,60413.0,1362.0
1,445,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_03_02_22_logstep=...,ARHGAP35,0.000019,0.823799,0.821313,125.135130,0.821501,1.711091e+09,...,0.645762,0.821965,image-file,1200.0,png,1200.0,05bda9ec50bc339493dca11b5d394511f04b407205e0ee...,media/images/Confusion Matrix_44_05bda9ec50bc3...,57200.0,154.0
2,2032,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_03th_2024_07_58_34_logstep=...,ARID3A,0.000036,0.768812,0.763331,1507.292893,0.764071,1.709472e+09,...,0.533582,0.764785,image-file,1200.0,png,1200.0,9790b8d093b46b5410432abe0ae5c05698c8f6338f0c86...,media/images/Confusion Matrix_530_9790b8d093b4...,59950.0,1699.0
3,1269,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_10th_2024_18_51_55_logstep=...,ARNT,0.000121,0.750809,0.750420,1621.048053,0.750443,1.710113e+09,...,0.501429,0.750620,image-file,1200.0,png,1200.0,4654e6e9f4b65a33e5232fe1c4e4f3ad0d152246fc91aa...,media/images/Confusion Matrix_582_4654e6e9f4b6...,58394.0,1831.0
4,2682,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_February_29th_2024_20_07_02_logst...,BLC3,0.000034,0.909569,0.908875,6767.668473,0.908906,1.709262e+09,...,0.818516,0.908947,image-file,1200.0,png,1200.0,62feb30361869d99e534374c07accb544ecca05e54c11e...,media/images/Confusion Matrix_2370_62feb303618...,62161.0,7580.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,299,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_22th_2024_16_27_48_logstep=...,ZNF785,0.000019,0.875901,0.875608,178.889156,0.875611,1.711139e+09,...,0.751668,0.875766,image-file,1200.0,png,1200.0,774761deed86c76bcda8a44b4a3b2a90bb747b702d742e...,media/images/Confusion Matrix_66_774761deed86c...,56456.0,219.0
147,1583,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_09th_2024_03_13_54_logstep=...,ZNF792,0.000126,0.812972,0.808163,757.876489,0.809028,1.709973e+09,...,0.621292,0.808337,image-file,1200.0,png,1200.0,0a705a0c192a9c707044695c28c4b400f4158812f32126...,media/images/Confusion Matrix_272_0a705a0c192a...,59776.0,862.0
148,193,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_24th_2024_11_52_13_logstep=...,ZSCAN23,0.000018,0.781929,0.774944,229.328597,0.776079,1.711296e+09,...,0.558057,0.776157,image-file,1200.0,png,1200.0,f1f3edfd44a11d0b78090ba0a1cd55c3cd31b4e702be1c...,media/images/Confusion Matrix_84_f1f3edfd44a11...,56203.0,275.0
149,2655,"{'epochs': 10, 'Dropout': 0.1, 'batch_size': 1...",TFBS_NonTFBS_March_01th_2024_02_09_02_logstep=...,ZSCAN9,0.000132,0.812749,0.812409,525.080328,0.812469,1.709277e+09,...,0.625169,0.812420,image-file,1200.0,png,1200.0,e5c3db86ea9e90d0c522f3bbd8795316ce7d735f6f33ba...,media/images/Confusion Matrix_188_e5c3db86ea9e...,54036.0,601.0


In [13]:
# Save the best-run-per-tag table as a tab-separated file.
best_acc_df.to_csv(path_or_buf="All_TFBS_Models_stat.tsv", sep="\t")

In [12]:
# Rank the best-per-tag rows from highest to lowest 'eval_acc', showing only tag and accuracy.
best_acc_df.sort_values("eval_acc", ascending=False).loc[:, ['tags', 'eval_acc']]

Unnamed: 0,tags,eval_acc
62,RBM14,0.968908
72,SPI1,0.957358
42,MAFF,0.939551
78,THAP1,0.937964
14,E2F4,0.918800
...,...,...
35,ILF3,0.715873
10,CHD7,0.709527
58,PTRF,0.705040
79,THRAP3,0.683565


In [13]:
# Write the accuracy ranking (tag and accuracy columns only, best first) to a tab-separated file.
(
    best_acc_df
    .sort_values("eval_acc", ascending=False)
    .loc[:, ['tags', 'eval_acc']]
    .to_csv("TFBS_accuracy_Stat.tsv", sep="\t")
)

In [None]:
# Display the table of best-accuracy rows (one per tag).
best_acc_df