Organize the dataframes to a point where we can manually assemble the result tables by copy-and-paste.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import os
import numpy as np

print(os.listdir("plaid_csvs"))
print(os.listdir("plaid_csvs/peer"))

['peer', 'reconstruction']
['thermo_len256.csv', 'fold_len256.csv', 'original_peer_stripped.csv', 'binloc_len256.csv', 'stability_len256.csv', 'soluability_len256.csv', 'subloc_len256.csv', 'beta_len256.csv', 'fluo_len256.csv', 'properties_benchmark_len256.csv']


In [26]:
d = {}
for file in os.listdir("plaid_csvs/peer"):
    d[str(file).split("_")[0]] = pd.read_csv(f"plaid_csvs/peer/{file}", sep=",")

original_df = d.pop("original")
property_df = d.pop("properties")

In [37]:
outdf = original_df.set_index("Unnamed: 0")
outdf = outdf.drop(['ESM-1b', 'ProtBert'], axis=1)
outdf = outdf.transpose()
outdf.head(n=10)

Unnamed: 0,Flu,Sta,β-lac,Sol,Sub,Bin,Cont,Fold,SSP,Yst,Hum,Aff,PDB,BDB
DDE,0.638,0.652,0.623,59.77,49.17,77.43,--,9.57,--,55.83,62.77,2.908,--,--
Moran,0.4,0.322,0.375,57.73,31.13,55.63,26.34,7.1,68.99,53.0,54.67,2.984,1.457,1.572
LSTM,0.494,0.533,0.139,70.18,62.98,88.11,17.5,8.24,59.62,53.62,63.75,2.853,1.455,1.566
Transformer,0.643,0.649,0.261,70.12,56.02,75.74,10.0,8.52,66.07,54.12,59.58,2.499,1.376,1.497
CNN,0.682,0.637,0.781,64.43,58.73,82.67,20.43,10.93,69.56,55.07,62.6,2.796,1.441,1.565
ResNet,0.636,0.126,0.152,67.33,52.3,78.99,39.66,8.89,82.18,48.91,68.61,3.005,1.562,1.549
ProtBert*,0.339,0.697,0.616,59.17,59.44,81.54,45.78,10.74,82.73,53.87,83.61,2.996,1.559,1.556
ESM-1b*,0.43,0.75,0.528,67.02,79.82,91.61,,29.95,,66.07,88.06,3.031,,


In [10]:
from plaid.constants import COMPRESSION_INPUT_DIMENSIONS, COMPRESSION_SHORTEN_FACTORS

In [33]:
def mapping_fn(model_id, mode='dim'):
    assert mode in ['dim', 'shorten']
    if model_id == "identity":
        return 1024
    else:
        try:
            if mode == "dim":
                return COMPRESSION_INPUT_DIMENSIONS[model_id]
            elif mode == "shorten":
                return COMPRESSION_SHORTEN_FACTORS[model_id]
        except:
            return np.nan

clean = {}

for key, df in d.items():
    df['input_dim'] = df['task.model.compression_model_id'].map(lambda x: mapping_fn(x, "dim"))
    df['shorten_factor'] = df['task.model.compression_model_id'].map(lambda x: mapping_fn(x, "shorten"))
    df = df.sort_values(by="input_dim")
    df = df.dropna(axis=1)
    cols_to_keep = list(filter(lambda x: "[" in x, df.columns))
    cols_to_keep = list(filter(lambda x: ("acc" in x) or ("spearman" in x), cols_to_keep))
    cols_to_keep += ['input_dim','shorten_factor']
    clean[key] = df.filter(cols_to_keep, axis=1)


In [38]:
for k, df in clean.items():
    print(k, "________________")
    display(df)

thermo ________________


Unnamed: 0,valid/epoch/spearmanr [target],input_dim,shorten_factor
0,0.198137,4,2
1,0.175896,8,2
2,0.358168,16,2
3,0.431941,32,2
4,0.504641,64,2
5,0.527957,128,2
6,0.559201,256,2
7,0.56057,512,2
8,0.589104,1024,2
9,0.583282,1024,1024


fold ________________


Unnamed: 0,valid/epoch/accuracy [fold_label],input_dim,shorten_factor
0,0.043478,4,2
1,0.148098,8,2
2,0.25,16,2
3,0.336957,32,2
4,0.451087,64,2
5,0.46875,128,2
6,0.504076,256,2
7,0.53125,512,2
8,0.538043,1024,2
9,0.516304,1024,1024


binloc ________________


Unnamed: 0,valid/epoch/accuracy [localization],input_dim,shorten_factor
0,0.684789,4,2
1,0.742047,8,2
2,0.8369,16,2
3,0.869289,32,2
4,0.895894,64,2
5,0.896472,128,2
6,0.923077,256,2
7,0.934066,512,2
8,0.929439,1024,2
9,0.935801,1024,1024


stability ________________


Unnamed: 0,valid/epoch/spearmanr [stability_score],input_dim,shorten_factor
0,0.398098,4,2
1,0.44012,8,2
2,0.546117,16,2
3,0.556087,32,2
4,0.563771,64,2
5,0.582749,128,2
6,0.604562,256,2
7,0.632242,512,2
8,0.636395,1024,2
9,0.630828,1024,1024


soluability ________________


Unnamed: 0,valid/epoch/accuracy [solubility],input_dim,shorten_factor
1,0.597378,4,2
0,0.644195,8,2
2,0.651109,16,2
3,0.673005,32,2
5,0.692164,64,2
4,0.700375,128,2
6,0.706569,256,2
7,0.717661,512,2
8,0.724575,1024,2
9,0.719965,1024,1024


subloc ________________


Unnamed: 0,valid/epoch/accuracy [localization],input_dim,shorten_factor
0,0.331199,4,2
1,0.446105,8,2
2,0.536108,16,2
3,0.569548,32,2
4,0.622199,64,2
5,0.681964,128,2
6,0.718605,256,2
7,0.742796,512,2
8,0.75916,1024,2
9,0.754536,1024,1024


beta ________________


Unnamed: 0,valid/epoch/spearmanr [scaled_effect1]
0,0.134617
1,0.153607
3,0.142555
2,0.268808
4,0.42999
5,0.380468
6,0.626545


fluo ________________


Unnamed: 0,valid/epoch/spearmanr [log_fluorescence],input_dim,shorten_factor
0,0.143924,4,2
1,0.218374,8,2
2,0.265353,16,2
3,0.282104,32,2
4,0.309722,64,2
5,0.408947,128,2
6,0.473471,256,2
7,0.508158,512,2
8,0.517653,1024,2
9,0.384684,1024,1024


In [40]:
d['beta']

Unnamed: 0,Name,test_set.dataset.class,task.model.compression_model_id,Runtime,task.metric,valid/epoch/mean absolute error [scaled_effect1],valid/epoch/root mean squared error [scaled_effect1],valid/epoch/spearmanr [scaled_effect1],Notes,User,...,valid/epoch/root mean squared error [stability_score],valid/epoch/spearmanr [stability_score],train/epoch/mean squared error,valid/epoch/mean absolute error [target],valid/epoch/root mean squared error [target],valid/epoch/spearmanr [target],valid/epoch/accuracy [fold_label],valid/epoch/matthews correlation coefficient [fold_label],input_dim,shorten_factor
0,robust-totem-138,datasets.BetaLactamase,8ebs7j9h,6707,"[""mae"",""rmse"",""spearmanr""]",0.262112,0.315069,0.134617,-,lu-amy-al1,...,,,0.100203,,,,,,4.0,2.0
1,serene-flower-137,datasets.BetaLactamase,mm9fe6x9,6743,"[""mae"",""rmse"",""spearmanr""]",0.261212,0.31508,0.153607,-,lu-amy-al1,...,,,0.100255,,,,,,8.0,2.0
2,dashing-butterfly-136,datasets.BetaLactamase,fbbrfqzk,6716,"[""mae"",""rmse"",""spearmanr""]",0.260148,0.314621,0.268808,-,lu-amy-al1,...,,,0.099973,,,,,,32.0,2.0
3,helpful-paper-135,datasets.BetaLactamase,kyytc8i9,6699,"[""mae"",""rmse"",""spearmanr""]",0.259836,0.314951,0.142555,-,lu-amy-al1,...,,,0.100211,,,,,,16.0,2.0
4,smart-voice-134,datasets.BetaLactamase,g8e83omk,6812,"[""mae"",""rmse"",""spearmanr""]",0.257581,0.302427,0.42999,-,lu-amy-al1,...,,,0.093096,,,,,,1024.0,2.0
5,smooth-violet-133,datasets.BetaLactamase,identity,5949,"[""mae"",""rmse"",""spearmanr""]",0.247324,0.298416,0.380468,-,lu-amy-al1,...,,,0.091871,,,,,,1024.0,1024.0
6,esm1b,datasets.BetaLactamase,,9899,"[""mae"",""rmse"",""spearmanr""]",0.191191,0.271582,0.626545,-,lu-amy-al1,...,,,0.070673,,,,,,,
