In [2]:
import pandas as pd
from src.polaris import PolarisDispatcher

# Load the saved results CSV into a DataFrame. Judging by the rendered output,
# each row holds one hyperparameter configuration together with its
# mean_val_loss.
df = pd.read_csv("./results/potency_gin_results.csv")
# Bare trailing expression so the notebook renders the table.
df

Unnamed: 0,task,target_task,batch_size,epochs,lr,num_cv_folds,num_cv_bins,scaffold_split_val_sz,repr_model,in_channels,hidden_channels,num_layers,dropout,out_dim,out_channels,latent_dim,mean_val_loss
0,potency,pIC50 (SARS-CoV-2 Mpro),64,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,64,64,1.146885
1,potency,pIC50 (MERS-CoV Mpro),64,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,64,64,1.193432
2,potency,pIC50 (SARS-CoV-2 Mpro),64,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,128,128,1.154373
3,potency,pIC50 (MERS-CoV Mpro),64,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,128,128,1.203977
4,potency,pIC50 (SARS-CoV-2 Mpro),32,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,128,128,1.043353
5,potency,pIC50 (SARS-CoV-2 Mpro),32,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,64,64,1.044245
6,potency,pIC50 (MERS-CoV Mpro),32,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,64,64,1.100995
7,potency,pIC50 (MERS-CoV Mpro),32,100,0.001,5,10,0.1,GIN,9,64,3,0.01,1,128,128,1.050218


In [3]:
# For every target_task, keep only the run whose mean_val_loss is the smallest.
# idxmin() yields one row label per target; those labels are then used to pull
# the full rows back out of df, and the index is renumbered from zero.
best_models = (
    df.groupby('target_task')['mean_val_loss']
    .idxmin()
    .pipe(lambda row_labels: df.loc[row_labels])
    .reset_index(drop=True)
)

# One plain dict per winning run, convenient for passing on as a config.
best_models_list = best_models.to_dict(orient='records')
best_models_list

[{'task': 'potency',
  'target_task': 'pIC50 (MERS-CoV Mpro)',
  'batch_size': 32,
  'epochs': 100,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv_bins': 10,
  'scaffold_split_val_sz': 0.1,
  'repr_model': 'GIN',
  'in_channels': 9,
  'hidden_channels': 64,
  'num_layers': 3,
  'dropout': 0.01,
  'out_dim': 1,
  'out_channels': 128,
  'latent_dim': 128,
  'mean_val_loss': 1.0502175288274884},
 {'task': 'potency',
  'target_task': 'pIC50 (SARS-CoV-2 Mpro)',
  'batch_size': 32,
  'epochs': 100,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv_bins': 10,
  'scaffold_split_val_sz': 0.1,
  'repr_model': 'GIN',
  'in_channels': 9,
  'hidden_channels': 64,
  'num_layers': 3,
  'dropout': 0.01,
  'out_dim': 1,
  'out_channels': 128,
  'latent_dim': 128,
  'mean_val_loss': 1.0433526992797852}]

In [4]:
# Retrain the best configuration found for each target.
#
# Configs are looked up by their 'target_task' value instead of by position in
# best_models_list: the list order is a by-product of how groupby orders its
# keys, so positional indexing would silently train the wrong target if a
# target were added or renamed. A missing target now fails loudly with KeyError.
best_by_target = {model['target_task']: model for model in best_models_list}

loss_mers = PolarisDispatcher.train_single(best_by_target['pIC50 (MERS-CoV Mpro)'])
loss_sars = PolarisDispatcher.train_single(best_by_target['pIC50 (SARS-CoV-2 Mpro)'])

print(f"loss_mers: {loss_mers}, loss_sars: {loss_sars}")

Using device: cpu


 19%|█▉        | 19/100 [00:23<01:41,  1.26s/it]


Using device: cpu


 78%|███████▊  | 78/100 [01:28<00:24,  1.13s/it]

loss_mers: [0.952145516872406], loss_sars: [0.6234539151191711]





In [3]:
import yaml
from itertools import product

def generate_combinations(config):
    """Expand a config into one dict per hyperparameter combination.

    Every entry of ``config`` whose value is a ``list`` is treated as a set of
    candidate values, and the Cartesian product of all such lists is
    enumerated. Entries whose values are not lists are carried over unchanged.

    Args:
        config: Dict mapping a parameter name to either a fixed value or a
            list of candidate values.

    Returns:
        A list of shallow copies of ``config``, one per combination, in which
        each list-valued entry has been replaced by a single candidate. The
        order follows ``itertools.product`` (the last list-valued key varies
        fastest). With no list-valued entries the result is a single copy of
        ``config``; if any list is empty the result is an empty list.
    """
    # Names of the swept entries (in config order) and their candidate lists.
    swept_keys = [key for key, value in config.items() if isinstance(value, list)]
    candidate_lists = [config[key] for key in swept_keys]

    def _materialize(choice):
        # Begin from the base config so fixed parameters are kept, then
        # overwrite each swept key with the value picked for this combination.
        combo = config.copy()
        combo.update(zip(swept_keys, choice))
        return combo

    return [_materialize(choice) for choice in product(*candidate_lists)]


# Load the ECFP configuration. The file is decoded explicitly as UTF-8 rather
# than with the platform's locale default, so any non-ASCII text in the YAML
# loads identically on every operating system.
with open("./config/config_ecfp.yml", "r", encoding="utf-8") as file:
    config = yaml.safe_load(file)

# Expand every list-valued entry into one config dict per combination and
# display the result.
generate_combinations(config)

[{'task': 'potency',
  'target_task': 'pIC50 (MERS-CoV Mpro)',
  'batch_size': 32,
  'epochs': 200,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv_bins': 10,
  'scaffold_split_val_sz': 0.1,
  'repr_model': 'ECFP',
  'radius': 2,
  'fpSize': 1024,
  'latent_dim': 1024,
  'out_dim': 1},
 {'task': 'potency',
  'target_task': 'pIC50 (MERS-CoV Mpro)',
  'batch_size': 32,
  'epochs': 200,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv_bins': 10,
  'scaffold_split_val_sz': 0.1,
  'repr_model': 'ECFP',
  'radius': 2,
  'fpSize': 1024,
  'latent_dim': 2048,
  'out_dim': 1},
 {'task': 'potency',
  'target_task': 'pIC50 (MERS-CoV Mpro)',
  'batch_size': 32,
  'epochs': 200,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv_bins': 10,
  'scaffold_split_val_sz': 0.1,
  'repr_model': 'ECFP',
  'radius': 2,
  'fpSize': 2048,
  'latent_dim': 1024,
  'out_dim': 1},
 {'task': 'potency',
  'target_task': 'pIC50 (MERS-CoV Mpro)',
  'batch_size': 32,
  'epochs': 200,
  'lr': 0.001,
  'num_cv_folds': 5,
  'num_cv