In [3]:
import torch, torch.nn as nn
import numpy as np
from egnn_pytorch import EGNN
from architecture import (StackedEGNN,
                          LearnableRBF,
                          AttentionBlock,
                          TunableBlock)
import time, datetime
import glob
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import Dataset, DataLoader
import torch, torch.nn as nn
import numpy as np
from egnn_pytorch import EGNN
from architecture import (StackedEGNN,
                          LearnableRBF,
                          AttentionBlock,
                          TunableBlock)
import time, datetime
import glob
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import Dataset, DataLoader

torch.backends.cudnn.deterministic=True; torch.backends.cudnn.benchmark=False

# ================================================================
# 0) dashboard – flip anything here
# ================================================================
class Cfg(dict):
    __getattr__ = dict.__getitem__; __setattr__ = dict.__setitem__


# ================================================================
# 1) reproducibility
# ================================================================
import random, os, numpy as np, torch, glob, datetime


# ================================================================
# 2) dataset helpers
# ================================================================
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import Dataset, DataLoader
class HoodDS(Dataset):
    def __init__(self, paths, k):
        self.data=[]; self.ids=[]
        nbr=NearestNeighbors(k,algorithm='brute')
        for p in paths:
            try:
                d=np.load(p,allow_pickle=True)
                if len(d['sites'])==0: continue
                nbr.fit(d['pos']); idx=nbr.kneighbors(d['sites'],return_distance=False)
                self.data.append((torch.from_numpy(d['z'][idx]),
                                  torch.from_numpy(d['pos'][idx]),
                                  torch.from_numpy(d['pks'])))
                self.ids.append(os.path.splitext(os.path.basename(p))[0])
            except Exception as e: print("skip",p,e)
    def __len__(self): return len(self.data)
    def __getitem__(self,i):
        z,p,y=self.data[i]; return z,p,y,self.ids[i]

def pad(batch,k,device,ret_ids):
    ids=[b[3] for b in batch] if ret_ids else None
    B=len(batch); S=max(b[0].shape[0] for b in batch)
    zt=torch.zeros(B,S,k,dtype=torch.int32,device=device)
    pt=torch.zeros(B,S,k,3,dtype=torch.float32,device=device)
    yt=torch.full((B,S),float('nan'),device=device); mt=torch.zeros(B,S,dtype=torch.bool,device=device)
    for b,(z,p,y,_) in enumerate(batch):
        s=z.shape[0]; zt[b,:s]=z; pt[b,:s]=p; yt[b,:s]=y; mt[b,:s]=True
    return (zt,pt,yt,mt,ids) if ret_ids else (zt,pt,yt,mt)

def split(paths):
    if cfg.num_paths: paths=paths[:cfg.num_paths]
    rng=np.random.RandomState(cfg.split_seed)
    idx=rng.permutation(len(paths)); cut=int(len(paths)*cfg.split_ratio)
    return [paths[i] for i in idx[:cut]], [paths[i] for i in idx[cut:]]

# ================================================================
# 3) model
# ================================================================

class Model(nn.Module):
    def __init__(self,c):
        super().__init__(); self.c=c
        C = c.dim + c.basis

        self.egnn = StackedEGNN(c.dim,c.depth,c.hidden_dim,c.dropout,
                                c.hood_k,98,c.num_neighbors,c.norm_coors).to(c.device)

        self.rbf  = TunableBlock(LearnableRBF(c.basis,10.).to(c.device), c.use_rbf)
        self.attn = TunableBlock(AttentionBlock(C,C,c.hidden_dim).to(c.device), c.use_attn)

        if c.aggregator=='linear':
            self.agg = nn.Linear(C,1).to(c.device)
        elif c.aggregator=='nconv':
            self.agg = nn.Conv1d(c.hood_k,1,kernel_size=C,padding=0).to(c.device)
        elif c.aggregator=='pool':
            self.agg = None
        else: raise ValueError("aggregator must be 'linear' | 'nconv' | 'pool'")

        self.boost = nn.Linear(1,1).to(c.device) if c.use_boost else nn.Identity()
        self.prot  = EGNN(dim=1,update_coors=True,num_nearest_neighbors=3).to(c.device) \
                     if c.use_prot else nn.Identity()
        self.conv  = nn.Conv1d(1,1,c.conv_kernel,padding=c.conv_kernel//2).to(c.device) \
                     if c.use_conv else nn.Identity()

    def forward(self,z,x):
        h,coord=self.egnn(z,x); h=h[0]                # (R,N,dim)
        cent=coord.mean(1,keepdim=True)               # (R,1,3)

        # --- build token ----------------------------------------------------------------
        r = self.rbf(cent,coord).transpose(1,2) if self.c.use_rbf else \
            h.new_zeros(h.size(0),self.c.basis,self.c.hood_k)
        tok = torch.cat((r,h.transpose(1,2)),1)       # (R,C,N)

        att = self.attn(tok.permute(2,0,1))
        tok = att[0] if isinstance(att,(tuple,list)) else att
        tok = tok.permute(1,0,2)                      # (R,N,C)

        # --- aggregation ----------------------------------------------------------------
        if self.c.aggregator=='linear':
            preds = self.agg(tok) .max(1).values                # (R,1)
        elif self.c.aggregator=='nconv':
            preds = self.agg(tok).squeeze(-1)                   # (R,1)
        else:   # pool
            preds = tok.max(1).values.mean(1,keepdim=True)      # (R,1)

        preds = self.boost(preds)

        if self.c.use_prot:
            preds = self.prot(preds.unsqueeze(0),
                              cent.permute(1,0,2))[0].squeeze(0)

        if self.c.use_conv:
            preds = self.conv(preds.T.unsqueeze(0)).squeeze(0).T

        return preds


In [None]:
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='pool',

    # block switches
    use_rbf      =False,
    use_attn     =False,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=False,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_225207
params: 16348






[1/10]  train 1.6758 | val 1.1467
[2/10]  train 1.2742 | val 1.1439
[3/10]  train 1.2657 | val 1.1457
[4/10]  train 1.2656 | val 1.1468
[5/10]  train 1.2655 | val 1.1471
[6/10]  train 1.2655 | val 1.1479
[7/10]  train 1.2654 | val 1.1481
[8/10]  train 1.2654 | val 1.1481
[9/10]  train 1.2654 | val 1.1482
[10/10]  train 1.2655 | val 1.1481


lin agg

In [4]:
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='linear',

    # block switches
    use_rbf      =False,
    use_attn     =False,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=False,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_225444
params: 16367




[1/10]  train 1.3989 | val 1.1896
[2/10]  train 1.2674 | val 1.1400
[3/10]  train 1.2522 | val 1.1206
[4/10]  train 1.2236 | val 1.1170
[5/10]  train 1.2026 | val 1.1360
[6/10]  train 1.1462 | val 1.0370
[7/10]  train 1.1655 | val 1.1616
[8/10]  train 1.1135 | val 1.0250
[9/10]  train 1.0366 | val 0.9737
[10/10]  train 0.9746 | val 0.9117
98.28616762161255 sec


conv agg

In [5]:
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =False,
    use_attn     =False,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_225632
params: 18149




[1/10]  train 2.0834 | val 1.2063
[2/10]  train 1.3065 | val 1.0911
[3/10]  train 1.1154 | val 1.0493
[4/10]  train 1.0105 | val 0.9790
[5/10]  train 0.9371 | val 0.9596
[6/10]  train 0.9248 | val 0.9660
[7/10]  train 0.8935 | val 0.9430
[8/10]  train 0.8496 | val 0.9694
[9/10]  train 0.8412 | val 0.9084
[10/10]  train 0.8207 | val 0.8903
97.00188207626343 sec


proceeding with nconv

In [6]:
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =False,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_230147
params: 18149




[1/10]  train 2.0965 | val 1.1284
[2/10]  train 1.3527 | val 1.1105
[3/10]  train 1.1306 | val 1.0777
[4/10]  train 1.0158 | val 0.9621
[5/10]  train 0.9308 | val 0.9360
[6/10]  train 0.9074 | val 0.9313
[7/10]  train 0.8699 | val 0.9447
[8/10]  train 0.8347 | val 0.9875
[9/10]  train 0.8336 | val 1.0118
[10/10]  train 0.8179 | val 0.9079
84.53549122810364 sec


In [5]:
import torch, torch.nn as nn
import numpy as np
from egnn_pytorch import EGNN
from architecture import (StackedEGNN,
                          LearnableRBF,
                          AttentionBlock,
                          TunableBlock)
import time, datetime
import glob
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import Dataset, DataLoader
import torch, torch.nn as nn
import numpy as np
from egnn_pytorch import EGNN
from architecture import (StackedEGNN,
                          LearnableRBF,
                          AttentionBlock,
                          TunableBlock)
import time, datetime
import glob
from torch.cuda.amp import autocast, GradScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.neighbors import NearestNeighbors
from torch.utils.data import Dataset, DataLoader
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =True,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_231218
params: 18149




[1/10]  train 2.6957 | val 2.0016
[2/10]  train 1.5322 | val 1.2235
[3/10]  train 1.1391 | val 1.0274
[4/10]  train 1.0450 | val 0.9733
[5/10]  train 0.9588 | val 0.9335
[6/10]  train 0.9158 | val 0.9207
[7/10]  train 0.9300 | val 0.9829
[8/10]  train 0.8637 | val 0.9226
[9/10]  train 0.8437 | val 0.9758
[10/10]  train 0.9055 | val 0.8894
115.00042057037354 sec


winner#

In [6]:
#winner
t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =True,
    use_boost    =True,     # Linear(1→1) after aggregator
    use_prot     =False,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True,
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_231416
params: 18151




[1/10]  train 2.0807 | val 1.4433
[2/10]  train 1.4117 | val 1.2751
[3/10]  train 1.1584 | val 1.0363
[4/10]  train 1.0491 | val 0.9936
[5/10]  train 0.9829 | val 0.9851
[6/10]  train 0.9159 | val 0.9126
[7/10]  train 0.8692 | val 0.8687
[8/10]  train 0.8351 | val 0.8687
[9/10]  train 0.8161 | val 0.8547
[10/10]  train 0.7876 | val 0.8465
111.8393828868866 sec


In [7]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =True,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_231714
params: 19477




[1/10]  train 2.0621 | val 1.2336
[2/10]  train 1.3531 | val 1.4853
[3/10]  train 1.1949 | val 1.0629
[4/10]  train 1.0942 | val 1.0373
[5/10]  train 0.9542 | val 1.0138
[6/10]  train 0.9741 | val 0.9294
[7/10]  train 0.8765 | val 0.9586
[8/10]  train 0.8452 | val 0.8553
[9/10]  train 0.8117 | val 0.8640
[10/10]  train 0.7772 | val 0.8403
104.7829270362854 sec


In [8]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =True,
    use_boost    =True,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_231859
params: 19479




[1/10]  train 2.1856 | val 1.1215
[2/10]  train 1.3608 | val 1.0839
[3/10]  train 1.1348 | val 1.0261
[4/10]  train 1.0440 | val 1.0091
[5/10]  train 0.9778 | val 0.9580
[6/10]  train 0.9328 | val 0.9290
[7/10]  train 0.9102 | val 0.8918
[8/10]  train 0.8508 | val 0.9068
[9/10]  train 0.8458 | val 0.8788
[10/10]  train 0.8131 | val 0.8615
103.34276056289673 sec


In [None]:
proceed w prot no boost

In [9]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =True,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =True,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_232329
params: 19485




[1/10]  train 1.7781 | val 1.2738
[2/10]  train 1.2617 | val 1.1750
[3/10]  train 1.1787 | val 1.1414
[4/10]  train 1.1329 | val 1.1605
[5/10]  train 1.1012 | val 1.1611
[6/10]  train 1.0869 | val 1.1816
[7/10]  train 1.0755 | val 1.1597
[8/10]  train 1.0481 | val 1.1501
[9/10]  train 1.0359 | val 1.1420
[10/10]  train 1.0279 | val 1.1490
106.9748044013977 sec


no conv

In [12]:
[1/10]  train 2.0621 | val 1.2336
[2/10]  train 1.3531 | val 1.4853
[3/10]  train 1.1949 | val 1.0629
[4/10]  train 1.0942 | val 1.0373
[5/10]  train 0.9542 | val 1.0138
[6/10]  train 0.9741 | val 0.9294
[7/10]  train 0.8765 | val 0.9586
[8/10]  train 0.8452 | val 0.8553
[9/10]  train 0.8117 | val 0.8640
[10/10]  train 0.7772 | val 0.8403
104.7829270362854 sec


SyntaxError: invalid syntax (<ipython-input-12-96963c89afcd>, line 1)

norbf 

In [11]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =False,
    use_attn     =True,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_232609
params: 19477




[1/10]  train 2.8559 | val 1.4831
[2/10]  train 1.5866 | val 1.1893
[3/10]  train 1.1755 | val 1.1914
[4/10]  train 1.1214 | val 1.1487
[5/10]  train 0.9889 | val 0.9604
[6/10]  train 0.9210 | val 0.9354
[7/10]  train 0.9036 | val 0.9515
[8/10]  train 0.9083 | val 0.8885
[9/10]  train 0.8451 | val 0.9038
[10/10]  train 0.8726 | val 0.9128
107.1022801399231 sec


prot rbf no attn 

In [13]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =False,
    use_boost    =False,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_232925
params: 19477




[1/10]  train 2.2655 | val 1.7489
[2/10]  train 1.4005 | val 1.0979
[3/10]  train 1.1693 | val 1.0348
[4/10]  train 1.0632 | val 0.9992
[5/10]  train 0.9745 | val 1.0157
[6/10]  train 0.9501 | val 0.9340
[7/10]  train 0.8894 | val 1.0206
[8/10]  train 0.8956 | val 1.0333
[9/10]  train 0.8327 | val 0.8982
[10/10]  train 0.7930 | val 0.8875
84.47911500930786 sec


In [14]:

t0=time.time()
cfg = Cfg(
    # backbone
    dim=12, basis=6, depth=2, hidden_dim=4, dropout=0.02,
    hood_k=100, num_neighbors=8, norm_coors=True,

    # aggregation: 'linear' | 'nconv' | 'pool'
    aggregator   ='nconv',

    # block switches
    use_rbf      =True,
    use_attn     =False,
    use_boost    =True,     # Linear(1→1) after aggregator
    use_prot     =True,      # protein‑level EGNN
    use_conv     =False,     # 1‑D conv after prot EGNN
    conv_kernel  =7,

    # training
    loss_type='mae', study_metrics=True, #study metrics not working
    lr=5e-3, epochs=10, batch_size=1, #batchsize not safw to inc

    # misc
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=0, analysis_mode=False,
    num_paths=20, split_ratio=0.5, split_seed=0,
    runid=datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
)
print("Run‑ID:", cfg.runid)
random.seed(cfg.seed); np.random.seed(cfg.seed); torch.manual_seed(cfg.seed)

model=Model(cfg); 
print("params:",sum(p.numel() for p in model.parameters()))

# ================================================================
# 4) loaders
# ================================================================
allp=glob.glob("../../../data/pkegnn_INS/inputs/*.npz")
tr,val=split(allp)
train_ds=HoodDS(tr,cfg.hood_k); val_ds=HoodDS(val,cfg.hood_k)
coll=lambda b: pad(b,cfg.hood_k,cfg.device,cfg.analysis_mode)
tr_loader=DataLoader(train_ds,batch_size=cfg.batch_size,shuffle=True ,collate_fn=coll)
va_loader=DataLoader(val_ds,batch_size=cfg.batch_size,shuffle=False,collate_fn=coll)

# ================================================================
# 5) training utils
# ================================================================
p_fn = nn.L1Loss() if cfg.loss_type=='mae' else nn.MSELoss()
opt  = torch.optim.AdamW(model.parameters(),lr=cfg.lr)
sch  = torch.optim.lr_scheduler.ReduceLROnPlateau(opt,'min',0.5,3)
from torch.cuda.amp import GradScaler, autocast
scaler=GradScaler(enabled=(cfg.device=='cuda'))

def run(loader,train):
    model.train() if train else model.eval(); loss_sum=0;n=0
    for z,x,y,m,*_ in loader:
        v=m.view(-1); z=z.view(-1,z.size(2))[v].to(cfg.device)
        x=x.view(-1,x.size(2),3)[v].to(cfg.device); y=y.view(-1)[v].to(cfg.device)
        with autocast(enabled=(cfg.device=='cuda')):
            pred=model(z,x).flatten(); loss=p_fn(pred,y)
        if train:
            opt.zero_grad(); scaler.scale(loss).backward()
            scaler.step(opt); scaler.update()
        loss_sum+=loss.item(); n+=1
    return loss_sum/n

# ================================================================
# 6) train
# ================================================================
for e in range(cfg.epochs):
    tr=run(tr_loader,True)
    va=run(va_loader,False); sch.step(va)
    print(f"[{e+1}/{cfg.epochs}]  train {tr:.4f} | val {va:.4f}")
print(time.time() - t0,"sec")

Run‑ID: 20250726_233219
params: 19479




[1/10]  train 1.8082 | val 1.6452
[2/10]  train 1.3010 | val 1.1606
[3/10]  train 1.0970 | val 1.0790
[4/10]  train 1.0353 | val 1.0194
[5/10]  train 0.9837 | val 0.9806
[6/10]  train 0.9416 | val 0.9487
[7/10]  train 0.9137 | val 0.9431
[8/10]  train 0.8695 | val 0.9199
[9/10]  train 0.8320 | val 0.9162
[10/10]  train 0.8559 | val 0.9077
79.04292941093445 sec
