In [13]:
import torch, pickle, time, os, random
import numpy as np
import os.path as osp
import matplotlib.pyplot as plt
import torch_geometric as tg
from torch_geometric.loader import DataLoader
# accelerate huggingface to GPU
if torch.cuda.is_available():
    from accelerate import Accelerator
    accelerator = Accelerator()
    device = accelerator.device
from pysr import pysr, best
from tqdm import tqdm
torch.manual_seed(42)
random.seed(42)


print('Loading data')

case='vlarge_all_4t_z0.0_standard_raw'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at data files you trust.
# The context manager guarantees the file handle is closed even if unpickling fails.
with open(osp.expanduser(f'~/../../../scratch/gpfs/cj1223/GraphStorage/{case}/data.pkl'), 'rb') as pkl_file:
    datat = pickle.load(pkl_file)

from torch_geometric.data import Data

# Rebuild every graph keeping only feature columns 0, 3, 4, 19 and 20 and the
# first entry of the stored target.
data = [
    Data(x=d.x[:,[0,3,4,19,20]], edge_index=d.edge_index, edge_attr=d.edge_attr, y=d.y[0])
    for d in datat
]

# Number of target columns: a target without a length (0-d tensor / plain scalar)
# makes len() raise TypeError, which is treated as a single target. Any other
# error (e.g. an empty dataset) is allowed to surface instead of being swallowed.
try:
    n_targ=len(data[0].y)
except TypeError:
    n_targ=1
# Number of node features, read from the first node of the first graph.
n_feat=len(data[0].x[0])

print('Loaded data')

from torch.nn import ReLU, Linear, Module, LayerNorm, Sequential
class MLP(Module):
    """Fully connected network: ``n_in -> hidden -> ... -> hidden -> n_out``.

    The stack is ``1 + nlayers`` (Linear, ReLU) pairs, an optional LayerNorm
    over the hidden width, and a final Linear projection to ``n_out``.

    Parameters
    ----------
    n_in : int
        Width of the input features.
    n_out : int
        Width of the output.
    hidden : int, default 128
        Width of every hidden layer.
    nlayers : int, default 2
        Number of hidden-to-hidden (Linear, ReLU) pairs added after the input pair.
    layer_norm : bool, default True
        Whether to insert a LayerNorm just before the output projection.
    """

    def __init__(self, n_in, n_out, hidden=128, nlayers=2, layer_norm=True):
        super().__init__()
        # Consecutive entries give the (in, out) width of each Linear+ReLU pair.
        widths = [n_in, hidden] + [hidden] * nlayers
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((Linear(fan_in, fan_out), ReLU()))
        if layer_norm:
            stack.append(LayerNorm(hidden))
        stack.append(Linear(hidden, n_out))
        self.mlp = Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        return self.mlp(x)

Loading data
Loaded data


In [17]:
from torch_geometric.nn import global_add_pool
from torch_scatter import scatter_add

class GCN(torch.nn.Module):
    """One round of sum-aggregated message passing followed by a summed graph readout.

    Sub-networks (all ``MLP`` instances with ``hidden_channels`` hidden width):

    * ``g1`` maps the ``n_feat`` node features to ``n_outs`` channels,
    * ``g2`` transforms the per-node sum of gathered neighbour embeddings,
    * ``g3`` transforms the updated node embeddings,
    * ``f`` maps the pooled graph embedding to ``n_targ`` outputs.
    """

    def __init__(self, n_outs=3, hidden_channels=64, n_feat=5, n_targ=1):
        super().__init__()
        width = dict(hidden=hidden_channels)
        # Sub-modules are created in the order g1, g2, g3, f.
        self.g1 = MLP(n_feat, n_outs, **width)
        self.g2 = MLP(n_outs, n_outs, **width)
        self.g3 = MLP(n_outs, n_outs, **width)
        self.f = MLP(n_outs, n_targ, **width)

    def forward(self, x, edge_index, batch):
        """Return one prediction row per graph in the batch.

        ``x`` holds node features, ``edge_index`` is indexed as ``[0]`` (rows
        to gather) and ``[1]`` (rows to scatter into), and ``batch`` assigns
        each node to a graph for the final sum pooling.
        """
        gather_idx, scatter_idx = edge_index[0], edge_index[1]

        # Node embeddings.
        h = self.g1(x)

        # Sum the embeddings gathered at edge_index[0] into the rows named by edge_index[1].
        aggregated = scatter_add(h[gather_idx], scatter_idx, dim=0)
        messages = self.g2(aggregated)

        # Only rows that appear in edge_index[1] receive a message.
        h[scatter_idx] += messages[scatter_idx]
        h = self.g3(h)

        # Sum node embeddings per graph, then decode to the target(s).
        return self.f(global_add_pool(h, batch))
    
# Build a model instance and inspect whether its parameters live on a CUDA device.
# `model` is created again in a later cell with `hidden_channels=hidden`.
model = GCN(hidden_channels=64)
# NOTE(review): this expression is not the last statement of the cell, so its
# value is computed but not displayed.
next(model.parameters()).is_cuda ##check number one

from sklearn.model_selection import train_test_split
# Loss used by train(); the alternatives below are kept for reference.
criterion = torch.nn.MSELoss()
# criterion = torch.nn.L1Loss()
# criterion = torch.nn.SmoothL1Loss(beta=0.3)


In [18]:

# ---- run configuration -------------------------------------------------
n_epochs = 15
n_trials = 1
batch_size = 2 ** 8
split = 0.8          # leading fraction of `data` used for training
l1_lambda = 1e-4     # weight of the L1 parameter penalty read by train()
l2_lambda = 0        # weight of the L2 parameter penalty read by train()
hidden = 128         # hidden width of every MLP inside the GCN

# ---- ordered (unshuffled) train / test split ----------------------------
n_train = int(len(data) * split)
train_data, test_data = data[:n_train], data[n_train:]
# train_data, test_data=train_test_split(data, test_size=0.2)

yss, preds = [], []

# ---- model, loaders, optimiser -------------------------------------------
model = GCN(hidden_channels=hidden)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

# Report whether the model parameters sit on a CUDA device.
print('GPU ', next(model.parameters()).is_cuda)
# Initialize our train function
def train():
    """Run one optimisation pass over ``train_loader`` with the global ``model``.

    For every batch the objective is ``criterion(prediction, target)`` plus
    ``l1_lambda * sum(|p|)`` and ``l2_lambda * sum(p**2)`` over all model
    parameters. ``l1_lambda`` and ``l2_lambda`` are read from module scope at
    call time, so later cells may change them between epochs. A penalty whose
    weight is zero is skipped instead of being computed and multiplied by zero.

    Returns
    -------
    float
        Mean of the per-batch total loss (data term plus penalties), or NaN
        when the loader yields no batches. Existing callers that ignore the
        return value are unaffected.
    """
    model.train()

    batch_losses = []
    for batch in tqdm(train_loader, total=len(train_loader)):
        # Clear gradients before the backward pass so that gradients left over
        # from any earlier backward call cannot leak into this update.
        optimizer.zero_grad()

        out = model(batch.x, batch.edge_index, batch.batch)
        # Shape the targets with n_targ, exactly as test() does, instead of
        # hard-coding a single column.
        loss = criterion(out, batch.y.view(-1, n_targ))

        if l1_lambda:
            loss = loss + l1_lambda * sum(p.abs().sum() for p in model.parameters())
        if l2_lambda:
            loss = loss + l2_lambda * sum(p.pow(2.0).sum() for p in model.parameters())

        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    if not batch_losses:
        return float('nan')
    return sum(batch_losses) / len(batch_losses)
#     print(loss, l1_norm*l1_lambda, l2_norm*l2_lambda)
 # test function

def test(loader): ##### transform back missing
    """Evaluate the global ``model`` on every batch of ``loader`` without gradients.

    NOTE (carried over from the original author): "transform back missing" --
    no transformation is applied to predictions or targets here.

    Parameters
    ----------
    loader
        Iterable of batches exposing ``x``, ``edge_index``, ``batch`` and ``y``.

    Returns
    -------
    tuple
        ``(scatter, predictions, targets)`` where ``scatter`` is the standard
        deviation of ``predictions - targets`` along axis 0, and the other two
        are the row-stacked model outputs and targets (``n_targ`` columns).
    """
    model.eval()
    predicted, truth = [], []
    # Gradient tracking is disabled for the whole evaluation pass
    # (original note: "this solves it!!!").
    with torch.no_grad():
        for graphs in tqdm(loader, total=len(loader)):
            predicted.append(model(graphs.x, graphs.edge_index, graphs.batch))
            truth.append(graphs.y.view(-1, n_targ))
    predicted = torch.vstack(predicted)
    truth = torch.vstack(truth)
    residual_std = torch.std(predicted - truth, axis=0)
    return residual_std, predicted, truth


GPU  False


In [None]:
#this uses about 1 GB of memory on the GPU
# Number of passes over the training set for this run; replaces the value
# assigned in the configuration cell.
n_epochs = 10 
# Replaces the module-level L2 penalty weight that train() reads.
l2_lambda =1e-5 #don't know if this is relevant
# History of the residual scatter returned by test() on the train and test
# loaders (despite the "acc" naming these are standard deviations).
tr_acc, te_acc = [], []
start=time.time()
for epoch in range(n_epochs):
    print(epoch)
    train()
    # Evaluate on both loaders only after every second epoch.
    if (epoch+1)%2==0:
        train_acc, _ , _ = test(train_loader)
        test_acc, _ , _ = test(test_loader)
        tr_acc.append(train_acc.cpu())
        te_acc.append(test_acc.cpu())
        print(f'Epoch: {epoch+1:03d}, Train scatter: {np.round(train_acc.cpu().numpy(), 4)} \n \
        Test scatter: {np.round(test_acc.cpu().numpy(), 4)}')
stop=time.time()
spent=stop-start
# The elapsed time covers the periodic evaluations as well as training; the
# throughput figure uses len(data)*split as the number of training graphs.
print(f"{spent:.2f} seconds spent training, {spent/n_epochs:.3f} seconds per epoch. Processed {len(data)*split*n_epochs/spent:.0f} trees per second")



0


100%|█████████████████████████████████████████████████████████████| 341/341 [02:52<00:00,  1.98it/s]


1


100%|█████████████████████████████████████████████████████████████| 341/341 [02:53<00:00,  1.96it/s]
100%|█████████████████████████████████████████████████████████████| 341/341 [01:12<00:00,  4.68it/s]
100%|███████████████████████████████████████████████████████████████| 86/86 [00:18<00:00,  4.67it/s]


Epoch: 002, Train scatter: [0.3627] 
         Test scatter: [0.3612]
2


100%|█████████████████████████████████████████████████████████████| 341/341 [02:53<00:00,  1.96it/s]


3


100%|█████████████████████████████████████████████████████████████| 341/341 [02:53<00:00,  1.96it/s]
 92%|████████████████████████████████████████████████████████▎    | 315/341 [01:07<00:05,  4.45it/s]

In [None]:
from datetime import date

# Date stamp (DDMMYY) used in the checkpoint file name; `today` ends up as a string.
today = date.today().strftime("%d%m%y")

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs('trained_models', exist_ok=True)

# `epoch` is the last (0-based) index left behind by the training loop.
torch.save(model.state_dict(),f'trained_models/model_{epoch}_{today}.pt') 

In [None]:
# Rebuild the network with the same hidden width used for training, then restore
# the weights written by the save cell.
# Fixes: the keyword was misspelled (`hidden_channelse`), the file name was passed
# as a second positional argument to torch.load instead of being joined into the
# path, and the `date` class was formatted where the `today` stamp string belongs.
model = GCN(hidden_channels=hidden)
model.load_state_dict(torch.load(osp.join('trained_models', f'model_{epoch}_{today}.pt')))

In [6]:
# Mean absolute parameter value (times 100) of each sub-network, printed in
# the order decoder (f), encoder (g1), edge (g2), both (g3).
for part_label, part in (
    ('decoder', model.f),
    ('encoder', model.g1),
    ('edge', model.g2),
    ('both', model.g3),
):
    abs_total = sum(p.abs().sum() for p in part.parameters())
    n_values = sum(p.numel() for p in part.parameters())
    print(part_label, abs_total/n_values*100)

decoder tensor(2.2795, grad_fn=<MulBackward0>)
encoder tensor(1.3704, grad_fn=<MulBackward0>)
edge tensor(0.1375, grad_fn=<MulBackward0>)
both tensor(1.4758, grad_fn=<MulBackward0>)


In [None]:
# Load weights from <pointer>/<model_runs[k]>/trained_model/model.pt into the existing `model`.
# NOTE(review): `pointer`, `model_runs` and `k` are not defined in any cell visible
# here -- this cell depends on state created elsewhere; confirm before a Restart & Run All.
model.load_state_dict(torch.load(osp.join(pointer, model_runs[k], 'trained_model', 'model.pt')))

In [None]:
# Per-batch recordings of the inputs/outputs of each sub-network, collected by
# replaying the steps of GCN.forward on the test loader.
y_f = []    # decoder outputs (one row per graph)
y_g1 = []   # encoder outputs (one row per node), before any message is added
y_g2 = []   # outputs of g2 on the summed neighbour embeddings
y_g3 = []   # outputs of g3 on the updated node embeddings
x_g1 = []   # raw node features fed to g1
y_t = []    # targets
batch = []  # node -> graph assignment vectors

model.eval()
# No gradients are needed for recording; this also avoids building autograd graphs.
with torch.no_grad():
    for dat in test_loader:
        x_g1.append(dat.x.cpu().detach().numpy())
        yg1 = model.g1(dat.x)
        y_g1.append(yg1.cpu().detach().numpy())

        adj = dat.edge_index

        yg2 = model.g2(scatter_add(yg1[adj[0]], adj[1], dim=0))
        y_g2.append(yg2.cpu().detach().numpy())

        # Work on a copy: on CPU the array stored in y_g1 shares memory with
        # `yg1`, so an in-place update of `yg1` would silently overwrite the
        # recorded encoder outputs with encoder-plus-message values.
        updated = yg1.clone()
        updated[adj[1]] += yg2[adj[1]]

        yg3 = model.g3(updated)
        # Previously declared but never filled.
        y_g3.append(yg3.cpu().detach().numpy())

        x_f = global_add_pool(yg3, dat.batch)

        yf = model.f(x_f)
        y_f.append(yf.cpu().detach().numpy())

        batch.append(dat.batch.cpu().detach().numpy())
        y_t.append(dat.y.cpu().detach().numpy())

In [None]:
# Restrict the symbolic-regression sample to the leading rows of the first
# recorded batch that belong to its first N graph ids.
N=25
# counts[i] is how many entries of the first batch vector equal the i-th
# distinct graph id (np.unique returns the ids sorted).
vals, counts = np.unique(batch[0], return_counts=1)
# Cumulative node count through the N-th graph id = number of leading rows to keep.
# NOTE(review): assumes rows are ordered by graph id within a batch, and that the
# first batch holds at least N graphs (otherwise this indexing raises) -- confirm.
l = np.cumsum(counts)[N-1]
# Inputs and outputs of g1 for those rows.
x_g1_pysr=np.vstack(x_g1[0][:l])
y_g1_pysr=np.vstack(y_g1[0][:l])

# Matching graph ids for the selected rows.
b_pysr = batch[0][:l]

In [None]:
##
# Symbolic regression of the recorded g1 outputs (y_g1_pysr) as a function of
# the raw node features (x_g1_pysr), using the function-style `pysr` entry point
# imported at the top of the notebook.
# NOTE(review): y_g1_pysr is built from the g1 outputs, which have one column per
# output channel -- confirm the installed pysr version accepts a 2-D `y`.
g1_equations = pysr(
    X=x_g1_pysr, y=y_g1_pysr,
    procs=4,
    niterations=20,
    populations=20,
    useFrequency=True,
    multithreading=True, 
    binary_operators=["plus", "sub", "mult", "div"],
    unary_operators = ['log10_abs', 'sqrt_abs'], ##still need a general power law
    batching=1, 
    batchSize=256,
    maxsize=10, update=False
)

In [155]:
# Interactive lookup of the installed `pysr` signature and defaults
# (development aid; prints a long help text).
help(pysr)

Help on function pysr in module pysr.sr:

pysr(X, y, weights=None, binary_operators=None, unary_operators=None, procs=4, loss='L2DistLoss()', populations=20, niterations=100, ncyclesperiteration=300, alpha=0.1, annealing=False, fractionReplaced=0.1, fractionReplacedHof=0.1, npop=1000, parsimony=0.0001, migration=True, hofMigration=True, shouldOptimizeConstants=True, topn=10, weightAddNode=1, weightInsertNode=3, weightDeleteNode=3, weightDoNothing=1, weightMutateConstant=10, weightMutateOperator=1, weightRandomize=1, weightSimplify=0.01, perturbationFactor=1.0, timeout=None, extra_sympy_mappings=None, extra_torch_mappings=None, extra_jax_mappings=None, equation_file=None, verbosity=1000000000.0, progress=None, maxsize=20, fast_cycle=False, maxdepth=None, variable_names=None, batching=False, batchSize=50, select_k_features=None, warmupMaxsizeBy=0.0, constraints=None, useFrequency=True, tempdir=None, delete_tempfiles=True, julia_optimization=3, julia_project=None, user_input=True, update=