In [1]:
import numpy as np
import torch
import torch_geometric as pyg
import graphPINN
import math
import logging
from time import time
from tqdm.notebook import tqdm
from scipy.io import savemat, loadmat

# Output directory for this run: the log file, model checkpoints and .mat loss
# files are all written here.
# The trailing path separator is required because every path in this notebook is
# built by plain concatenation, e.g. f'{folder}run.log'.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
folder = "C:\\Users\\nhmathew\\Documents\\code\\run-2023-03-24\\"

In [2]:
def pretty_size(n,pow=0,b=1024,u='B',pre=['']+[p+'i'for p in'KMGTPEZY']):
    """Format a quantity as a human-readable string such as ``"24.0 GiB"``.

    Parameters
    ----------
    n : number
        The quantity, expressed in units of ``b**pow`` base units.
    pow : int
        Power of ``b`` that ``n`` is already scaled by (0 means ``n`` is in base units).
    b : int
        Base between consecutive prefixes (1024 for binary prefixes).
    u : str
        Unit suffix appended after the prefix.
    pre : sequence of str
        Prefixes indexed by exponent; the chosen exponent is capped at the last entry.

    Returns
    -------
    str
        ``"<value> <prefix><unit>"`` with no decimals when the empty prefix
        (exponent 0) is selected and one decimal otherwise.
    """
    # Work in base units from here on.
    total = n * b**pow

    # Largest exponent e with b**e <= total; values below 1 are clamped so the
    # logarithm is defined, and the exponent never runs past the prefix table.
    exponent = int(math.log(max(total, 1), b))
    exponent = min(exponent, len(pre) - 1)

    # For a non-negative exponent this is 0 when exponent == 0 and 1 otherwise.
    decimals = abs(exponent % (-exponent - 1))

    template = "%%.%if %%s%%s" % decimals
    return template % (total / b**float(exponent), pre[exponent], u)

# Append timestamped INFO-level records to run.log inside the run folder.
logging.basicConfig(
    filename=f'{folder}run.log',
    format='%(asctime)s - %(message)s',
    filemode='a+',
    level=logging.INFO,
)
def logfn(message, tq=True):
    """Record ``message`` in the log and echo it to the notebook output.

    Parameters
    ----------
    message : str
        Text to log at INFO level and to display.
    tq : bool
        When true the message is echoed through ``tqdm.write``;
        otherwise it is echoed with ``print``.
    """
    logging.info(message)
    # Only the selected sink is looked up and called.
    echo = tqdm.write if tq else print
    echo(message)

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Log how much memory the compute device offers.
# torch.cuda.get_device_properties(0) raises when no CUDA device is present, so it
# is only queried on the CUDA path; the CPU path just reports the fallback.
if torch.cuda.is_available():
    logfn(f"{pretty_size(torch.cuda.get_device_properties(0).total_memory)} of cuda memory")
else:
    logfn("cuda is not available; running on cpu")

24.0 GiB of cuda memory


In [5]:
# Loader batch size and the `k` value the dataset was built with
# (k is passed to MHSDataset and is also part of the dataset directory name).
batch_size = 1
k = 100

dataset = graphPINN.data.MHSDataset(f'D:\\nats ML stuff\\data_k={k}',k=k)

# Fixed seed so the train/validation/test partition is the same on every run.
split_generator = torch.Generator().manual_seed(314)
split_fractions = [0.8, 0.1, 0.1]
# Tiny split for quick smoke runs:
# split_fractions = [0.005, 0.001, 0.994]
trainset, validset, testset = torch.utils.data.random_split(
    dataset,
    split_fractions,
    generator=split_generator,
)

Processing...


[[        nan         nan         nan         nan         nan         nan]
 [-0.28392392  0.81494624 -0.71732434  0.23404927 -0.14284131 -0.00602433]
 [-0.36476276  0.77855327 -0.68413518  0.26200218 -0.0961105  -0.04541946]
 ...
 [ 0.55212457 -0.85547023 -0.84863978  0.38177943  0.2564918   0.25028223]
 [ 0.47173118 -0.82666037 -0.76495981  0.37654531  0.23326542  0.2541595 ]
 [ 0.40269666 -0.79750435 -0.68892387  0.37747599  0.20596134  0.25480139]]


  0%|                                                                                            | 0/6 [00:00<?, ?it/s]

[[        nan         nan         nan         nan         nan         nan]
 [-0.28392392  0.81494624 -0.71732434  0.23404927 -0.14284131 -0.00602433]
 [-0.36476276  0.77855327 -0.68413518  0.26200218 -0.0961105  -0.04541946]
 ...
 [ 0.55212457 -0.85547023 -0.84863978  0.38177943  0.2564918   0.25028223]
 [ 0.47173118 -0.82666037 -0.76495981  0.37654531  0.23326542  0.2541595 ]
 [ 0.40269666 -0.79750435 -0.68892387  0.37747599  0.20596134  0.25480139]]


 17%|██████████████                                                                      | 1/6 [00:09<00:49,  9.98s/it]

[[        nan         nan         nan         nan         nan         nan]
 [-0.69358968  0.19867853  0.16241398 -0.32219482 -0.35975336  0.66328238]
 [-0.6693937   0.17093083  0.14719214 -0.36720971 -0.29064197  0.53996955]
 ...
 [ 0.24235124  0.23656706  0.04864346  0.05830191  0.04006793 -1.03578976]
 [ 0.29020659  0.22094354  0.08837072  0.06546805  0.02957868 -1.01415173]
 [ 0.33349644  0.20429681  0.13059272  0.06888747  0.01586599 -0.99722353]]


 17%|██████████████                                                                      | 1/6 [00:16<01:22, 16.60s/it]


KeyboardInterrupt: 

In [None]:
# Design lists handed to graphPINN.KernelNN for the two kernels.
# NOTE(review): presumably the per-layer widths of each kernel network — confirm in graphPINN.
# Smaller alternatives: convdesign = [18,3] and propdesign = [12,3].
convdesign = [18,9,6,3]
propdesign = [12,6,3]
activation = torch.nn.ReLU

# Construction order is kept (conv kernel, conv graph, prop kernel, prop graph) so
# that any random initialisation consumes the RNG in the same sequence as before.
convkernel = graphPINN.KernelNN(convdesign, activation)
convgraph = graphPINN.ConvGraph(convkernel)

propkernel = graphPINN.KernelNN(propdesign, activation)
propgraph = graphPINN.BDPropGraph(propkernel)

# Full model combining both graphs, moved to the selected device.
model = graphPINN.FullModel(propgraph, convgraph).to(device)

In [None]:
def _prepare_batch(data):
    """Turn one loader batch into the homogeneous graph and target list used by the model.

    Adds a ('bd', 'propagates', 'in') relation built from an all-ones
    (n_bd x n_in) matrix via ``dense_to_sparse``, converts the batch with
    ``to_homogeneous()``, moves it to ``device`` and slices out the target list.

    Returns
    -------
    (data, true)
        ``data`` is the homogeneous graph on ``device``; ``true`` is
        ``[data.y[:, 0:3], data.x[:, 3], data.x[:, 4], data.x[:, 5]]``.
    """
    n_bd = data['bd'].x.shape[0]
    n_in = data['in'].x.shape[0]
    data['bd','propagates','in'].edge_index, data['bd','propagates','in'].edge_attr = \
        pyg.utils.dense_to_sparse(torch.ones(n_bd, n_in))

    data = data.to_homogeneous()
    data = data.to(device)

    true = [data.y[:,0:3],data.x[:,3],data.x[:,4],data.x[:,5]]
    return data, true


def _mean_or_nan(total, count):
    """Return ``total / count``, or NaN when ``count`` is zero (every sample was skipped)."""
    if count <= 0:
        return float('nan')
    return total / count


def train(model, epochs = 1, lbfgs = False, lossindex=-1, start_epoch = 0):
    """Train ``model`` on ``trainset`` and evaluate it on ``validset`` after every epoch.

    Relies on notebook globals: ``trainset``, ``validset``, ``batch_size``,
    ``folder``, ``k``, ``device`` and ``logfn``.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise; called as ``model.forward(data)``.
    epochs : int
        Upper bound of the epoch range; epochs ``start_epoch .. epochs-1`` are run.
    lbfgs : bool
        Use ``torch.optim.LBFGS`` (with a closure) instead of ``torch.optim.Adam``.
    lossindex
        Forwarded unchanged as ``index=`` to ``graphPINN.MHS.loss``.
    start_epoch : int
        When non-zero, the checkpoint written after that epoch is loaded before
        training resumes. Only model weights are restored, not optimizer state.

    Returns
    -------
    (running_loss, training_loss, validation_loss)
        ``running_loss`` is the summed loss over the non-skipped training samples
        of the last epoch run (0 if no epoch ran). The two tensors have shape
        ``(4, epochs)``; rows are the per-epoch means of the vec, mhs and div
        components and of the total loss, in that order. A column is NaN when
        every sample of that epoch was skipped.
    """
    if lbfgs:
        optimizer = torch.optim.LBFGS(model.parameters())
    else:
        optimizer = torch.optim.Adam(model.parameters())

    training_loss   = torch.zeros(4,epochs)
    validation_loss = torch.zeros(4,epochs)

    if start_epoch != 0:
        # map_location lets a checkpoint saved on one device be resumed on another.
        model.load_state_dict(torch.load(f'{folder}epoch-{start_epoch}_trainsize-{len(trainset)}_k-{k}.pt',
                                         map_location=device))

    # The loaders do not shuffle and can be iterated repeatedly, so build them once.
    trainLoader = pyg.loader.DataLoader(trainset, batch_size=batch_size,shuffle=False)
    validLoader = pyg.loader.DataLoader(validset, batch_size=batch_size,shuffle=False)

    # Defined up front so the return statement is valid even if no epoch runs.
    running_loss = 0

    for epoch in range(start_epoch, epochs):
        # Validation below switches the whole model to eval mode, so switch the
        # whole model (not just one sub-module) back to training mode here.
        model.train(True)

        running_loss = 0
        running_vec = 0
        running_mhs = 0
        running_div = 0
        skipped = 0
        start_time = time()

        for step, batch in enumerate(tqdm(trainLoader), start=1):
            data, true = _prepare_batch(batch)

            optimizer.zero_grad()

            pred = model.forward(data)

            loss, vec_diff, mhs_diff, div_diff = graphPINN.MHS.loss(pred,true, index=lossindex, logfn=None)

            # Samples whose vec+mhs loss is too large are skipped; a NaN sum also
            # fails this comparison and is skipped.
            if vec_diff+mhs_diff < 1e2:
                if lbfgs:
                    def closure():
                        # LBFGS re-evaluates the objective itself; use exactly the
                        # same loss call as above and return only the scalar loss.
                        optimizer.zero_grad()
                        closure_loss = graphPINN.MHS.loss(model.forward(data), true,
                                                          index=lossindex, logfn=None)[0]
                        closure_loss.backward()
                        return closure_loss
                    optimizer.step(closure)
                else:
                    loss.backward()
                    optimizer.step()

                # Accumulate plain floats so no autograd graph is kept alive by the sums.
                running_loss += loss.item()
                running_vec += float(vec_diff)
                running_mhs += float(mhs_diff)
                running_div += float(div_diff)
                logfn(f'epoch {epoch+1} iter {step}/{len(trainLoader)}, loss {loss.item()}')
            else:
                logfn(f'loss {vec_diff}+{mhs_diff}={vec_diff+mhs_diff}  skipped...')
                skipped += 1

        used = len(trainLoader) - skipped
        training_loss[:,epoch] = torch.tensor((_mean_or_nan(running_vec, used),
                                               _mean_or_nan(running_mhs, used),
                                               _mean_or_nan(running_div, used),
                                               _mean_or_nan(running_loss, used)))
        logfn(f'Epoch {epoch+1} completed. Loss: {training_loss[3,epoch]}; Total skipped: {skipped}; Total time: {time()-start_time}', tq=True)
        logfn(f'running vec: {training_loss[0,epoch]}, running mhs: {training_loss[1,epoch]}, running div: {training_loss[2,epoch]}')

        model.train(False)
        torch.save(model.state_dict(), f'{folder}epoch-{epoch+1}_trainsize-{len(trainset)}_k-{k}.pt')

        # NOTE(review): validation is not wrapped in torch.no_grad() because it is not
        # visible here whether graphPINN.MHS.loss needs autograd — confirm before adding it.
        start_time = time()
        valid_skipped = 0
        valid_vec = 0
        valid_mhs = 0
        valid_div = 0
        running_valid = 0
        for batch in validLoader:
            data, true = _prepare_batch(batch)
            pred = model.forward(data)

            loss, vec, mhs, div = graphPINN.MHS.loss(pred,true, index=lossindex, logfn=None)

            # The validation skip threshold (2e2) is looser than the training one (1e2).
            if vec+mhs < 2e2:
                running_valid += loss.item()
                valid_vec += float(vec)
                valid_mhs += float(mhs)
                valid_div += float(div)
            else:
                valid_skipped += 1

        valid_used = len(validLoader) - valid_skipped
        validation_loss[:,epoch] = torch.tensor((_mean_or_nan(valid_vec, valid_used),
                                                 _mean_or_nan(valid_mhs, valid_used),
                                                 _mean_or_nan(valid_div, valid_used),
                                                 _mean_or_nan(running_valid, valid_used)))
        logfn(f'Validation loss: {validation_loss[3,epoch]}, total skipped: {valid_skipped}, validation time: {time()-start_time}', tq=True)

    return running_loss, training_loss, validation_loss

# Staged training: train() is called once per entry of `index_array`, each time for
# `epochs` epochs with that entry passed as `lossindex`.
# NOTE(review): presumably each entry selects which loss components are active — confirm
# against graphPINN.MHS.loss.
# Every stage restarts at epoch 0 with a fresh optimizer, so the per-epoch checkpoint
# files written inside train() are overwritten by later stages.
epochs = 5
key = 0
lossdict = {'index_array':[[0,1,2],[3,4,5],[0,1,2],[3,4,5],-1]}
for index in tqdm(lossdict['index_array']):
    print(f'key {key} - index {index}')
    # The original `if key == 0 / else` had identical branches; one call covers both.
    loss, training_loss, validation_loss = train(model, epochs=epochs, lossindex=index)

    # The same weights are written under both historical file names.
    stage_state = model.state_dict()
    torch.save(stage_state, f'{folder}key-{key}_trainsize-{len(trainset)}_k-{k}.pt')

    lossdict[f'train{key}'] = training_loss.numpy()
    lossdict[f'valid{key}'] = validation_loss.numpy()
    logfn(f'training loss:\n{lossdict[f"train{key}"]}', tq=False)
    logfn(f'validation loss:\n{lossdict[f"valid{key}"]}', tq=False)

    torch.save(stage_state, f'{folder}model_key-{key}_trainsize-{len(trainset)}_k-{k}.pt')

    # NOTE(review): `key` is incremented before the loss file is named, so loss files
    # are numbered 1..N while model files are numbered 0..N-1. Kept as-is so existing
    # file names do not change.
    key = key + 1
    savemat(f'{folder}loss_key-{key}_params-{math.prod(convdesign)+math.prod(propdesign)}.mat', lossdict)

# Stored as text from here on, so later saves of `lossdict` write the index list as a string.
lossdict['index_array'] = str(lossdict['index_array'])

In [None]:
# Write the complete loss history of all stages to a single .mat file.
# The `params` tag in the name is the sum of the products of the two design lists.
final_loss_path = f'{folder}loss_{epochs}_trainsize-{len(trainset)}_k-{k}_params-{math.prod(convdesign)+math.prod(propdesign)}.mat'
savemat(final_loss_path, lossdict)

In [None]:
# Report current/peak allocated memory and reserved/total device memory.
# get_device_properties(0) raises without a CUDA device, so only query it on CUDA.
if torch.cuda.is_available():
    print(f'{pretty_size(torch.cuda.memory_allocated())}/{pretty_size(torch.cuda.max_memory_allocated())} allocated, ' +
          f'{pretty_size(torch.cuda.memory_reserved())}/{pretty_size(torch.cuda.get_device_properties(0).total_memory)} reserved')
else:
    print('cuda is not available; no GPU memory statistics to report')

In [None]:
# convgraph.load_state_dict(torch.load('epoch-0_trainsize-3092_k-100.pt'))

In [None]:
# Take the first graph of the dataset and add the same ('bd', 'propagates', 'in')
# relation that the training loop adds, built from an all-ones (n_bd x n_in) matrix.
sample = dataset[0]

n_bd = sample['bd'].x.shape[0]
n_in = sample['in'].x.shape[0]
bd_to_in_index, bd_to_in_attr = pyg.utils.dense_to_sparse(torch.ones(n_bd, n_in))

sample['bd','propagates','in'].edge_index = bd_to_in_index
sample['bd','propagates','in'].edge_attr = bd_to_in_attr

print(sample)

In [None]:
# Convert the sample graph with to_homogeneous(), the same call the training loop
# applies to every batch before it is fed to the model.
homosamp = sample.to_homogeneous()

In [None]:
# Inspect the feature matrix stored on the 'in' nodes of the sample graph.
in_features = sample['in'].x
print(in_features)

In [None]:
# Show the endpoints of every edge whose homogeneous edge_type id equals 1.
# NOTE(review): which relation id 1 corresponds to depends on to_homogeneous() — confirm.
is_type1 = homosamp.edge_type == 1
print(homosamp.edge_index[:, is_type1])

In [None]:
# Display the accumulated loss dictionary (index_array plus train<key>/valid<key> arrays).
lossdict

In [None]:
# Reload the loss file written by the final savemat cell of this run.
# The name is derived from the same variables used when saving; the previous
# hard-coded `5` / `params-90` only matched a run with the smaller kernel designs.
# To inspect an older run, substitute that run's values here.
mat = loadmat(f'{folder}loss_{epochs}_trainsize-{len(trainset)}_k-{k}_params-{math.prod(convdesign)+math.prod(propdesign)}.mat')

In [None]:
# Print every entry of the loaded .mat dictionary: the entry name, then its value.
for entry_name, entry_value in mat.items():
    print(entry_name)
    print(entry_value)

In [None]:
# Scratch cell: displays the range object range(0, 5); nothing else depends on it.
range(5)