In [23]:
import os, sys, argparse, importlib, time, inspect
import numpy as np
import matplotlib.pyplot as plt
import os.path as osp
# Choose the tqdm flavour that matches the front end. The test looks for an
# `__IPYTHON__` attribute on `__builtins__` and prints which branch was taken.
# NOTE(review): the attribute is presumably also present in a plain IPython
# terminal, where the widget-based bar is not what you want — confirm.
if hasattr(__builtins__,'__IPYTHON__'):
    print('Notebook')
    from tqdm.notebook import tqdm  # notebook progress bar
else:
    print('Not notebook')
    from tqdm import tqdm  # standard text progress bar
from tensorflow.keras import backend as K
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 
import tensorflow as tf
import tensorflow_probability as tfp

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up
# front. The setting is applied to every visible GPU rather than only the
# first one, because TensorFlow expects memory growth to be configured
# consistently across GPUs.
gpu_devices = tf.config.list_physical_devices('GPU')
if len(gpu_devices) > 0:
    print("GPU detected")
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    print('No GPU detected')

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.models import load_model, Model
import spektral

from spektral.data import DisjointLoader, BatchLoader, SingleLoader
from importlib import reload
import winsound
import dill, wandb
import datetime as dt
# Set to 1 to log this run to Weights & Biases.
wandblog=0
if wandblog:
    # Never hard-code the API key in the notebook. wandb.login() picks the
    # credential up from the WANDB_API_KEY environment variable (or a previous
    # login) and otherwise prompts for it.
    # NOTE(review): the key that used to be written here has been committed in
    # plain text and should be revoked.
    wandb.login()
# Number of training runs started in this kernel session.
run_counter=0

Notebook
GPU detected


In [24]:
# Display the TensorFlow and Spektral versions this notebook is running with.
tf.__version__, spektral.__version__

('2.4.1', '1.0.3')

In [25]:
################################################
# Setup Default Variables                      #
################################################
learning_rate = 1e-4  # initial learning rate (given to Adam and to lr_schedule)
batch_size    = 512  # batch size used for every DisjointLoader in this notebook
epochs        = 20  # number of epochs given to the training loader
n_data       = 1e4  # passed to graph_data as n_data; NOTE(review): this is a float — confirm the loader accepts a non-integer count
scenario    = "GAT_test"+str(run_counter)  # run name: used as the wandb run name and as the sub-folder of trained_models/
patience = 20  # epochs without validation improvement before early stopping; NOTE(review): equal to `epochs`, so the stop can fire on the final epoch at the earliest — confirm intended

################################################
# Setup Hyperparameters                        #
################################################
# In the visible cells these values are only written to wandb.config; none of
# them is passed to the model or to the loss.
hidden_states = 'N/A'
forward       = False
dropout       = 'None'
# NOTE(review): the loss actually used later is loss_probreg.sig_likelihood, so
# this logged name looks stale — confirm.
loss_method   = "loss_func_linear_angle"
n_neighbors   = 6 # fill this in by hand

In [26]:
################################################
# Load data                                    #
################################################

# data_load is a project module; reload() re-executes it so edits made to the
# file are picked up without restarting the kernel.
import data_load as dl
reload(dl)
graph_data=dl.graph_data
# Build the graph dataset from n_data events.
# NOTE(review): the meaning of restart=1 and transform=True is defined in
# data_load, which is not visible here — confirm.
dataset=graph_data(n_data=n_data, restart=1, transform=True)
# index_lists holds three index collections, used below as train / val / test.
idx_lists = dataset.index_lists
# Split data
dataset_train = dataset[idx_lists[0]]
dataset_val   = dataset[idx_lists[1]]
dataset_test  = dataset[idx_lists[2]]

# The training loader is configured for `epochs` epochs, the test loader for a
# single epoch. A fresh validation loader is built inside the training loop.
loader_train = DisjointLoader(dataset_train, epochs=epochs, batch_size=batch_size) # the different loaders work very very differently, beware
loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

# Audible "done" signal. winsound is a Windows-only standard-library module.
winsound.Beep(400,300)

Removed and ready to reload
Connecting to db-file
Loading Muons
Reading files
Splitting data to events
Generating adjacency matrices




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))




Saving dataset
Loading data to memory


In [74]:
from evals import metricsxpos as metrics

def test_angle(loader, plot=True):
    '''Evaluate the model on `loader` and summarise performance per energy bin.

    Every batch is run through `test_step`; predictions and targets are
    concatenated, the events are split into 10 bins of the first target column
    (treated as the energy), and `metrics` is evaluated separately on each bin.

    As stated by the original author:
    input should be dom pos x,y,z , time, charge(log10)
    target should be energy(log10),zenith angle, azimuthal angle, NOT unit vec

    Parameters
    ----------
    loader : iterable yielding (inputs, targets) batches.
    plot : bool, when True draw the per-bin energy and angle metrics.

    Returns
    -------
    (fig, ax) when `plot` is True, otherwise None.
    '''
    prediction_list, target_list = [], []
    for batch in loader:
        inputs, targets = batch
        # Scale the first three feature columns exactly as the training loop does.
        inputs[0][:, :3] = inputs[0][:, :3] / 1000 #always pay attention to these two normalizations
        predictions, targets, _ = test_step(inputs, targets)

        prediction_list.append(predictions)
        target_list.append(targets)

    y_reco  = tf.concat(prediction_list, axis = 0).numpy()
    y_true  = tf.concat(target_list, axis = 0)
    y_true  = tf.cast(y_true, tf.float32).numpy()

    energy = y_true[:, 0]
    counts, bins = np.histogram(energy, bins = 10)
    n_bins = len(bins) - 1

    # Bin centres, used as x positions for both the metrics and the histogram.
    xs = (bins[1:] + bins[: -1]) / 2

    w_energies, u_angles = [], []

    for i in range(n_bins):
        # Use the same bin membership as np.histogram: every bin is half-open
        # [left, right) except the last one, which also includes its right
        # edge. Strict inequalities on both sides would silently drop events
        # lying exactly on an edge (always including the min and max energy),
        # so the metrics would not match the plotted counts.
        in_lower = energy >= bins[i]
        if i == n_bins - 1:
            in_upper = energy <= bins[i + 1]
        else:
            in_upper = energy < bins[i + 1]
        idx = np.logical_and(in_lower, in_upper)

        # NOTE(review): arguments are passed as (true, reco) here while
        # validation() passes (reco, true) — confirm which order metrics expects.
        w, u_angle = metrics(y_true[idx, :], y_reco[idx, :])

        w_energies.append(w)
        u_angles.append(u_angle)

    if plot:
        fig, ax = plt.subplots(ncols = 2, nrows = 1, figsize = (12, 6))

        # Overlay the event count per bin (log scale) on a twin axis of each panel.
        for a in ax:
            a_ = a.twinx()
            a_.step(xs, counts, color = "gray", zorder = 10, alpha = 0.7, where = "mid")
            a_.set_yscale("log")
            a.set_xlabel("Log Energy")

        # Energy reconstruction
        ax[0].scatter(xs, w_energies)
        ax[0].set_title("Energy Performance")
        ax[0].set_ylabel(r"$w(\Delta log(E)$")

        # Angle reconstruction
        ax[1].scatter(xs, u_angles)
        ax[1].set_title("Angle Performance")
        ax[1].set_ylabel(r"$u(\Delta \Omega)$")

        return fig, ax

In [75]:
if wandblog:
    # Start a Weights & Biases run named after the current scenario.
    wandb.init(project="icecube", name=scenario,entity="chri862z")

    # Record the run configuration. n_data is logged under its own key; it used
    # to be assigned to `hidden_states`, overwriting the value set just above.
    wandb.config.hidden_states = hidden_states
    wandb.config.n_data = n_data
    wandb.config.forward = forward
    wandb.config.dropout = dropout
    wandb.config.learning_rate = learning_rate
    wandb.config.batch_size = batch_size
    wandb.config.loss_func = loss_method
    wandb.config.n_neighbors = n_neighbors
    wandb.config.optimizer = 'Adam'

In [76]:
################################################
# Load Model and do checks                     #
################################################
# Alternative architecture, kept for reference:
# import models.GCN0 as m
# models.probreg_GCN is a project module; reload() re-executes it so edits to
# the model file are picked up without restarting the kernel.
import models.probreg_GCN as m
reload(m)
# model=m.GCN0(6)
# Instantiate the network with its default arguments.
model=m.model()
# Training below uses a custom loop, so the model is not compiled:
# model.compile('adam', 'categorical_crossentropy') ## this is a basic setup with predetermined optimizers and so on

In [103]:
# ################################################
# # Setup functions                              #
# ################################################
# loss.loss_probreg is a project module; reload() re-executes it so edits to
# the loss file are picked up without restarting the kernel.
import loss.loss_probreg as loss_funcs
reload(loss_funcs)
# Loss used by train_step, test_step and validation. It is called below as
# loss_func(predictions, targets) and, in validation, with re=True to also get
# a second return value.
loss_func=loss_funcs.sig_likelihood

def lr_schedule(epochs = epochs, initial = learning_rate, decay = 0.9,
                warmup_steps = 2, warmup_factor = 2):
    """Infinite generator of learning rates: short warm-up, then geometric decay.

    The first value is `initial`. Each of the next `warmup_steps` values is the
    previous one multiplied by `warmup_factor`; every value after that is the
    previous one multiplied by `decay`. With the defaults the sequence is
    initial, 2*initial, 4*initial, 4*initial*0.9, 4*initial*0.9**2, ...

    Parameters
    ----------
    epochs : unused; kept so existing calls keep working.
    initial : first learning rate that is yielded.
    decay : multiplicative factor applied once the warm-up is over.
    warmup_steps : number of warm-up multiplications (default 2).
    warmup_factor : multiplicative factor applied during the warm-up (default 2).

    Yields
    ------
    The learning rate for each successive epoch; the generator never ends.
    """
    lr = initial
    yield lr
    for _ in range(warmup_steps):
        lr *= warmup_factor
        yield lr
    while True:
        lr *= decay
        yield lr


#make functions into tf functions

@tf.function(input_signature = loader_train.tf_signature(), experimental_relax_shapes = True)
def train_step(inputs, targets):
    """Run one optimisation step on a single batch and return its loss.

    The forward pass runs in training mode, the loss is loss_func(predictions,
    targets) plus the sum of model.losses, and the resulting gradients are
    applied to model.trainable_variables with the module-level optimizer `opt`.

    The former `global predictionsval` assignment has been removed: it stored a
    symbolic tensor created while tracing this tf.function in a Python global,
    which leaks a graph tensor that cannot be used outside the function.
    """
    with tf.GradientTape() as tape:
        predictions = model(inputs, training = True)
        targets     = tf.cast(targets, tf.float32)
        loss        = loss_func(predictions, targets)
        loss       += sum(model.losses)

    gradients = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

@tf.function(input_signature = loader_test.tf_signature(), experimental_relax_shapes = True)
def test_step(inputs, targets):
    """Evaluate one batch in inference mode, without touching the weights.

    Returns the model output, the targets cast to float32, and the value of
    loss_func(predictions, targets) for the batch.
    """
    y_true = tf.cast(targets, tf.float32)
    y_pred = model(inputs, training = False)
    batch_loss = loss_func(y_pred, y_true)
    return y_pred, y_true, batch_loss


def validation(loader):
    """Evaluate the model on every batch of `loader` and summarise the result.

    Predictions and targets from all batches are concatenated, and the loss and
    the metrics are then computed once on the complete set.

    Parameters
    ----------
    loader : iterable yielding (inputs, targets) batches.

    Returns
    -------
    (loss, losses, metrics_result), where loss and losses are the two values
    returned by loss_func(..., re=True) and metrics_result is whatever
    metrics(y_reco, y_true) returns. `losses` is also stored in the
    module-level global of the same name, as before.
    """
    global losses
    prediction_list, target_list = [], []
    for batch in loader:
        inputs, targets = batch
        # Scale the first three feature columns exactly as the training loop
        # and test_angle do. This line used to be commented out, so the model
        # was validated on inputs scaled differently from its training inputs.
        inputs[0][:, :3] = inputs[0][:, :3] / 1000
        predictions, targets, _ = test_step(inputs, targets)

        prediction_list.append(predictions)
        target_list.append(targets)

    y_reco  = tf.concat(prediction_list, axis = 0)
    y_true  = tf.concat(target_list, axis = 0)
    y_true  = tf.cast(y_true, tf.float32)
    loss, losses = loss_func(y_reco, y_true, re=True)

    # NOTE(review): arguments are passed as (reco, true) here while test_angle
    # passes (true, reco) — confirm which order metrics expects.
    return loss, losses, metrics(y_reco, y_true)



# Optimizer used by train_step. It starts at the configured learning rate; the
# training loop later changes opt.learning_rate once per epoch.
opt = Adam(learning_rate)

In [104]:
# Custom training loop: iterates the training loader batch by batch and, at the
# end of every epoch, validates, logs, updates early stopping and steps the
# learning-rate schedule.
run_counter+=1
k=1  # row of the per-epoch progress bar; tqdm expects an integer position (was 0.2)
tot_time=0
current_batch = 0
current_epoch = 1
loss          = 0
lowest_loss   = np.inf
early_stop    = 1
early_stop_counter    = 0
pbar0          = tqdm(total = epochs, position = 0, leave = True)
pbar0.set_description("Epochbar")
pbar          = tqdm(total = loader_train.steps_per_epoch, position = k, leave = True)
start_time    = time.time()
lr_gen        = lr_schedule(initial=learning_rate)
# The scheduled rate lives in its own variable. The loop used to overwrite the
# configuration value `learning_rate`, so re-running this cell (or any cell
# reading `learning_rate`) silently started from a decayed rate.
current_lr    = next(lr_gen)
opt.learning_rate.assign(current_lr)  # keep optimizer and schedule in step from the first batch
cwd = osp.abspath('')
save_path = osp.join(cwd, 'trained_models/'+scenario)
if not osp.isdir(save_path):
    os.makedirs(save_path)
    print('New folder for saving '+scenario+' made')
# TODO: implement saving model architecture to wandb
# NOTE(review): the model is only saved when early stopping fires; a run that
# completes all epochs is never written to save_path — confirm this is intended.
for batch in loader_train:
    inputs, targets = batch
    inputs[0][:, :3] = inputs[0][:, :3] / 1000 #normalize position
    out             = train_step(inputs, targets)
    loss           += out

    current_batch  += 1
    pbar.update(1)
    pbar.set_description(f"Epoch {current_epoch} / {epochs}; Avg_loss: {loss / current_batch:.6f}")


    # End of epoch: the loader yields steps_per_epoch batches per epoch.
    if current_batch == loader_train.steps_per_epoch:
        t=time.time() - start_time
        tot_time+=t
        print(f"Epoch {current_epoch} of {epochs} done in {t:.2f} seconds using learning rate: {current_lr:.2E}")
        print(f"Avg loss of train: {loss / loader_train.steps_per_epoch:.6f}")

        # A new single-epoch loader is built for every validation pass.
        loader_val    = DisjointLoader(dataset_val, epochs = 1,      batch_size = batch_size)
        val_loss, val_loss_from, val_metric = validation(loader_val)
        if wandblog:
            wandb.log({"Train Loss":      loss / loader_train.steps_per_epoch,
                       "Validation Loss": val_loss, 
                       "Energy metric":   val_metric[0],
                       "Angle metric":    val_metric[1],
                       "Learning rate":   current_lr})

        print(f"Avg loss of validation: {val_loss:.6f}")
        print(f"Loss from:  Energy: {val_loss_from[0]:.6f} \t Angle: {val_loss_from[1]:.6f} ")
        print(f"Energy: w = {val_metric[0]:.6f} \t Angle: u = {val_metric[1]:.6f}")

        # Early stopping on the validation loss.
        if val_loss < lowest_loss:
            early_stop_counter = 0
            lowest_loss        = val_loss
        else:
            early_stop_counter += 1
        print(f'Early stop counter: {early_stop_counter}/{patience}, lowest loss was {lowest_loss:.6f}')
        if early_stop and (early_stop_counter >= patience):
            model.save(save_path)
            print(f"Stopped training. No improvement was seen in {patience} epochs")
            break

        # Fresh per-epoch bar, unless the epoch just finished was the last one.
        if current_epoch != epochs:
            pbar          = tqdm(total = loader_train.steps_per_epoch, position = k, leave = True)

        # Step the schedule and push the new rate into the optimizer.
        current_lr = next(lr_gen)
        opt.learning_rate.assign(current_lr)
        pbar0.update(1)
        # Estimate the finishing time from the mean epoch duration so far.
        time_avg=tot_time/current_epoch
        delta=dt.timedelta(seconds=time_avg*(epochs-current_epoch))
        now = dt.datetime.now()
        then=now+delta
        time_e = then.strftime("%H:%M:%S")
        pbar0.set_description(f"Expect to finish at {time_e}")
#         if current_epoch % 10 == 0:
#             model.save(save_path)
#             print("Model saved")

        # Reset the per-epoch accumulators.
        loss            = 0
        start_time      = time.time()
        current_epoch  += 1
        current_batch   = 0
# Audible "done" signal. winsound is a Windows-only standard-library module.
winsound.Beep(400,300)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))







[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[320 3 3] [320 1 3]
[320 3 3] [320 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[512 3 3] [512 1 3]
[512 3 3] [512 3]
Epoch 1 of 20 done in 13.91 seconds using learning rate: 1.00E-04
Avg loss of train: 8.582011
[512 3 3] [512 1 3]
[512 3 3] [512 3]
[488 3 3] [488 1 3]
[488 3 3] [488 3]
[1000 3 3] [1000 1 3]
[1000 3 3] [1000 3]


InvalidArgumentError: Incompatible shapes: [1000,65] vs. [1000,2] [Op:Mul]

In [None]:
# Evaluate on the test loader and draw the per-energy-bin performance plots.
fig, ax = test_angle(loader_test)
if wandblog:
    # NOTE(review): the figure is only written to disk when wandb logging is
    # enabled, and this call presumably needs the model_tests/ folder to exist
    # already — confirm.
    fig.savefig(f"model_tests/{scenario}_test.pdf")

In [None]:
################################################
# Evaluating our model                         #
################################################

# Count the scalar parameters in the model's trainable and non-trainable
# weights, then print the two counts and their total.
trainable_count = int(
    np.sum([K.count_params(p) for p in model.trainable_weights]))
non_trainable_count = int(
    np.sum([K.count_params(p) for p in model.non_trainable_weights]))

print('Total params: {:,}'.format(trainable_count + non_trainable_count))
print('Trainable params: {:,}'.format(trainable_count))
print('Non-trainable params: {:,}'.format(non_trainable_count))

In [109]:
# Debug cell: call the model directly on every validation batch and print the
# raw output together with the batch index.
# NOTE(review): unlike the training loop, the inputs are not divided by 1000
# here, so these outputs may not reflect the model on its training scale — confirm.
loader_val    = DisjointLoader(dataset_val, epochs = 1,      batch_size = batch_size)
for i,batch in enumerate(loader_val):
    inputs, targets = batch
    predictions=model(inputs)
    print(predictions)
    print(i)

tf.Tensor(
[[ 0.5621819  -0.8270136   0.45492536 ...  0.7533319   0.06258322
   0.65969706]
 [ 0.5288799  -0.8486967   2.104484   ...  1.6576217   0.00905073
   0.941267  ]
 [ 0.5087528  -0.8609127   0.8546529  ...  0.76553595  0.22838165
   0.91031104]
 ...
 [ 0.47644016 -0.8792069   1.5868886  ...  1.211982    0.08758099
   0.8337561 ]
 [ 0.5530459  -0.8331508   0.7767262  ...  0.91565657  0.04885986
   0.7137921 ]
 [ 0.5379181  -0.8429971   1.0938455  ...  1.1507676   0.0143856
   0.7810372 ]], shape=(512, 66), dtype=float32)
0
tf.Tensor(
[[ 5.3968114e-01 -8.4186947e-01  7.3556215e-01 ...  9.1850841e-01
   4.4340096e-02  6.5779239e-01]
 [ 5.4487377e-01 -8.3851808e-01  9.0674520e-01 ...  9.6494937e-01
   1.0598631e-01  6.5970379e-01]
 [ 5.0768113e-01 -8.6154503e-01  2.2350392e+00 ...  1.7267858e+00
   1.3991188e-01  8.8067931e-01]
 ...
 [ 5.3346878e-01 -8.4581971e-01  4.2787376e-01 ...  6.5759474e-01
   4.2334303e-02  6.8938816e-01]
 [ 4.7935590e-01 -8.7762064e-01  1.1447527e-03 ... 

In [107]:
# Display whatever `predictions` currently holds in the kernel. The name is
# assigned by the prediction loop cell, so this cell depends on that cell
# having been run first.
predictions

<tf.Tensor: shape=(488, 66), dtype=float32, numpy=
array([[ 0.53359705, -0.8457388 ,  1.5609174 , ...,  1.349319  ,
         0.01543205,  0.7901085 ],
       [ 0.51036775, -0.8599562 ,  2.224135  , ...,  1.6503104 ,
         0.04791152,  0.9609264 ],
       [ 0.47912487, -0.8777468 ,  0.10259184, ...,  0.51117367,
         0.1419247 ,  0.50054836],
       ...,
       [ 0.57406366, -0.8188107 ,  1.2186431 , ...,  1.0840503 ,
         0.05218859,  0.9712802 ],
       [ 0.47296175, -0.88108295,  0.16127457, ...,  0.56065345,
         0.06562333,  0.44658703],
       [ 0.5698642 , -0.82173884,  0.7507787 , ...,  0.8312793 ,
         0.08987123,  0.77896404]], dtype=float32)>