## Load libraries

In [1]:
# Add root project directory to path
import sys
sys.path.append("../../../")

import tensorflow as tf
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import pickle
import time
from collections import defaultdict

from diffnet.dataset_loader import *
from diffnet.diffnet import *

from sklearn.metrics import r2_score

import eli5
from eli5.sklearn import PermutationImportance

## Check status

In [2]:
!nvidia-smi

Thu May 26 15:48:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 208...  On   | 00000000:09:00.0 Off |                  N/A |
| 35%   38C    P8    21W / 260W |     15MiB / 11019MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  On   | 00000000:0A:00.0 Off |                  N/A |
| 35%   36C    P8    21W / 260W |      5MiB / 11019MiB |      0%      Defaul

In [3]:
import os  # already imported in the first cell; repeated here
# Restrict this process to the GPU with index 1.
# NOTE(review): TensorFlow has already been imported at this point — confirm the
# variable is still honoured (it must be set before TensorFlow initializes CUDA).
os.environ["CUDA_VISIBLE_DEVICES"] ="1"

# The returned device list is discarded: it is neither assigned nor the last
# expression of the cell, so nothing is displayed.
tf.config.list_physical_devices("GPU")

# allocate as small memory as possible
# NOTE(review): this goes through the TF1 compatibility API (ConfigProto/Session).
# Presumably the open session is what keeps `allow_growth` in effect — confirm it
# also governs the eager-mode code used for training below.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth=True
session = tf.compat.v1.Session(config=config)

In [4]:
## fix seed
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Parameters
    ----------
    seed : int
        Value passed to Python's ``random`` module, NumPy's global RNG and
        TensorFlow's global seed.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    # Only inherited by child processes: the hash seed of the interpreter that
    # is already running was fixed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    tf.random.set_seed(seed)

### FUNCTIONS

In [5]:
# necessary functions for machine learning model

def cal_mse(y_true, y_pred):
    """Return the mean squared error between ``y_true`` and ``y_pred``.

    The element-wise difference is squared and then averaged over every
    element with ``tf.reduce_mean`` (no axis given), yielding a scalar.
    """
    residual = y_true - y_pred
    squared_error = tf.square(residual)
    return tf.reduce_mean(squared_error)

def train_step(model, x, y, optimizer):
    """Apply one optimizer update to ``model`` using the batch ``(x, y)``.

    The forward pass (with ``training=True``) and the MSE loss are recorded on
    a gradient tape; the resulting gradients with respect to
    ``model.trainable_weights`` are handed to ``optimizer``.

    Returns
    -------
    tuple
        ``(prediction, loss)`` computed *before* the weights were updated.
    """
    with tf.GradientTape() as tape:
        prediction = model(x, training=True)
        batch_loss = cal_mse(y_true=y, y_pred=prediction)

    # Read the weight list only after the forward pass, as the original did.
    weights = model.trainable_weights
    gradients = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    return prediction, batch_loss

In [6]:
class EarlyStopping():
    """Signal when a monitored loss has stopped improving.

    ``validate`` keeps the lowest loss seen so far and counts consecutive
    calls whose loss is strictly greater than it. Once that count exceeds
    ``patience`` the method returns ``True``.
    """

    def __init__(self, patience=0, verbose=0):
        self.patience = patience
        self.verbose = verbose
        self._loss = float('inf')  # best (lowest) loss observed so far
        self._step = 0             # consecutive non-improving validations

    def validate(self, loss):
        """Record ``loss`` and return ``True`` if training should stop."""
        got_worse = self._loss < loss
        if not got_worse:
            # Equal or better: remember it and restart the counter.
            self._step = 0
            self._loss = loss
            return False

        self._step += 1
        if self._step > self.patience:
            if self.verbose:
                print('Training process is stopped early....')
            return True
        return False

In [7]:
def training(model, is_transfer, source_weight_path, 
             n_steps, step, min_step, min_loss, train_iter, val_iter, 
             prefix, save_dir, es, random_state):
    """Train ``model`` step by step, validating every 1000 steps.

    Parameters
    ----------
    model : callable model exposing ``load_weights`` / ``save_weights``
    is_transfer : bool
        When true, weights are loaded from ``source_weight_path`` first.
    source_weight_path : str
    n_steps : int
        Last step to run (inclusive).
    step : int
        Starting step; a starting value of 0 is skipped so the first
        executed step is 1.
    min_step, min_loss :
        Step and value of the best validation loss known so far. They are
        updated locally only and are not returned.
    train_iter, val_iter : iterators yielding ``(x, y)`` batches
    prefix, save_dir : str
        ``prefix + save_dir`` is the directory receiving the best weights
        and ``training_time.txt``; it must already exist.
    es : object with ``validate(loss) -> bool``; ``True`` stops training.
    random_state : int
        Passed to ``seed_everything``.

    Returns
    -------
    tuple of lists
        ``(train_step_idx, train_loss_list, val_step_idx, val_loss_list)``.

    Notes
    -----
    The optimizer is not a parameter: the module-level ``optimizer`` is used.
    """
    seed_everything(random_state)

    train_loss_list = []
    train_step_idx = []
    val_loss_list = []
    val_step_idx = []

    if is_transfer:
        model.load_weights(source_weight_path)

    start = time.time()
    while step < n_steps+1:
        if step == 0:
            step += 1
            continue

        x, y = next(train_iter)
        # `optimizer` is resolved from the notebook's global namespace.
        y_pred, loss = train_step(model, x, y, optimizer)

        train_loss_list.append(loss)
        train_step_idx.append(step)

        if step % 1000 == 0:
            val_x, val_y = next(val_iter)
            # Validate the model that was passed in; the original called the
            # global `diffnet`, which only worked when both were the same object.
            val_y_pred = model(val_x, training=False)

            val_loss = cal_mse(y_true=val_y, y_pred=val_y_pred)

            # Printed before the update below, so this shows the previous best.
            print("[{:d}] Loss Val: {:7.4f}, MIN MSE ({:d}): {:.4f}".format(step, val_loss, min_step, min_loss))

            val_loss_list.append(val_loss)
            val_step_idx.append(step)

            if val_loss < min_loss:
                min_step = step
                print("NEW MIN LOSS : %7.4f" % val_loss)
                model.save_weights("{}/diffnet-{}-min.h5".format(prefix+save_dir, save_dir))
                min_loss = val_loss

                now = time.time()
                # The `with` block closes the file; no explicit close() is needed.
                with open("{}/training_time.txt".format(prefix+save_dir), "a") as h:
                    h.write("{step}\t{time}\t{loss}\n".format(step=min_step, time=now-start, loss=min_loss))

            if es.validate(val_loss):
                break

        step+=1

    return train_step_idx, train_loss_list, val_step_idx, val_loss_list

## Load Training data

In [8]:
# Run configuration: training-set size, direct vs. transfer learning, and
# which numbered sets (start_idx..end_idx, inclusive) to process.

n_data = 300
is_transfer = False
source_bar = 1
start_idx = 1
end_idx = 10

# Checkpoint root for this configuration (keeps its trailing slash).
prefix = (
    f"../../../checkpoint/Transfer_learning/size_{n_data}/source_{source_bar}bar/"
    if is_transfer
    else f"../../../checkpoint/Direct_learning/size_{n_data}/"
)

# One sub-directory name per set: "set_<start_idx>" ... "set_<end_idx>".
save_dir_list = ["set_{}".format(set_no) for set_no in range(start_idx, end_idx + 1)]

In [10]:
# Whitespace-separated input tables. Entry i of each list belongs together:
# features and labels are both indexed by their first column.
FEATURE_INPUT_FILES = ["../../../data/features/core_mof_geo+gridhist_50.txt"]
LABEL_INPUT_FILES = ["../../../data/labels/coremof_d_ch4_rawset.txt"]

# Raw string for the separator: "\s" in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
f_input_df = pd.read_csv(FEATURE_INPUT_FILES[0], sep=r"\s+", index_col=0)
l_input_df = pd.read_csv(LABEL_INPUT_FILES[0], sep=r"\s+", index_col=0, header=None)

# Append any additional feature/label file pairs. zip() stops at the shorter
# list, so an unmatched trailing file is ignored, as it was before.
for f_path, l_path in zip(FEATURE_INPUT_FILES[1:], LABEL_INPUT_FILES[1:]):
    try:
        f_df = pd.read_csv(f_path, sep=r"\s+", index_col=0)
        l_df = pd.read_csv(l_path, sep=r"\s+", index_col=0, header=None)
    except (OSError, ValueError) as err:
        # Still best-effort (the pair is skipped), but no longer silent:
        # missing files raise OSError, pandas parse errors derive from ValueError.
        print(f"Skipping {f_path} / {l_path}: {err}")
        continue

    f_input_df = pd.concat([f_input_df, f_df])
    l_input_df = pd.concat([l_input_df, l_df])

# Shuffle the feature rows. No random_state is given, so the order differs
# between runs.
f_input_df = f_input_df.sample(frac=1)

In [11]:
## normalize geometric features
# The first five columns are treated as the geometric descriptors.
geo_cols = f_input_df.columns[:5]
# Copy of those columns taken before scaling.
geo_df = f_input_df[geo_cols].copy()

# Fixed per-column divisors, in the same order as `geo_cols`.
max_v = [60, 60, 4000, 10000, 1]

# Assign straight onto the frame. The previous chained form
# `f_input_df[:][geo_cols] = ...` wrote through a temporary object, which
# pandas does not guarantee to propagate back to `f_input_df`.
# NOTE: re-running this cell divides the columns again.
f_input_df[geo_cols] = f_input_df[geo_cols] / max_v

# only for geo only
#f_input_df = f_input_df[:][geo_cols]

In [12]:
# Preview the first rows of the feature table after the geometric columns were scaled.
f_input_df.head()

Unnamed: 0_level_0,LCD,PLD,Vol_ASA,Grav_ASA,Void_fraction,0.0,0.02,0.04,0.06,0.08,...,0.8,0.82,0.84,0.86,0.88,0.9,0.92,0.94,0.96,0.98
MOF,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BEFMAC_clean,0.089951,0.057887,0.314925,0.083036,0.06358,0.0,0.0,0.004082,0.012245,0.0,...,0.0,0.0,0.0,0.0,0.008163,0.0,0.0,0.0,0.0,0.879592
IKETUO_clean,0.090777,0.070132,0.432138,0.102958,0.11112,0.0,0.0,0.0,0.0,0.0,...,0.0,0.002849,0.005698,0.0,0.0,0.0,0.0,0.002849,0.0,0.826211
ja073612tsi20070723_060520_clean,0.097501,0.050559,0.0,0.0,0.11166,0.0,0.0,0.0,0.0,0.0,...,0.000916,0.003663,0.002747,0.000916,0.0,0.003663,0.000916,0.003663,0.003663,0.828755
XOMDOU_clean,0.101477,0.077729,0.354425,0.119939,0.082,0.0,0.0,0.0,0.001137,0.003204,...,0.000413,0.000413,0.000827,0.000827,0.00124,0.00062,0.00062,0.00186,0.000413,0.89323
CUVHEH_clean,0.21298,0.200457,0.391778,0.094706,0.26824,0.0,0.0,0.0,0.0,0.0,...,0.002104,0.002946,0.001543,0.002245,0.002946,0.002525,0.001684,0.000842,0.000421,0.68287


In [13]:
# Work on a copy so the raw label frame `l_input_df` stays untouched.
l_input_df_ = l_input_df.copy()
# Labels are handled in log10 space from here on.
# NOTE(review): log10 gives -inf/NaN for zero or negative entries — confirm the
# label file is strictly positive.
l_data = np.log10(l_input_df_)

# Fixed bounds used to min-max scale the log10 labels.
# NOTE: `max_v` rebinds the name that earlier held the list of geometric-feature divisors.
max_v = 2.5
min_v = -4.5

In [14]:
# Min-max scale the log10 labels with the fixed bounds min_v / max_v.
# Nothing clips the result, so values outside the bounds land outside [0, 1].
# NOTE: re-running this cell rescales `l_data` a second time.
l_data = (l_data - min_v) / (max_v - min_v)
# Independent copy of the feature table, used as the model input from here on.
f_data = f_input_df.copy()

In [15]:
# Build (or, for transfer learning, reuse) the train/val/test name lists of
# every set and store them under prefix+save_dir.
for save_dir in save_dir_list:
    # NOTE(review): this reshuffles the raw label copy, but `l_data` was derived
    # from it earlier, so the shuffle does not change which rows are selected
    # below. Kept as-is; confirm whether the subset was meant to vary per set.
    l_input_df_ = l_input_df_.sample(frac=1)

    print(f"{save_dir}")

    if is_transfer:
        print("Tranfer learning")
        # Reuse exactly the split that the direct-learning run stored.
        dl_prefix = f"../../../checkpoint/Direct_learning/size_{n_data}/" + save_dir

        with open(dl_prefix+"/train_data.pickle", "rb") as f:
            train_data = pickle.load(f)

        with open(dl_prefix+"/val_data.pickle", "rb") as f:
            val_data = pickle.load(f)

        with open(dl_prefix+"/test_data.pickle", "rb") as f:
            test_data = pickle.load(f)

        print("train data : {}, val data : {}, test data : {}".format(len(train_data), 
                                                                      len(val_data), 
                                                                      len(test_data)))

    else:
        print("Direct learning...")
        # Take the first n_data label rows WITHOUT overwriting the global
        # `l_data`: the original truncated it in place, so a later run with a
        # larger n_data silently kept the smaller table.
        l_subset = l_data[:n_data]

        # Keep only names that also have a feature row, then shuffle them.
        feature_index = f_data.index
        data_name = [name for name in l_subset.index if name in feature_index]

        # Uses NumPy's global RNG, which is not seeded in this cell.
        np.random.shuffle(data_name)

        # 72 % / 8 % / remainder split.
        n_train = int(0.72*len(data_name))
        n_val = int(0.08*len(data_name))
        n_test = len(data_name) - n_train - n_val

        train_data = data_name[:n_train]
        val_data = data_name[n_train:n_train+n_val]
        test_data = data_name[n_train+n_val:]

        print("train data : {}, val data : {}, test data : {}".format(len(train_data), 
                                                                      len(val_data), 
                                                                      len(test_data)))

    # save data for training
    os.makedirs(prefix+save_dir, exist_ok=True)

    for split_name, split_names in (("train", train_data),
                                    ("val", val_data),
                                    ("test", test_data)):
        with open("{}/{}_data.pickle".format(prefix+save_dir, split_name), "wb") as f:
            pickle.dump(split_names, f)

    # (Re)create the timing log with its header; training appends rows to it.
    with open("{}/training_time.txt".format(prefix+save_dir), "w") as h:
        h.write("Step\tTime duration\tloss(mse)\n")

set_1
Direct learning...
train data : 216, val data : 24, test data : 60
set_2
Direct learning...
train data : 216, val data : 24, test data : 60
set_3
Direct learning...
train data : 216, val data : 24, test data : 60
set_4
Direct learning...
train data : 216, val data : 24, test data : 60
set_5
Direct learning...
train data : 216, val data : 24, test data : 60
set_6
Direct learning...
train data : 216, val data : 24, test data : 60
set_7
Direct learning...
train data : 216, val data : 24, test data : 60
set_8
Direct learning...
train data : 216, val data : 24, test data : 60
set_9
Direct learning...
train data : 216, val data : 24, test data : 60
set_10
Direct learning...
train data : 216, val data : 24, test data : 60


### Start training

In [16]:
# Hyperparameters
n_steps = 200000     # last training step (inclusive)
lr = 0.00001         # Adam learning rate
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
optimizer = tf.optimizers.Adam(learning_rate=lr)
init_step = 0        # training() skips step 0 and starts counting at 1

# Pre-trained source weights; only loaded when is_transfer is true.
source_weight_path = f"../../../checkpoint/source_geo+gridhist/CH4_pm+tb_ai_{source_bar}bar/diffnet-CH4_pm+tb_ai_{source_bar}bar-min.h5"

# Initial "best validation loss so far" (an MSE) and the step it was seen at.
min_loss = 1e30
min_step = 0

In [17]:
# Train one model per set, then evaluate the best checkpoint on that set's
# test split and write plots and metrics into prefix+save_dir.
for save_dir in save_dir_list:
    # The numeric suffix of "set_<n>" doubles as the random seed of the run.
    seed_idx = int(save_dir.split("_")[-1])
    print(f"{save_dir} starts...")
    print("load data")
    # load train data / val data name
    with open("{}/train_data.pickle".format(prefix+save_dir), "rb") as f:
        train_data = pickle.load(f)

    with open("{}/val_data.pickle".format(prefix+save_dir), "rb") as g:
        val_data = pickle.load(g)

    # load dataloader
    dataloader = DataLoader()

    train_f_data, train_l_data = dataloader.arrange_data(f_data, l_data, train_data)
    val_f_data, val_l_data = dataloader.arrange_data(f_data, l_data, val_data)

    # Full-batch training: the batch size equals the number of training rows.
    train_dataset = dataloader.make_dataset(np.array(train_f_data), np.array(train_l_data), 
                                        batch_size=len(train_l_data), repeat=True, shuffle=True, buffer_size=len(train_l_data))

    val_dataset = dataloader.make_dataset(np.array(val_f_data), np.array(val_l_data), 
                                           batch_size=len(val_l_data), repeat=True, shuffle=False)


    train_dataset_iter = iter(train_dataset)
    val_dataset_iter = iter(val_dataset)

    # load model
    print("load model")
    diffnet = DiffNET(input_size=(len(f_data.columns),), dropout=0.5, transfer=is_transfer)
    diffnet.initialize_weights()
    diffnet.summary()

    # Fresh optimizer for every set: training() reads the global `optimizer`,
    # and reusing one Adam instance would carry its step counter and moment
    # estimates over from the previous, unrelated model.
    optimizer = tf.optimizers.Adam(learning_rate=lr)

    early_stopping = EarlyStopping(patience=20, verbose=1)

    print("train starts")
    # training stage
    train_step_idx_, train_loss_list_, val_step_idx_, val_loss_list_ = training(
        model=diffnet, is_transfer=is_transfer, source_weight_path=source_weight_path, 
        n_steps=n_steps, step=init_step, min_step=min_step, min_loss=min_loss, 
        train_iter=train_dataset_iter, val_iter=val_dataset_iter, 
        prefix=prefix, save_dir=save_dir, es=early_stopping, random_state=seed_idx)

    # draw loss plot
    plt.figure(figsize=(13,10))
    plt.plot(np.array(train_step_idx_) / 1e3, train_loss_list_, label="train_loss")
    plt.plot(np.array(val_step_idx_) / 1e3, val_loss_list_, label="val_loss")
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.legend(fontsize=18)
    plt.ylim(0, 0.05)
    plt.xlabel("Step (x$10^{3}$)", fontsize=25)
    plt.ylabel("Loss (MSE)", fontsize=25)
    plt.savefig("{}/loss_plot.png".format(prefix+save_dir), bbox_inches="tight")
    #plt.show()

    # load test data
    with open("{}/test_data.pickle".format(prefix+save_dir), "rb") as g:
        test_data = pickle.load(g)

    test_f_data, test_l_data = dataloader.arrange_data(f_data, l_data, test_data)
    test_dataset = dataloader.make_dataset(np.array(test_f_data), np.array(test_l_data),
                           batch_size=len(test_l_data), shuffle=False, repeat=False)

    # NOTE(review): make_dataset was already given a batch_size, so this extra
    # .batch() may nest batches. The predictions are flattened below, which
    # hides it — confirm against DataLoader.make_dataset.
    test_dataset = test_dataset.batch(len(test_l_data))

    # load min model
    # NOTE(review): built with transfer=False even when the trained model used
    # transfer=is_transfer — confirm both produce the same weight layout.
    diffnet = DiffNET(input_size=(len(f_data.columns),), transfer=False)
    diffnet.initialize_weights()
    diffnet.load_weights("{}/diffnet-{}-min.h5".format(prefix+save_dir, save_dir))

    # check mse and r2 score for test data
    test_y_pred = []
    test_y_true = []

    for x, y in test_dataset:
        _y = diffnet(x, training=False)
        test_y_pred += _y.numpy().reshape(-1).tolist()
        test_y_true += y.numpy().reshape(-1).tolist()

    test_y_pred = np.array(test_y_pred)
    test_y_true = np.array(test_y_true)

    # Undo the min-max scaling; relies on the scalar min_v / max_v label bounds
    # still being the current values of those globals.
    y_true_ = test_y_true * (max_v - min_v) + min_v
    y_pred_ = test_y_pred * (max_v - min_v) + min_v

    test_mse = cal_mse(y_true=y_true_, y_pred=y_pred_).numpy().item()
    test_r2score = r2_score(y_true_, y_pred_)

    # Parity plot of predicted vs. reference values.
    plt.figure(figsize=(12,10), dpi=300)
    hb = plt.hexbin(y_true_, y_pred_, gridsize=80, cmap="bwr", mincnt=1)
    plt.plot([min_v, max_v], [min_v, max_v], color="black", ls=":")
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.xlim([min_v, max_v])
    plt.ylim([min_v, max_v])
    plt.xlabel("Calculated", fontsize=25)
    plt.ylabel("Prediction", fontsize=25)
    #plt.xscale("log")
    #plt.yscale("log")
    plt.text(min_v+0.1, max_v - 0.5, "RMSE : {:.4f}".format(np.sqrt(test_mse)), fontsize=25)
    plt.text(min_v+0.1, max_v - 0.8, "R2 score : {:.4f}".format(test_r2score), fontsize=25)
    cb = plt.colorbar(hb)
    cb.set_label("Counts", fontsize=25, rotation=270, labelpad=25)
    for t in cb.ax.get_yticklabels():
        t.set_fontsize(20)
    plt.savefig("{}/{}-testset_hexbin.png".format(prefix+save_dir, save_dir), bbox_inches="tight")
    # Closes both figures of this iteration so they do not accumulate.
    plt.close("all")
    #plt.show()

    # Context manager so the file is closed even if a write fails.
    with open("{}/{}-results.txt".format(prefix+save_dir, save_dir), "w") as h:
        h.write("RMSE\tR2_score\n")
        h.write(f"{np.sqrt(test_mse):.4f}\t{test_r2score:.4f}")

    print(f"{save_dir} ends...")

set_1 starts...
load data
load model
Model: "diff_net"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense (Dense)                multiple                  28672     
_________________________________________________________________
dense_1 (Dense)              multiple                  65664     
_________________________________________________________________
dense_2 (Dense)              multiple                  129       
Total params: 94,465
Trainable params: 94,465
Non-trainable params: 0
_________________________________________________________________
train starts
[1000] Loss Val:  0.0064, MIN MSE (0): 1000000000000000019884624838656.0000
NEW MIN LOSS :  0.0064


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/lim/anaconda3/envs/diffnet/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-17-00569887fb37>", line 40, in <module>
    train_step_idx_, train_loss_list_, val_step_idx_, val_loss_list_ = training(
  File "<ipython-input-7-f90fe408495f>", line 21, in training
    x, y = next(train_iter)
  File "/home/lim/anaconda3/envs/diffnet/lib/python3.8/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 747, in __next__
    return self._next_internal()
  File "/home/lim/anaconda3/envs/diffnet/lib/python3.8/site-packages/tensorflow/python/data/ops/iterator_ops.py", line 730, in _next_internal
    ret = gen_dataset_ops.iterator_get_next(
  File "/home/lim/anaconda3/envs/diffnet/lib/python3.8/site-packages/tensorflow/python/ops/gen_dataset_ops.py", line 2574, in iterator_get_next
    _result = pywrap_tfe.TFE_Py_FastPathExecute(
K

TypeError: object of type 'NoneType' has no len()