# Demo for hyperparameter tuning with grid search

In [1]:
import os 
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.lines as mlines
from matplotlib import pyplot as plt
from joblib import Parallel, delayed

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score

from xnn.sosxnn import SOSxNN

## Simulation Settings

In [2]:
# --- Data simulation -------------------------------------------------------
corr = 0.5            # passed to data_generator1 as the feature-correlation setting
noise_sigma = 1       # standard deviation multiplier of the additive Gaussian noise
DummyNum = 0
FeatureNum = 10
TestNum = 10000
DataNum = 10000

# --- Optimization ----------------------------------------------------------
training_epochs = 10000
num_cores = 10
repeat_num = 10

# Projection matrix with one column per ridge function. Only the first seven
# features carry non-zero weights, and the columns have disjoint supports.
ortho_matrix = np.zeros((FeatureNum, 4))
ortho_matrix[:7, :] = np.column_stack([
    [1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0, 0],
    [0, 0, 0.5, 0.5, 0, 0, 0],
    [0, 0, 0, 0, 0.2, 0.3, 0.5],
])

In [3]:
def data_generator1(DataNum, FeatureNum, corr, proj_matrix, noise_sigma):
    """Simulate features and a noisy additive-index response.

    Every feature column is a uniform(-1, 1) draw mixed with one shared
    uniform(-1, 1) factor using weight t = sqrt(corr / (1 - corr)) and then
    divided by (1 + t). The response sums four ridge functions of the
    projections X @ proj_matrix[:, k] (linear, exponential, quadratic, sine)
    plus Gaussian noise scaled by ``noise_sigma``.

    Parameters
    ----------
    DataNum : int
        Number of rows to generate.
    FeatureNum : int
        Number of feature columns.
    corr : float
        Correlation setting used to derive the mixing weight t.
    proj_matrix : ndarray
        Matrix whose first four columns are the projection directions.
    noise_sigma : float
        Multiplier applied to standard normal noise.

    Returns
    -------
    X : ndarray of shape (DataNum, FeatureNum)
    Y : ndarray of shape (DataNum, 1)
    """
    # The draw order (shared factor, then one draw per column, then noise)
    # determines the values produced under a fixed seed.
    shared = np.random.uniform(-1, 1, [DataNum, 1])
    t = np.sqrt(corr / (1 - corr))

    X = np.zeros((DataNum, FeatureNum))
    for col in range(FeatureNum):
        own = np.random.uniform(-1, 1, [DataNum, 1])
        X[:, col:col + 1] = (own + t * shared) / (1 + t)

    z0, z1, z2, z3 = (np.dot(X, proj_matrix[:, k]) for k in range(4))
    signal = 2 * z0 + 0.2 * np.exp(-4 * z1) + 3 * z2 ** 2 + 2.5 * np.sin(np.pi * z3)
    noise = noise_sigma * np.random.normal(0, 1, [DataNum, 1])

    Y = np.reshape(signal, [-1, 1]) + noise
    return X, Y

In [4]:
# Seed NumPy's global RNG before simulating the train + test pool.
np.random.seed(0)
X, Y = data_generator1(DataNum + TestNum, FeatureNum, corr, ortho_matrix, noise_sigma)

# Rescale features and response to [-1, 1]; scaler_y is kept so that metrics
# can later be computed on the original response scale.
scaler_x = MinMaxScaler((-1, 1))
scaler_y = MinMaxScaler((-1, 1))
sX = scaler_x.fit_transform(X)
sY = scaler_y.fit_transform(Y)

# Hold out TestNum rows for testing.
train_x, test_x, train_y, test_y = train_test_split(sX, sY, test_size=TestNum)

## Grid Search in Parallel

- L1_Penalty_Subnet = [$10^{-1}, 10^{-2}, 10^{-3}$]
- L1_Penalty_Proj = [$10^{-1}, 10^{-2}, 10^{-3}$]
- Smooth_Lambda = [$10^{-5}, 10^{-6}, 10^{-7}$]

In [5]:
def get_mse(scaler_y):
    """Build an MSE metric that is evaluated on the unscaled response.

    Parameters
    ----------
    scaler_y : object
        Anything exposing ``inverse_transform``; it is applied to both the
        predictions and the labels before the error is computed.

    Returns
    -------
    callable
        ``metric(label, pred)`` returning the mean squared difference of the
        inverse-transformed arrays.
    """
    def metric(label, pred):
        residual = scaler_y.inverse_transform(pred) - scaler_y.inverse_transform(label)
        return np.mean(residual ** 2)

    return metric

def get_auc(label, pred):
    """Return the ROC AUC of ``pred`` against ``label``.

    Has the ``metric(label, pred)`` signature that ``sosxnn_tune`` expects, so
    it can be passed directly as ``metric=get_auc``.

    Parameters
    ----------
    label : array-like
        Ground-truth labels, passed as the first argument of ``roc_auc_score``.
    pred : array-like
        Predicted scores, passed as the second argument of ``roc_auc_score``.

    Returns
    -------
    float
        The value computed by ``sklearn.metrics.roc_auc_score``.
    """
    # Call the scorer; returning the bare function object would hand callers a
    # callable where they expect a number.
    return roc_auc_score(label, pred)

def sosxnn_tune(simu_dir, task_name, train_x, train_y, test_x, test_y, metric=None,
         input_dummy_num=0,
         subnet_num=10,
         subnet_arch=[10, 6],
         task="Regression",
         activation_func=tf.tanh,
         bn_flag=True,
         lr_bp=0.001,
         lr_cl=0.1,
         l1_proj=0.001,
         l1_subnet=0.001,
         smooth_lambda=0.00001,
         batch_size=1000,
         training_epochs=10000,
         tuning_epochs=500,
         beta_threshold=0.01,
         verbose=False,
         val_ratio=0.2,
         early_stop_thres=1000,
         dummy_name=None):
    """Fit one SOSxNN configuration and report train/validation/test metrics.

    NumPy and TensorFlow are both seeded with 1, an ``SOSxNN`` model is built
    from the given hyperparameters and fitted on ``(train_x, train_y)``, its
    visualization is written under ``simu_dir + task_name + "/"``, and
    ``metric`` is evaluated on the model's own training and validation splits
    (``model.tr_x`` / ``model.val_x``) and on ``(test_x, test_y)``.

    Parameters
    ----------
    simu_dir, task_name : str
        Concatenated as ``simu_dir + task_name + "/"`` to form the output folder
        handed to ``model.visualize``.
    train_x, train_y, test_x, test_y : array-like
        Training and test data. ``train_x.shape[1]`` minus ``input_dummy_num``
        is used as the model's ``input_num``.
    metric : callable
        ``metric(label, pred)`` returning a score; required.
    input_dummy_num : int
        Forwarded to the model and subtracted from the feature count.
    subnet_num : int
        Upper bound on the number of subnetworks; the model receives
        ``min(input_num, subnet_num)``.
    subnet_arch, task, activation_func, lr_bp, lr_cl, l1_proj, l1_subnet,
    smooth_lambda, batch_size, training_epochs, tuning_epochs, beta_threshold,
    verbose, val_ratio, early_stop_thres
        Forwarded to the ``SOSxNN`` constructor under the same keyword names.
    bn_flag : bool
        Accepted but not forwarded to the model.
        NOTE(review): confirm whether ``SOSxNN`` supports a ``bn_flag`` argument.
    dummy_name
        Forwarded to ``model.visualize``.

    Returns
    -------
    pandas.DataFrame
        A single row holding ``train_metric``, ``val_metric`` and
        ``test_metric`` (each rounded to 5 decimals) followed by the
        hyperparameters used for this run.

    Raises
    ------
    TypeError
        If ``metric`` is not callable.
    """
    # Reject an unusable metric up front instead of after the expensive fit.
    if not callable(metric):
        raise TypeError("metric must be a callable taking (label, pred)")

    np.random.seed(1)
    tf.random.set_seed(1)

    input_num = train_x.shape[1] - input_dummy_num
    # Never request more subnetworks than there are non-dummy inputs.
    effective_subnet_num = min(input_num, subnet_num)

    model = SOSxNN(input_num=input_num,
                input_dummy_num=input_dummy_num,
                subnet_num=effective_subnet_num,
                # Copy so the shared default list can never be mutated downstream.
                subnet_arch=list(subnet_arch),
                task=task,
                activation_func=activation_func,
                batch_size=batch_size,
                training_epochs=training_epochs,
                lr_bp=lr_bp,
                lr_cl=lr_cl,
                beta_threshold=beta_threshold,
                tuning_epochs=tuning_epochs,
                l1_proj=l1_proj,
                l1_subnet=l1_subnet,
                smooth_lambda=smooth_lambda,
                verbose=verbose,
                val_ratio=val_ratio,
                early_stop_thres=early_stop_thres)
    model.fit(train_x, train_y)

    # Figure name encodes -log10 of the three tuned penalties, e.g. "3.0_2.0_5.0".
    figure_name = "_".join(str(-np.log10(penalty)).zfill(2)
                           for penalty in (l1_proj, l1_subnet, smooth_lambda))
    model.visualize(folder=simu_dir + task_name + "/",
              name=figure_name,
              dummy_name=dummy_name,
              save_eps=False)

    tr_pred = model.predict(model.tr_x)
    val_pred = model.predict(model.val_x)
    pred_test = model.predict(test_x)

    # The same scoring applies to every task; the caller selects the metric.
    stat = np.hstack([np.round(metric(model.tr_y, tr_pred), 5),
                      np.round(metric(model.val_y, val_pred), 5),
                      np.round(metric(test_y, pred_test), 5)])

    res_stat = pd.DataFrame(np.vstack([stat[0], stat[1], stat[2]]).T,
                            columns=['train_metric', "val_metric", "test_metric"])
    res_stat["Subnet_Number"] = effective_subnet_num
    res_stat["lr_BP"] = lr_bp
    res_stat["lr_CL"] = lr_cl
    res_stat["L1_Penalty_Proj"] = l1_proj
    res_stat["L1_Penalty_Subnet"] = l1_subnet
    res_stat["Smooth_labmda"] = smooth_lambda
    res_stat["Training_Epochs"] = training_epochs
    return res_stat

In [6]:
# Evaluate the full 3 x 3 x 3 grid in parallel: i indexes l1_proj, j indexes
# l1_subnet and k indexes smooth_lambda, with k varying fastest.
cv_results = Parallel(n_jobs=14)(
    delayed(sosxnn_tune)(
        "./results/", "S1_tune",
        train_x, train_y, test_x, test_y,
        subnet_arch=[10, 6],
        metric=get_mse(scaler_y),
        input_dummy_num=0,
        l1_proj=10**(-1-i),
        l1_subnet=10**(-1-j),
        smooth_lambda=10**(-5-k),
        training_epochs=5000,
        lr_bp=0.001,
        lr_cl=0.1,
        batch_size=1000,
        early_stop_thres=2500,
        tuning_epochs=200,
        dummy_name=None,
    )
    for i in range(3)
    for j in range(3)
    for k in range(3)
)

## Show the results

In [7]:
# Stack the one-row result frames and rank the runs by validation metric
# (ascending, so the smallest value is listed first).
stat = pd.concat(cv_results, axis=0)
stat.sort_values(by="val_metric", ascending=True)

Unnamed: 0,train_metric,val_metric,test_metric,Subnet_Number,lr_BP,lr_CL,L1_Penalty_Proj,L1_Penalty_Subnet,Smooth_labmda,Training_Epochs
0,1.0205,1.0397,1.04613,10,0.001,0.1,0.001,0.01,1e-06,5000
0,1.02037,1.04259,1.0482,10,0.001,0.1,0.001,0.01,1e-05,5000
0,1.05516,1.08506,1.08603,10,0.001,0.1,0.001,0.001,1e-05,5000
0,1.05702,1.09011,1.08804,10,0.001,0.1,0.001,0.01,1e-07,5000
0,1.07874,1.10838,1.10596,10,0.001,0.1,0.001,0.001,1e-07,5000
0,1.08374,1.11091,1.1099,10,0.001,0.1,0.001,0.001,1e-06,5000
0,1.10265,1.139,1.13743,10,0.001,0.1,0.1,0.01,1e-07,5000
0,1.1058,1.14341,1.13584,10,0.001,0.1,0.1,0.01,1e-06,5000
0,1.10691,1.14573,1.13758,10,0.001,0.1,0.01,0.001,1e-06,5000
0,1.10699,1.14689,1.13899,10,0.001,0.1,0.01,0.01,1e-07,5000
