# Group lasso for neural networks

## Imports

In [2]:
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.regularizers import Regularizer
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from keras.wrappers.scikit_learn import KerasRegressor
import time



Using TensorFlow backend.


## Defining the group lasso penalty

In [2]:

class L21(Regularizer):
    """Regularizer for L21 (group lasso) regularization.

    The penalty for a 2-D weight tensor ``x`` is
    ``C * sqrt(n_cols) * sum_i ||x[i, :]||_2``: the L2 norm of every row is
    summed and the total is scaled by the square root of the size of axis 1.
    For a Dense kernel each row presumably groups the outgoing weights of one
    input unit -- confirm against the layer's kernel layout.

    # Arguments
        C: Float; L21 regularization factor.
    """

    def __init__(self, C=0.):
        self.C = K.cast_to_floatx(C)

    def __call__(self, x):
        # Static size of axis 1, i.e. the number of entries in each row group.
        const_coeff = np.sqrt(K.int_shape(x)[1])
        row_norms = K.sqrt(K.sum(K.square(x), axis=1))
        return self.C * const_coeff * K.sum(row_norms)

    def get_config(self):
        # Report the attribute that __init__ actually sets; the previous
        # `self.l1` does not exist on this class and raised AttributeError.
        return {'C': float(self.C)}

# Utility function to count active neurons in a Keras model with Dense layers
def count_neurons(model, threshold=10**-3):
    """Count the weight rows that are still "active" across all layers.

    For every layer the first array returned by ``get_weights()`` is taken,
    the absolute values are summed along axis 1, and each row whose sum
    exceeds ``threshold`` is counted as one active neuron.

    # Arguments
        model: Object exposing ``layers``; each layer exposes ``get_weights()``.
        threshold: Float; a row counts as active when the sum of its absolute
            weights is strictly greater than this value.

    # Returns
        Total number of active rows summed over all layers.
    """
    per_layer_counts = []
    for layer in model.layers:
        weights = layer.get_weights()
        if not weights:
            # Layers without trainable arrays have nothing to count.
            continue
        row_mass = np.sum(np.abs(weights[0]), axis=1)
        per_layer_counts.append(np.sum(row_mass > threshold))
    return np.sum(per_layer_counts)

# Callback class to save training loss and the number of neurons
class TrainHistory(keras.callbacks.Callback):
    """Record the training loss and active-neuron count after every batch.

    # Attributes
        losses: List with the `'loss'` entry of `logs` for each finished batch.
        neurons: List of `count_neurons(self.model)` values; the first entry
            is taken at the start of training, then one per finished batch.
    """

    def on_train_begin(self, logs=None):
        # Start from empty records on every fit so repeated fits do not mix.
        self.losses = []
        self.neurons = [count_neurons(self.model)]

    def on_batch_end(self, batch, logs=None):
        # `logs` defaults to None rather than a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.neurons.append(count_neurons(self.model))

## Loading and splitting data

In [12]:
# Read the four tab-separated splits, then stack train and test row-wise so
# the full data set is available as X / y.
X_trn, X_tst, y_trn, y_tst = [
    np.genfromtxt(csv_path, delimiter='\t')
    for csv_path in (
        '../Cleaning/X_train.csv',
        '../Cleaning/X_test.csv',
        '../Cleaning/Y_train.csv',
        '../Cleaning/Y_test.csv',
    )
]
X = np.concatenate((X_trn, X_tst), axis=0)
y = np.concatenate((y_trn, y_tst), axis=0)

In [13]:
# Optional session reset, currently disabled:
# tf.reset_default_graph()
# K.set_session(tf.Session())
# Callback instance that records per-batch loss and active-neuron counts.
# NOTE(review): the cross-validation call in this file builds its own
# TrainHistory() instead of passing this object -- confirm which is intended.
history = TrainHistory()

## Building the group-lasso penalized model

In [14]:
def create_network():
    """Build and compile the group-lasso penalized regression network.

    Three linear Dense layers (20, 15 and 1 units), each with its own
    L21(0.001) kernel regularizer; the input width is taken from X_trn.
    The model is compiled with the Adam optimizer and MSE loss.
    """
    net = Sequential()
    # Only the first layer declares the input dimension.
    net.add(Dense(20,
                  input_dim=X_trn.shape[1],
                  activation='linear',
                  kernel_regularizer=L21(0.001)))
    for width in (15, 1):
        net.add(Dense(width,
                      activation='linear',
                      kernel_regularizer=L21(0.001)))
    net.compile(loss='mse', optimizer='adam')
    return net

## Sklearn wrapper for Keras model

In [15]:
# Expose the Keras model builder through the scikit-learn estimator API so
# it can be handed to cross_val_score.
neural_network = KerasRegressor(
    build_fn=create_network,
    epochs=1000,
    batch_size=100,
    verbose=0,
)

## Calculating the cross-validation score (R²)

In [16]:
# 4-fold cross-validation scored with R^2; a fresh TrainHistory callback is
# forwarded to every fit.
# NOTE(review): R^2 can be negative, so np.abs() hides folds that perform
# worse than a constant predictor -- confirm the abs (or the metric) is intended.
errors = np.abs(
    cross_val_score(
        neural_network,
        X,
        y,
        cv=4,
        scoring='r2',
        fit_params={'callbacks': [TrainHistory()]},
    )
)
errors.mean()

KeyboardInterrupt: 

In [4]:
# Start a wall-clock timer; the elapsed time is printed at the end of this cell.
t0 = time.time()
class L21(Regularizer):
    """Regularizer for L21 (group lasso) regularization.

    The penalty for a 2-D weight tensor ``x`` is
    ``C * sqrt(n_cols) * sum_i ||x[i, :]||_2``: the L2 norm of every row is
    summed and the total is scaled by the square root of the size of axis 1.
    For a Dense kernel each row presumably groups the outgoing weights of one
    input unit -- confirm against the layer's kernel layout.

    # Arguments
        C: Float; L21 regularization factor.
    """

    def __init__(self, C=0.):
        self.C = K.cast_to_floatx(C)

    def __call__(self, x):
        # Static size of axis 1, i.e. the number of entries in each row group.
        const_coeff = np.sqrt(K.int_shape(x)[1])
        row_norms = K.sqrt(K.sum(K.square(x), axis=1))
        return self.C * const_coeff * K.sum(row_norms)

    def get_config(self):
        # Report the attribute that __init__ actually sets; the previous
        # `self.l1` does not exist on this class and raised AttributeError.
        return {'C': float(self.C)}

# Utility function to count active neurons in a Keras model with Dense layers
def count_neurons(model, threshold=10**-3):
    """Count the weight rows that are still "active" across all layers.

    For every layer the first array returned by ``get_weights()`` is taken,
    the absolute values are summed along axis 1, and each row whose sum
    exceeds ``threshold`` is counted as one active neuron.

    # Arguments
        model: Object exposing ``layers``; each layer exposes ``get_weights()``.
        threshold: Float; a row counts as active when the sum of its absolute
            weights is strictly greater than this value.

    # Returns
        Total number of active rows summed over all layers.
    """
    per_layer_counts = []
    for layer in model.layers:
        weights = layer.get_weights()
        if not weights:
            # Layers without trainable arrays have nothing to count.
            continue
        row_mass = np.sum(np.abs(weights[0]), axis=1)
        per_layer_counts.append(np.sum(row_mass > threshold))
    return np.sum(per_layer_counts)

# Callback class to save training loss and the number of neurons
class TrainHistory(keras.callbacks.Callback):
    """Record the training loss and active-neuron count after every batch.

    # Attributes
        losses: List with the `'loss'` entry of `logs` for each finished batch.
        neurons: List of `count_neurons(self.model)` values; the first entry
            is taken at the start of training, then one per finished batch.
    """

    def on_train_begin(self, logs=None):
        # Start from empty records on every fit so repeated fits do not mix.
        self.losses = []
        self.neurons = [count_neurons(self.model)]

    def on_batch_end(self, batch, logs=None):
        # `logs` defaults to None rather than a shared mutable dict.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.neurons.append(count_neurons(self.model))
        
# Read the four tab-separated splits, then stack train and test row-wise so
# the full data set is available as X / y.
X_trn, X_tst, y_trn, y_tst = [
    np.genfromtxt(csv_path, delimiter='\t')
    for csv_path in (
        '../Cleaning/X_train.csv',
        '../Cleaning/X_test.csv',
        '../Cleaning/Y_train.csv',
        '../Cleaning/Y_test.csv',
    )
]
X = np.concatenate((X_trn, X_tst), axis=0)
y = np.concatenate((y_trn, y_tst), axis=0)
# Optional session reset, currently disabled:
# tf.reset_default_graph()
# K.set_session(tf.Session())
# Callback instance that records per-batch loss and active-neuron counts.
# NOTE(review): the cross-validation call in this file builds its own
# TrainHistory() instead of passing this object -- confirm which is intended.
history = TrainHistory()
def create_network():
    """Build and compile the group-lasso penalized regression network.

    Three linear Dense layers (20, 15 and 1 units), each with its own
    L21(0.001) kernel regularizer; the input width is taken from X_trn.
    The model is compiled with the Adam optimizer and MSE loss.
    """
    net = Sequential()
    # Only the first layer declares the input dimension.
    net.add(Dense(20,
                  input_dim=X_trn.shape[1],
                  activation='linear',
                  kernel_regularizer=L21(0.001)))
    for width in (15, 1):
        net.add(Dense(width,
                      activation='linear',
                      kernel_regularizer=L21(0.001)))
    net.compile(loss='mse', optimizer='adam')
    return net

# Short (10-epoch) run of the wrapped model under 4-fold cross-validation.
neural_network = KerasRegressor(
    build_fn=create_network,
    epochs=10,
    batch_size=100,
    verbose=0,
)
# NOTE(review): R^2 can be negative, so np.abs() hides folds that perform
# worse than a constant predictor -- confirm the abs (or the metric) is intended.
errors = np.abs(
    cross_val_score(
        neural_network,
        X,
        y,
        cv=4,
        scoring='r2',
        fit_params={'callbacks': [TrainHistory()]},
    )
)
print(errors.mean())
# Report the wall-clock seconds elapsed since t0.
t1 = time.time()
print(t1 - t0)

0.2496306538921148
115.01811838150024
