# Automatic feature selection with LASSO regression

In this notebook we will learn how LASSO (Least Absolute Shrinkage and Selection Operator) regression works and how it can help automatically select which variables should be included in a model, viewed from a **cross-validation** perspective.

#### Start by importing packages

In [7]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets
#mnist = tensorflow_datasets.load('mnist')

def run_cnn():
   """Load the 'mnist' dataset and bind three training hyperparameters.

   NOTE(review): the visible body only assigns local variables; nothing is
   returned and none of the locals are used afterwards, so this looks like an
   unfinished stub -- confirm whether the rest of the function is missing.
   """
   # Fetch the dataset through tensorflow_datasets under the name 'mnist'.
   mnist = tensorflow_datasets.load('mnist')
   # Hyperparameters (currently unused in the visible code).
   learning_rate = 0.0001
   epochs = 10
   batch_size = 50

ModuleNotFoundError: No module named 'tensorflow'

In [6]:
import numpy as np
import tensorflow as tf
from time import time
import math
 
 
from include.data import get_data_set
from include.model import model, lr
 
 
# Load the "train" and "test" splits through the project-local helper.
train_x, train_y = get_data_set("train")
test_x, test_y = get_data_set("test")
# Seed TensorFlow's random number generation with a fixed value.
tf.set_random_seed(21)
# model() hands back six graph handles: the two feed targets (x, y), the
# logits (`output`), the predicted class tensor, the step counter and the
# learning-rate feed target. All six are used by the code below.
x, y, output, y_pred_cls, global_step, learning_rate = model()
# Best test accuracy seen so far; test_and_save() updates it and treats 0 as
# "no epoch has been evaluated yet".
global_accuracy = 0
# Timestamp of the current epoch's start; train() overwrites it every epoch.
epoch_start = 0
 
 
# PARAMS
_BATCH_SIZE = 128  # slice length used for both training and evaluation batches
_EPOCH = 60  # number of times main() calls train()
_SAVE_PATH = "./tensorboard/cifar-10-v1.0.0/"  # checkpoint and summary-writer location
 
 
# LOSS AND OPTIMIZER
# Mean softmax cross-entropy between the model logits (`output`) and labels `y`.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y))
# Adam update op that minimizes `loss`. `learning_rate` is fed per step by
# train(); `global_step` is handed to minimize() -- presumably so it is
# incremented on every update (TF1 behaviour, confirm).
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-08).minimize(loss, global_step=global_step)
 
 
# PREDICTION AND ACCURACY CALCULATION
# A prediction counts as correct when it equals the argmax of `y` along axis 1.
correct_prediction = tf.equal(y_pred_cls, tf.argmax(y, axis=1))
# Fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
 
 
# SAVER
# NOTE(review): `merged` is not referenced again in the visible code.
merged = tf.summary.merge_all()
saver = tf.train.Saver()
# One session shared by train() and test_and_save(); closed at the end of the cell.
sess = tf.Session()
# Summary writer that also receives the session graph; test_and_save() adds
# the test-accuracy summaries to it.
train_writer = tf.summary.FileWriter(_SAVE_PATH, sess.graph)
 
 
# Resume from the newest checkpoint under _SAVE_PATH when possible; if the
# restore raises ValueError, fall back to freshly initialized variables.
# NOTE(review): presumably latest_checkpoint() yields None when no checkpoint
# exists and restore() then raises the ValueError caught here -- confirm.
try:
    print("Trying to restore last checkpoint ...")
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=_SAVE_PATH)
    saver.restore(sess, save_path=last_chk_path)
    print("Restored checkpoint from:", last_chk_path)
except ValueError:
    print("Failed to restore checkpoint. Initializing variables instead.")
    sess.run(tf.global_variables_initializer())
 
 
def train(epoch):
    """Run one pass over the training data, then evaluate via test_and_save.

    Args:
        epoch: Zero-based epoch index. It is passed to ``lr(epoch)`` to obtain
            the learning rate fed at every step and forwarded unchanged to
            ``test_and_save``.

    Side effects:
        Resets the module-level ``epoch_start`` timestamp, runs the optimizer
        once per mini-batch through the shared ``sess``, prints a progress
        line on every tenth batch, and finally calls ``test_and_save`` with
        the last global step value returned by the session.
    """
    global epoch_start
    epoch_start = time()

    # Number of mini-batches needed to cover the training set (last may be short).
    num_batches = int(math.ceil(len(train_x) / _BATCH_SIZE))
    i_global = 0

    for step in range(num_batches):
        lo = step * _BATCH_SIZE
        hi = (step + 1) * _BATCH_SIZE
        batch_xs = train_x[lo:hi]
        batch_ys = train_y[lo:hi]

        # Time only the session call so the throughput figure reflects it.
        tick = time()
        i_global, _, batch_loss, batch_acc = sess.run(
            [global_step, optimizer, loss, accuracy],
            feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)},
        )
        elapsed = time() - tick

        # Only every tenth batch is reported.
        if step % 10 != 0:
            continue

        percentage = int(round((step / num_batches) * 100))

        # Text progress bar: '=' for the completed share, '>' as the head.
        bar_len = 29
        filled_len = int((bar_len * int(percentage)) / 100)
        bar = '=' * filled_len + '>' + '-' * (bar_len - filled_len)

        print(
            "Global step: {:>5} - [{}] {:>3}% - acc: {:.4f} - loss: {:.4f} - {:.1f} sample/sec".format(
                i_global, bar, percentage, batch_acc, batch_loss, _BATCH_SIZE / elapsed))

    test_and_save(i_global, epoch)
 
 
def test_and_save(_global_step, epoch):
    global global_accuracy
    global epoch_start
 
    i = 0
    predicted_class = np.zeros(shape=len(test_x), dtype=np.int)
    while i < len(test_x): 
        j = min(i + _BATCH_SIZE, len(test_x)) 
        batch_xs = test_x[i:j, :] 
        batch_ys = test_y[i:j, :] 
        predicted_class[i:j] = sess.run( y_pred_cls, feed_dict={x: batch_xs, y: batch_ys, learning_rate: lr(epoch)} ) 
        i = j 
        correct = (np.argmax(test_y, axis=1) == predicted_class) 
        acc = correct.mean()*100 
        correct_numbers = correct.sum() 
        hours, rem = divmod(time() - epoch_start, 3600) 
        minutes, seconds = divmod(rem, 60) 
        mes = " Epoch {} - accuracy: {:.2f}% ({}/{}) - time: {:0>2}:{:0>2}:{:05.2f}"
        print(mes.format((epoch+1), acc, correct_numbers, len(test_x), int(hours), int(minutes), seconds))
 
    if global_accuracy != 0 and global_accuracy < acc: 
        summary = tf.Summary(value=[ tf.Summary.Value(tag="Accuracy/test", simple_value=acc), ])
        train_writer.add_summary(summary, _global_step) 
        saver.save(sess, save_path=_SAVE_PATH, global_step=_global_step) 
        mes = "This epoch receive better accuracy: {:.2f} > {:.2f}. Saving session..."
        print(mes.format(acc, global_accuracy))
        global_accuracy = acc
 
    elif global_accuracy == 0:
        global_accuracy = acc
 
    print("###########################################################################################################")
 
 
def main():
    """Train for ``_EPOCH`` epochs, then print the best accuracy and total time.

    Each epoch prints an "Epoch: k/N" header and delegates to ``train``. The
    closing line reports the module-level ``global_accuracy`` together with
    the wall-clock duration formatted as HH:MM:SS.ss.
    """
    train_start = time()

    for epoch_idx in range(_EPOCH):
        print("Epoch: {}/{}".format(epoch_idx + 1, _EPOCH))
        train(epoch_idx)

    # Break the elapsed seconds down into hours / minutes / seconds.
    elapsed = time() - train_start
    hours, rem = divmod(elapsed, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Best accuracy pre session: {:.2f}, time: {:0>2}:{:0>2}:{:05.2f}".format(
        global_accuracy, int(hours), int(minutes), seconds))
 
 
# Run the training loop when this code executes as the main module (which is
# the case inside a notebook cell), and always release the TensorFlow session
# afterwards -- including when training raises or is interrupted.
try:
    if __name__ == "__main__":
        main()
finally:
    sess.close()

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
import numpy as np
import tensorflow as tf
 
from include.data import get_data_set
from include.model import model
 
 
# Only the "test" split is loaded: the code in this cell runs predictions on
# test data and never trains.
test_x, test_y = get_data_set("test")
# Rebuild the graph handles. Of the six values, only x, y and y_pred_cls are
# used by the visible code in this cell.
x, y, output, y_pred_cls, global_step, learning_rate = model()
 
 
_BATCH_SIZE = 128  # slice length used when predicting the test set
_CLASS_SIZE = 10  # NOTE(review): not referenced in the visible code
_SAVE_PATH = "./tensorboard/cifar-10-v1.0.0/"  # directory searched for the latest checkpoint
 
 
saver = tf.train.Saver()
# Session shared by the restore step and main(); closed at the end of the cell.
sess = tf.Session()
 
 
# Resume from the newest checkpoint under _SAVE_PATH when possible; if the
# restore raises ValueError, fall back to freshly initialized variables.
# The leading "\n" in both messages must stay an escape sequence: a literal
# line break inside a single-quoted string is a SyntaxError.
try:
    print("\nTrying to restore last checkpoint ...")
    last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=_SAVE_PATH)
    saver.restore(sess, save_path=last_chk_path)
    print("Restored checkpoint from:", last_chk_path)
except ValueError:
    print("\nFailed to restore checkpoint. Initializing variables instead.")
    sess.run(tf.global_variables_initializer())
 
 
def main():
    """Predict the whole test set in batches and print the overall accuracy.

    Reads the module-level ``test_x``/``test_y`` arrays, runs ``y_pred_cls``
    through the shared ``sess`` on slices of at most ``_BATCH_SIZE`` samples,
    and prints the percentage and count of predictions that match the argmax
    of ``test_y`` along axis 1. Returns nothing.
    """
    # Builtin `int` replaces `np.int`, which was only an alias for it and has
    # been removed from NumPy (raises AttributeError on current releases).
    predicted_class = np.zeros(shape=len(test_x), dtype=int)

    i = 0
    while i < len(test_x):
        # Clamp the upper bound so the final slice may be shorter.
        j = min(i + _BATCH_SIZE, len(test_x))
        batch_xs = test_x[i:j, :]
        batch_ys = test_y[i:j, :]
        predicted_class[i:j] = sess.run(y_pred_cls, feed_dict={x: batch_xs, y: batch_ys})
        i = j

    correct = (np.argmax(test_y, axis=1) == predicted_class)
    acc = correct.mean() * 100
    correct_numbers = correct.sum()
    print()
    print("Accuracy on Test-Set: {0:.2f}% ({1} / {2})".format(acc, correct_numbers, len(test_x)))
 
 
# Run the evaluation when this code executes as the main module (which is the
# case inside a notebook cell), and always release the TensorFlow session
# afterwards -- including when evaluation raises.
try:
    if __name__ == "__main__":
        main()
finally:
    sess.close()

In [None]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline

# This activity compares different values for regularization parameter ‘alpha’.
# The plot shows that different alphas yield different decision functions.

# Recall that alpha is the coefficient of the regularization (penalty) term, which combats
# overfitting by constraining the size of the weights. Increasing alpha may fix
# high variance (a sign of overfitting) by encouraging smaller weights,
# resulting in a decision boundary with less curvature.
# Similarly, decreasing alpha may fix high bias (a sign of underfitting) by
# encouraging larger weights, potentially resulting in a more complicated decision boundary.

# Regularization strengths to compare: five values spaced evenly on a log
# scale, from 10**-5 up to 10**3.
alphas = np.logspace(start=-5, stop=3, num=5)

# One plot label per alpha, built with a list comprehension -- the loop is
# written INSIDE the list literal. It is the compact equivalent of:
#
#     names = []
#     for value in alphas:
#         names.append('alpha ' + str(value))
names = ["alpha " + str(value) for value in alphas]

# print('names', names)

# Now for the heavy lifting:
# build one scikit-learn pipeline per alpha.

# A pipeline applies a list of transforms in sequence, followed by a final
# estimator. Intermediate steps must be "transforms", that is, they must
# implement fit and transform methods. The final estimator only needs to
# implement fit.

# The purpose of the pipeline is to assemble several steps that can be cross-validated
# together while setting different parameters.

classifiers = []
for i in alphas:

    # Each pipeline standardizes the features and then fits an MLP with two
    # hidden layers of 100 units, using the current alpha.
    # NOTE(review): scikit-learn documents `early_stopping` as only effective
    # for the 'sgd' and 'adam' solvers -- confirm whether it is needed here.
    classifiers.append(make_pipeline(
                       StandardScaler(),
                       MLPClassifier(solver='lbfgs', alpha=i,
                                     random_state=1, max_iter=2000,
                                     early_stopping=True,
                                     hidden_layer_sizes=[100, 100])
                       ))

# Use one of sklearn's built-in data generators to create a two-feature
# classification problem (both features informative, one cluster per class).
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=0, n_clusters_per_class=1)
# Shift every point by seeded uniform noise in [0, 2); the in-place add
# mutates X before it is stored in the tuple below.
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

# The three datasets to classify: two interleaving moons, concentric circles,
# and the noisy two-feature set built above. Each is an (X, y) pair.
datasets = [make_moons(noise=0.3, random_state=0),
            make_circles(noise=0.2, factor=0.5, random_state=1),
            linearly_separable]


# Take a look at the results to see the different classifications, along with
# how they change with the regularization parameter.
# Layout: one row per dataset; the first column shows the raw data and each
# following column shows one classifier (one alpha).
figure = plt.figure(figsize=(17, 9))
i = 1  # 1-based index of the next subplot to fill

h = .02  # step size in the mesh (the thing we'll actually plot).

# Colormaps do not depend on the dataset, so create them once.
cm = plt.cm.RdBu
cm_bright = ListedColormap(['#FF0000', '#0000FF'])

# iterate over datasets
for X, y in datasets:
    # Preprocess the dataset and split it into training and test parts.
    # A fixed random_state keeps the split -- and therefore the figure and
    # the printed scores -- identical across Restart & Run All.
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.4, random_state=42)

    # Mesh covering the data range plus a 0.5 margin on every side.
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # just plot the dataset first
    ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
    # Plot the training points
    ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright)
    # and testing points (more transparent)
    ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6)
    ax.set_xlim(xx.min(), xx.max())
    ax.set_ylim(yy.min(), yy.max())
    ax.set_xticks(())
    ax.set_yticks(())
    i += 1

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        ax = plt.subplot(len(datasets), len(classifiers) + 1, i)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        # Use the decision function when the estimator has one, otherwise the
        # predicted probability of the second class.
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        else:
            Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

        # Plot also the training points
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright,
                   edgecolors='black', s=25)
        # and testing points
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright,
                   alpha=0.6, edgecolors='black', s=25)

        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())
        ax.set_title(name)
        # Test-set accuracy in the lower-right corner, without the leading zero.
        ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                size=15, horizontalalignment='right')
        i += 1

figure.subplots_adjust(left=.02, right=.98)
plt.show()