### Random Lasso

Description of random lasso portion of project...

In [2]:
# All required packages should go here. No where else. -jmh
import numpy as np
import time
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn import linear_model

In [3]:
# Turning off convergence warnings.
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings(action='ignore', category=ConvergenceWarning)

In [None]:
# Paste RandomLasso.py here. (When complete)

In [1]:
# Paste RegressionHelpers.py here. (When complete)

We know that as the ratio of features to samples increases, the accuracy of any regression algorithm will deteriorate. Let's observe this effect:

In [18]:
import RegressionTests 

# Globals for Testing Lasso and Random Lasso.
tests = 10
start_samples = np.full(tests, 100)
start_features = np.full(tests, 10)
start_informative = np.full(tests, 2)

rme_least_squares = np.zeros(tests)
rme_ridge = np.zeros(tests)
rme_lasso = np.zeros(tests)

runtime_least_squares = np.zeros(tests)
runtime_ridge = np.zeros(tests)
runtime_lasso = np.zeros(tests) 

for ii in range(tests):
    print("------ Test", ii + 1, "of", tests, "------")
    start_samples[ii] = start_samples[ii] * (ii + 1)
    start_features[ii] = start_features[ii] ** ((ii + 1) / 2)
    start_informative[ii] = start_informative[ii] * (2 * (ii + 1))
    cores = (-1 if start_features[ii] >= 400 else 1)
    
    print("Samples:", start_samples[ii],
          "| Features:", start_features[ii],
          "| Informative:", start_informative[ii])
    
    X, y, ground_truth = make_regression(n_samples=start_samples[ii],
                                         n_features=start_features[ii],
                                         n_informative=start_informative[ii],
                                         coef=True)
    
    # Sorting features by their importance. Most important feature in X[:, 0].
    sorted_indices = np.flip(np.argsort(ground_truth))
    ground_truth = ground_truth[sorted_indices]
    X = X[:, sorted_indices]

    #print("Ground Truth:\n", ground_truth.T)

    # Testing Least Squares
    start_time = time.time()
    reg = linear_model.LinearRegression().fit(X, y)
    rme_least_squares[ii], runtime_least_squares[ii] = \
        bulk_analysis_regression(X, y, reg.coef_, ground_truth, "LS", start_time)
    
    # Testing Ridge
    start_time = time.time()
    reg = linear_model.RidgeCV().fit(X, y)
    rme_ridge[ii], runtime_ridge[ii] = \
        bulk_analysis_regression(X, y, reg.coef_, ground_truth, "Ridge", start_time)       

    # Testing Lasso
    start_time = time.time()
    reg = linear_model.LassoCV(n_jobs=cores).fit(X, y)
    rme_lasso[ii], runtime_lasso[ii] = \
        bulk_analysis_regression(X, y, reg.coef_, ground_truth, "Lasso", start_time)

# Graph the degrading performance of the RMSE as the ratio of features to samples increases. -jmh

------ Test 1 of 10 ------
Samples: 100 | Features: 3 | Informative: 4
LS Runtime: 0.0004069805145263672
LS RME: 3.90854283173504e-27
Ridge Runtime: 0.0005638599395751953
Ridge RME: 0.002677362094294139
Lasso Runtime: 0.04295706748962402
Lasso RME: 0.006935778624403897
------ Test 2 of 10 ------
Samples: 200 | Features: 10 | Informative: 8
LS Runtime: 0.0004239082336425781
LS RME: 1.191341030971711e-26
Ridge Runtime: 0.0007410049438476562
Ridge RME: 0.0005947160612619166
Lasso Runtime: 0.04328513145446777
Lasso RME: 0.009143747939684244
------ Test 3 of 10 ------
Samples: 300 | Features: 31 | Informative: 12
LS Runtime: 0.0012819766998291016
LS RME: 1.755724553453542e-27
Ridge Runtime: 0.0033288002014160156
Ridge RME: 0.0002656500193025018
Lasso Runtime: 0.04187178611755371
Lasso RME: 0.004842134820101388
------ Test 4 of 10 ------
Samples: 400 | Features: 100 | Informative: 16
LS Runtime: 0.002468109130859375
LS RME: 2.8037043483118393e-27
Ridge Runtime: 0.004125833511352539
Ridge RME