# ECON 31703 Problem Set 2 - Arjun Gopinath and Tugce Turk

In [1]:
# Standard Python Imports

import numpy as np
import pandas as pd
from numba import njit, jit
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib
from matplotlib import rc
import statsmodels.api as sm
from scipy.stats import norm, zscore
import scipy as sp
from numpy import random, linalg
from scipy import sparse, stats
import itertools as it
from sklearn.preprocessing import StandardScaler as scaler
from sklearn.linear_model import Lasso
import cProfile

matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['text.latex.preamble'] = [
    r'\usepackage{amssymb}',
    r'\usepackage{amsmath}',
    r'\usepackage{xcolor}',
    r'\renewcommand*\familydefault{\sfdefault}']
matplotlib.rcParams['pgf.texsystem'] = 'pdflatex'
matplotlib.rcParams['pgf.preamble']  = [
    r'\usepackage[utf8x]{inputenc}',
    r'\usepackage{amssymb}',
    r'\usepackage[T1]{fontenc}',
    r'\usepackage{amsmath}',
    r'\usepackage{sansmath}']

from IPython.display import set_matplotlib_formats
%matplotlib inline
set_matplotlib_formats('svg')

inv, ax, norm = np.linalg.inv, np.newaxis, np.linalg.norm
randint = np.random.randint

  import pandas.util.testing as tm


In [2]:
# Import functions from helper file.

from LASSOHelperAGTT import lasso_objective, lasso_cdg

In [3]:
# Configure dataset size
N_obs = 200
N_param = 120

# Seed NumPy's global generator so the simulated data (and therefore every
# profiling run below) is identical under Restart Kernel -> Run All.
SEED = 31703
np.random.seed(SEED)

# Simulate data used in exercise: design matrix X (N_obs x N_param),
# noise u (N_obs x 1) and true coefficients b (N_param x 1), all standard normal.
X = np.random.randn(N_obs, N_param)
u = np.random.randn(N_obs, 1)
b = np.random.randn(N_param, 1)

# Set intercept
X[:, 0] = 1.

# Set exactly n_0 randomly chosen slope coefficients to zero. Indices are drawn
# WITHOUT replacement from 1..N_param-1 (the intercept at index 0 is never
# zeroed); drawing with replacement would produce duplicates and leave fewer
# than n_0 coefficients at zero.
n_0 = 70
zero_idx = np.random.choice(np.arange(1, N_param), size=n_0, replace=False)
b[zero_idx, :] = 0

# Set outcome variable
Y = X @ b + u


## Exercise 1 - Coordinate Gradient Descent in LASSO

### Part A: LASSO Objective Function

We have coded this in `lasso_objective`.

### Part B: Cyclic Coordinate Gradient Descent

We have coded this in `lasso_cdg`.

In [4]:
# Profile the baseline coordinate descent solver: start from the all-zero
# coefficient vector (0*b has the same shape as b) with penalty lmbda=0.1,
# leaving the active-set and SAFE options at their defaults.
cProfile.run('lasso_cdg(b_start=0*b, y=Y, X=X, lmbda=0.1)')

         10343 function calls (10275 primitive calls) in 0.627 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       34    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(append)
       34    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(concatenate)
       34    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(dot)
       68    0.000    0.000    0.003    0.000 <__array_function__ internals>:2(norm)
       34    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(ravel)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(sum)
        1    0.000    0.000    0.626    0.626 <string>:1(<module>)
       34    0.000    0.000    0.006    0.000 LASSOHelperAGTT.py:151(<lambda>)
       30    0.000    0.000    0.000    0.000 LASSOHelperAGTT.py:154(<lambda>)
        1    0.000    0.000    0.000    0.000 LASSOHelperAGTT.py:177(<listcomp>)
  

### Part C: Active Set Strategy

We have coded this in `lasso_cdg` by adding the options `active_set` and `active_set_strategy`. Setting `active_set` to `True` (the default is `False`) and specifying the number of iterations after which all $p$ parameters are updated makes this function implement the Active Set strategy in place of the regular CDG.


In [5]:
# Profile the same problem (zero start, lmbda=0.1) with active_set=True so the
# call counts and timings can be compared against the baseline run above.
cProfile.run('lasso_cdg(b_start=0*b, y=Y, X=X, lmbda=0.1, active_set=True)')

         4886 function calls (4816 primitive calls) in 0.253 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       35    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(append)
       35    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(concatenate)
       35    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(dot)
       70    0.000    0.000    0.003    0.000 <__array_function__ internals>:2(norm)
       35    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(ravel)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(sum)
       31    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(where)
        1    0.000    0.000    0.253    0.253 <string>:1(<module>)
       35    0.000    0.000    0.007    0.000 LASSOHelperAGTT.py:151(<lambda>)
       31    0.001    0.000    0.001    0.000 LASSOHelperAGTT.py:159(<lambda>)

### Part D: Computing $\lambda_{\max}$
We have coded this in `lambda_zero`.

### Part E: SAFE Strategy

We have coded this in `lasso_cdg` by adding the option `safe`. Setting `safe` to `True` (the default is `False`) makes this function implement the SAFE strategy along with the Active Set strategy.

In [6]:
# Profile the same problem (zero start, lmbda=0.1) with both active_set=True
# and safe=True, for comparison with the two earlier profiling runs.
cProfile.run('lasso_cdg(b_start=0*b, y=Y, X=X, lmbda=0.1, active_set=True, safe=True)')

         4923 function calls (4852 primitive calls) in 0.223 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(amax)
       35    0.000    0.000    0.001    0.000 <__array_function__ internals>:2(append)
       35    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(concatenate)
       36    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(dot)
       72    0.000    0.000    0.004    0.000 <__array_function__ internals>:2(norm)
       35    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(ravel)
        2    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(sum)
       32    0.000    0.000    0.000    0.000 <__array_function__ internals>:2(where)
        1    0.000    0.000    0.223    0.223 <string>:1(<module>)
       35    0.000    0.000    0.008    0.000 LASSOHelperAGTT.py:151(<la

### Part F: Wrapping around multiple $\lambda$ values