David Fleming Oct 2015

CSE 546 HW 1 Lasso Regression

In [447]:
#Imports
import scipy.sparse as sp
import numpy as np

In [32]:
####
# This is a quick walkthrough to help you understand the operations in scipy.sparse
####
# NOTE: every print below uses the single-argument call form so that the cell
# produces the same text on Python 2 and Python 3.

# construct a sparse array, here we simply construct it from a dense array
A = np.arange(12).reshape(3,4)
print(A)
X = sp.csc_matrix(A)

w = np.ones(4)
print("")
print("%s %s" % (X.shape, w.shape))

# matrix-vector multiplication: sparse (3 x 4) times dense (4,) gives dense (3,)
y = X.dot(w)
print("")
print(y)

#
# dot product between the i-th column of X and g
#
i = 0
g = np.ones(3)
# r1 = dot(X[:,i], g). X[:,i] keeps matrix semantics (a 3 x 1 sparse column),
# so it has to be transposed to 1 x 3 before it can be multiplied with g.
r1 = X[:,i].T.dot(g)
print("")
print(r1)
#
# This is how you can get dot(X[:,i], X[:,i]) in csc_matrix:
# the product is a 1 x 1 sparse matrix, so pull out its single entry.
#
r2 = X[:,i].T.dot(X[:,i])[0,0]
print("")
print(r2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

(3, 4) (4,)

[  6.  22.  38.]

[ 12.]

80


In [667]:
def generate_norm_data(n,k,d,sigma=1):
    """
    Generates independent data pairs (x_i, y_i) according to the model

        y_i = w*_0 + w*_1 x_i1 + w*_2 x_i2 + ... + w*_k x_ik + eps_i

    where eps_i is Gaussian noise drawn from N(0, sigma^2), every element of
    X (shape n x d) is drawn from N(0, 1), and the offset w*_0 is taken to be 0.

    Parameters
    ----------
    n : int
        Number of samples
    k : int
        Number of non-zero weights; k < d must hold
    d : int
        Number of dimensions (features)
    sigma : float
        Standard deviation of the Gaussian noise added to y (default 1)

    Returns
    -------
    w : numpy array of shape (d,)
        True weight vector. w[0] stays 0, w[1..k] are +/-10, the rest are 0.
    X : n x d scipy.sparse.csc_matrix
        Data matrix
    y : numpy array of shape (n,)
        Response vector X.dot(w) + eps
    """
    assert(k < d), "k < d must hold for k: %lf, d: %lf" % (k,d)

    # Build the true weights: entries 1..k are set to +/-10, everything else
    # (including entry 0) is left at 0.
    # Floor division keeps the sign pattern the same whether "/" on ints
    # truncates (Python 2) or not (Python 3).
    half = k // 2
    w = np.zeros(d)
    for i in range(1,k+1):
        if i < half:
            w[i] = 10
        else:
            w[i] = -10

    # n x d data matrix with every element drawn from N(0,1)
    X = sp.csc_matrix(np.random.randn(n, d))

    # n Gaussian noise samples from N(0, sigma^2): a standard normal draw
    # scaled by the requested standard deviation
    eps = sigma * np.random.randn(n)

    # y = Xw* + w*_0 + eps with w*_0 assumed to be 0
    y = X.dot(w) + eps

    return w, X, y

In [723]:
def naive_lasso(X,y,l=10,w=None,w_0=None,tol=1.0e-3,max_iter=10000):
    """
    Implementation of the naive (un-optimized) coordinate descent
    algorithm for lasso regression.

    Parameters
    ----------
    X : n x d matrix of data (scipy.sparse matrix; anything accepted by
        scipy.sparse.csc_matrix is converted)
    y : vector of n response variables
    l : regularization tuning parameter (lambda)
    w : initial guess for the d dimensional weight vector (optional,
        defaults to all zeros). The caller's array is not modified.
    w_0 : scalar offset term (optional). Accepted for interface
        compatibility only: the offset is recomputed from the data at the
        start of every sweep, so the passed value is never used.
    tol : convergence threshold on the squared Euclidean norm of the change
        in w between two consecutive sweeps (optional)
    max_iter : upper bound on the number of sweeps, as a guard against
        looping forever (optional)

    Algorithm 1: Coordinate Descent Algorithm for Lasso

    while not converged do:
        w_0 <- sum_i=1_N[y_i - sum_j[w_j X_ij]]/N
        for(k [1,d]) do:
            a_k <- 2 * sum_i=1_N[X_ik ^2]
            c_k <- 2 * sum_i=1_N[X_ik (y_i - (w_0 + sum_j!=k[w_j X_ij]))]
            w_k <- (c_k + lambda)/a_k if c_k < -lambda
                    0 if c_k is between [-lambda,lambda]
                    (c_k - lambda)/a_k if c_k > lambda
        end
    end

    Returns
    -------
    w : weight vector
        numpy array of shape (d,)
    """
    X = sp.csc_matrix(X)
    y = np.asarray(y, dtype=float).ravel()
    N = y.shape[0]
    d = X.shape[1]

    # Work on a private float copy of the starting point.
    if w is None:
        w_pred = np.zeros(d)
    else:
        w_pred = np.array(w, dtype=float).ravel()

    # a_k depends only on X, so it is computed once, flattened to shape (d,)
    a = 2.0*np.asarray(X.power(2).sum(axis=0)).ravel()

    # At least one sweep is always performed, so an all-zero starting point
    # is optimized instead of being returned untouched.
    for _ in range(max_iter):
        # Store for convergence test
        w_old = w_pred.copy()

        # Offset: mean residual under the current weights
        w_0 = (np.sum(y) - X.dot(w_pred).sum())/N

        for k in range(0,d):
            x_k = X[:,k].toarray().ravel()

            # Residual under the *current* estimate with feature k's own
            # contribution added back, i.e. y - (w_0 + sum_{j != k} w_j X_ij)
            partial_resid = y - w_0 - X.dot(w_pred) + x_k*w_pred[k]
            c_k = 2.0*x_k.dot(partial_resid)

            # Soft-threshold update. An all-zero column gives c_k = 0, which
            # lands in the final branch for l >= 0, so a_k = 0 is never
            # divided by in that case.
            if c_k < -l:
                w_pred[k] = (c_k + l)/a[k]
            elif c_k > l:
                w_pred[k] = (c_k - l)/a[k]
            else:
                w_pred[k] = 0.0
        #end for

        step = w_pred - w_old
        if step.dot(step) <= tol:
            break
    #end sweeps

    return w_pred

# Test things

In [724]:
# Smoke test: generate a small synthetic problem and see how well the lasso
# solver recovers the true weight vector.
N = 50
k = 5
d = 75
w, X, y = generate_norm_data(N,k,d)

print(w.shape)
print(X.shape)
print(y.shape)

# NOTE: the true weights are passed in as the solver's starting point.
w_pred = naive_lasso(X,y,l=40,w=w)

print(w)
print(w_pred)

# Coefficient of determination of the recovered weights against the true
# ones: R^2 = 1 - SSres/SStot. The total sum of squares is taken over the
# reference values (the true w) about their own mean.
SSres = np.sum(np.power(w_pred-w,2))
w_bar = np.mean(w)
SStot = np.sum(np.power(w-w_bar,2))
print(1.0 - (SSres/SStot))

(75,)
(50, 75)
(50,)
[  0.  10. -10. -10. -10. -10.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.]
[ 0.          9.28782921 -9.3502097  -9.5710217  -9.56268361 -9.59121316
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0