In [45]:
# This algorithm could be useful for feature selection because we optimize each theta[i]
# seperately, and therefore can watch how performance improves when we add the optimized 
# theta[i] to theta versus the predictability of thetaBar.

# In addition, this algorithm tends to produce thetas with a good deal of zeros, so we can
# filter out any feature i such that theta[i] = 0

import numpy as np

def L1LeastSquaresLinReg(Xtrain, ytrain, Xtest):
    lmbda = 1000
    theta = L1LeastSquares(Xtrain, ytrain, lmbda)
    return Xtest.dot(theta)
        
    
def L1LeastSquares(X, y, lmbda):
    theta = np.zeros(X.shape[1])
    stop = 0
    it = 100
    while (not stop) and (it > 0):
        stop = 1
        for i in range(X.shape[1]):
            ti = theta[i]
            thetaBar = theta[:]
            thetaBar[i] = 0
            s = 1 if ti >= 0 else -1
            #Perform update
            Xi = X[:, i]
            XiT = Xi.transpose()
            theta[i] = (XiT.dot(y - X.dot(thetaBar)) + s*lmbda)/(XiT.dot(Xi))
            #Clip theta's value
            theta[i] = max(theta[i], 0) if s == 1 else min(theta[i], 0)
            if abs(ti - theta[i]) > 10**-5:
                stop = 0
        it = it - 1
    print theta
    return theta
    


In [46]:
#Test

from sklearn.linear_model import LinearRegression
from sklearn import datasets
from numpy.random import permutation
from sklearn.metrics import r2_score
n_samples = 10000


X,y,coeff = datasets.make_regression(n_samples=n_samples, n_features=100,
                                      n_informative=20, noise=20,
                                      coef=True, random_state=0)
# indices = permutation(range(150))
# X = X[indices]
# y = y[indices]

Xtrain = X[:-1000]
ytrain = y[:-1000]
Xtest =  X[-1000:]
ytest =  y[-1000:]

model = LinearRegression()
model.fit(Xtrain, ytrain)

predictionsSK = model.predict(Xtest)
predictionsME = L1LeastSquaresLinReg(Xtrain, ytrain, Xtest)


print r2_score(predictionsSK, ytest)
print r2_score(predictionsME, ytest)



[  2.16058258e-01   0.00000000e+00   4.61689682e+01   1.16278057e-01
   2.34955020e-01   5.34273113e-01   3.79262176e-01   6.16124339e+01
   4.79026364e-01   9.99116677e+01   3.07816442e-01   1.68770369e-01
   2.99164488e-01   6.16103303e+01   3.66459026e-01   0.00000000e+00
   9.66409296e+01   3.97099404e-02   8.14831638e+01   0.00000000e+00
   0.00000000e+00   0.00000000e+00   1.76974651e-01   2.38065618e-01
   2.66696360e-01   3.25256470e-01   3.73899350e-01   1.89356961e-01
   9.97205758e+00   0.00000000e+00   3.92875685e+01   0.00000000e+00
   7.80197603e-03   0.00000000e+00   5.86146050e-01   1.03746556e-02
   2.98848970e-01   4.56426106e+01   0.00000000e+00   3.54739670e+01
   0.00000000e+00   7.58442603e-02   1.63796656e-01   0.00000000e+00
   3.55436704e-01   8.96753973e+01   0.00000000e+00   0.00000000e+00
   3.92118368e-01   5.74128863e-03   0.00000000e+00   1.45684534e-01
   1.53579106e-01   4.97868059e+01   3.53422843e-01   8.53058869e+01
   5.16997359e-01   0.00000000e+00