In [2]:
using DataFrames, CSV
using ScikitLearn



In [12]:
tap_train = readtable("TAP_train.csv");

In [13]:
size(tap_train)

(152771, 43)

In [14]:
X_train = convert(Array,tap_train[:,1:42]);
y_train = convert(Array,tap_train[:,43]);

In [15]:
n = size(X_train,1)

152771

In [16]:
function prox_l1(x::Number, alpha=1)
    if x > alpha
        return x-alpha
    elseif x < -alpha
        return x + alpha
    else
        return 0
    end
end
function prox_l1(x::AbstractArray, alpha=1)
    return reshape(Float64[prox_l1(xi, alpha) for xi in x], size(x))
end

prox_l1 (generic function with 4 methods)

In [17]:
# proximal gradient method for quadratic loss and l1 regularizer
function proxgrad_quad_l1(X, y; maxiters = 10, stepsize = 1/(2*norm(X))^2, λ = 1, w = zeros(size(X,2)))
    objval = Float64[]
    for i=1:maxiters
        # gradient step
        g = 2X'*(X*w-y) # compute quadratic gradient
        w = w - stepsize*g
        # prox step
        w = prox_l1(w, stepsize*λ)
        # record objective value
        push!(objval, norm(X*w-y)^2 + norm(w,1))
    end
    return w, objval
end

proxgrad_quad_l1 (generic function with 1 method)

In [18]:
function stoch_proxgrad_quad_l1(X, y; maxiters = 50, stepsize = 1/(2*norm(X))^2, λ = 1, w = zeros(size(X,2)))
    objval = Float64[]
    for i=1:maxiters
        # pick random example r
        r = rand(1:n)
        x_r = X[r,:]
        # stochastic gradient step
        g = n*(y[r] - dot(w,x_r))*x_r
        w = w + stepsize*g
        # prox step
        w = prox_l1(w, stepsize*λ)
        # record objective value
        push!(objval, norm(X*w-y)^2 + norm(w,1))
    end
    return w, objval
end

stoch_proxgrad_quad_l1 (generic function with 1 method)

In [19]:
w, obj = stoch_proxgrad_quad_l1(X_train,y_train; maxiters = 50, stepsize = 1/(2*norm(X))^2, λ = 0.01, w = zeros(size(X,2)))

([0.0255086, 1.27146e-5, 4.98684e-6, 3.91733e-6, 1.2715e-5, 0.0357014, 7.33175e-6, 4.27581e-6, 1.107e-6, 4.00504e-10  …  0.0, 0.0, 0.0, 4.22643e-7, 2.69556e-7, 7.96071e-7, 0.0, 0.0, 4.00504e-10, 1.2715e-5], [9.71581e11, 9.64283e11, 9.51891e11, 9.4665e11, 9.17855e11, 8.93083e11, 9.09004e11, 9.05683e11, 8.93078e11, 9.63356e11  …  8.80254e11, 9.48943e11, 8.82044e11, 8.8091e11, 8.85469e11, 9.11932e11, 9.69656e11, 9.20235e11, 8.74885e11, 8.95248e11])

In [20]:
w[1:15]

15-element Array{Float64,1}:
  0.0255086  
  1.27146e-5 
  4.98684e-6 
  3.91733e-6 
  1.2715e-5  
  0.0357014  
  7.33175e-6 
  4.27581e-6 
  1.107e-6   
  4.00504e-10
  0.0        
  2.95374e-6 
 -4.68984e-7 
  3.65848e-6 
  1.58489e-6 

In [21]:
w[16:30]

15-element Array{Float64,1}:
 7.15475e-7
 1.65601e-6
 5.05549e-7
 2.1098e-6 
 0.0       
 2.7005e-6 
 4.01188e-6
 3.54149e-6
 4.72974e-7
 1.98812e-6
 1.12263e-5
 1.48867e-6
 0.0       
 2.02935e-6
 0.0       

In [22]:
w[30:42]

13-element Array{Float64,1}:
 0.0        
 3.80002e-6 
 5.39692e-6 
 0.0        
 0.0        
 0.0        
 4.22643e-7 
 2.69556e-7 
 7.96071e-7 
 0.0        
 0.0        
 4.00504e-10
 1.2715e-5  

In [23]:
w1, obj1 = stoch_proxgrad_quad_l1(X,y; maxiters = 100, stepsize = 1/(2*norm(X))^2, λ = 0.01, w = zeros(size(X,2)))

([0.0559128, 2.77842e-5, 1.46355e-5, 9.97279e-6, 2.78657e-5, 0.0269909, 1.87525e-5, 6.90473e-6, 2.12705e-6, 8.14243e-8  …  0.0, 0.0, 0.0, 7.56696e-7, 0.0, 2.81685e-6, 0.0, -3.18643e-8, 1.13289e-7, 2.78657e-5], [9.93714e11, 9.0402e11, 9.25984e11, 9.27685e11, 9.14728e11, 9.24556e11, 9.22419e11, 9.22283e11, 8.985e11, 9.01256e11  …  8.65461e11, 8.82343e11, 8.65438e11, 8.8105e11, 8.50487e11, 9.05411e11, 9.03756e11, 8.54123e11, 8.53871e11, 8.50011e11])

In [24]:
w1[1:15]

15-element Array{Float64,1}:
 0.0559128 
 2.77842e-5
 1.46355e-5
 9.97279e-6
 2.78657e-5
 0.0269909 
 1.87525e-5
 6.90473e-6
 2.12705e-6
 8.14243e-8
 0.0       
 5.8956e-6 
 6.25276e-7
 4.04904e-6
 2.66028e-6

In [25]:
w1[16:30]

15-element Array{Float64,1}:
 4.70428e-6
 4.91562e-6
 3.03291e-6
 1.98266e-6
 0.0       
 4.94378e-6
 1.06434e-5
 4.89165e-6
 1.46148e-6
 5.92538e-6
 2.39027e-5
 3.68684e-6
 2.76095e-7
 1.9512e-6 
 2.76095e-7

In [26]:
w1[31:42]

12-element Array{Float64,1}:
  6.17335e-6
  1.581e-5  
  0.0       
  0.0       
  0.0       
  7.56696e-7
  0.0       
  2.81685e-6
  0.0       
 -3.18643e-8
  1.13289e-7
  2.78657e-5

In [28]:
tap_test = readtable("TAP_test.csv");

In [37]:
size(tap_test)

(38193, 43)

In [29]:
X_test = convert(Array,tap_test[:,1:42]);
y_test = convert(Array,tap_test[:,43]);

In [30]:
function mean_abs_err(X,y,w)
    n = size(X,1)
    sum = 0
    for i=1:n
        sum += abs(y[i] - X[i,:]'*w)
    end
    return (sum+0.0)/n
end

mean_abs_err (generic function with 1 method)

In [31]:
function mean_abs_percent(X,y,w)
    n = size(X,1)
    sum = 0
    for i=1:n
        sum += abs((y[i] - X[i,:]'*w)/y[i])
    end
    return 100*(sum+0.0)/n
end

mean_abs_percent (generic function with 1 method)

In [44]:
MAE_train = mean_abs_err(X_train, y_train, w)

2066.9712337817136

In [45]:
MAE_test = mean_abs_err(X_test, y_test, w)

2079.170881561422

In [46]:
MAPD_train = mean_abs_percent(X_train, y_train, w)

160.06887151372942

In [47]:
MAPD_test = mean_abs_percent(X_test, y_test, w)

159.9483263969161