# Logistic regression with backtracking line search

In [60]:
norm <- function(vec) {return(sqrt(sum(vec^2)))}

# def prox_l2_(beta_j, lam):
#   return np.maximum(1-lam/(np.sqrt(len(beta_j))*l2(beta_j)), 0)*beta_j

prox_l2 <- function(beta_j, lam) {return(beta_j*max(0, 1-lam/norm(beta_j)))}

backtrack <- function(f, df, x, eta, k) 
{
    i <- 1
    alpha <- c(1)
    grad <- df(x)
    search_vec <- -grad
    t <- -k * t(search_vec)%*%grad
    while (f(x)-f(x+alpha[i]*search_vec)<alpha[i]*t)
    {
        alpha <- c(alpha, eta*alpha[i])
        i <- i+1
    }
    return(tail(alpha, 1))
}


armijo_gd <- function(f, df, x, y, partition, t=0.2, eta=0.1, k=0.3, tol=10^-6, lam=0.1, max_iter=1000)
{
    group_num <- length(partition)
    i <- 1
    error <- c(100)
    x_ <- c()
    while(tail(error,1)>tol)
    {
        t <- backtrack(f, df, x, eta, k)
        x <- numeric(length(x))
        for(j in seq(1, group_num))
            {
            group_size <- partition[j]*length(y)
            print(group_size)
            x[j*group_size:(j+1)*group_size] <- prox_l2(x[j*group_size:(j+1)*group_size]-t*df(x)[j*group_size:(j+1)*group_size], lam*t)
            }
        x_ <- c(x_, x)
        i <- i+1
        if(i>max_iter){break}
        error <- c(error, norm(df(x)))
    }
    return (list('min_val'=tail(x_, 1), 'error_vector'=error[-1], 'requested_error_vector'=error2))
}

In [80]:
norm <- function(vec) {return(sqrt(sum(vec^2)))}
prox_l2_ <- function(beta_j, lam) {return(beta_j*max(0, 1-lam/norm(beta_j)))}
prox_l2 <- function(beta, partition, lam)
    {
    l2_vec <- c()
    beta_ <- numeric(length(beta))
    group_num <-length(partition)
    for(j in seq(1, group_num))
        {
        group_size <- partition[j]*length(beta_)
        beta_[j*group_size:(j+1)*group_size] <- prox_l2_(beta[j*group_size:(j+1)*group_size], lam)
        l2_vec <- c(l2_vec, norm(beta_[j*group_size:(j+1)*group_size]))
        }
    penalty <- sum(t(l2_vec)%*%sqrt(partition*length(beta)))
    return(list('beta'=beta_, 'penalty'=penalty))
    }

backtrack <- function(f, df, x, eta, k) 
{
    i <- 1
    alpha <- c(1)
    grad <- df(x)
    search_vec <- -grad
    t <- -k * t(search_vec)%*%grad
    while (f(x)-f(x+alpha[i]*search_vec)<alpha[i]*t)
    {
        alpha <- c(alpha, eta*alpha[i])
        i <- i+1
    }
    return(tail(alpha, 1))
}

fit <- function(X, y, partition, t=0.2, eta=0.1, k=0.3, tol=10^-8, lam=0.1)
{
    d <- length(X[, 1])
    beta <- rep(0, d)
    df <- function(beta_){return(X%*%(sigmoid(t(X)%*%beta_)-y)/length(y))}
    group_num <- length(partition)
    i <- 1
    error <- c(100)
    x_ <- c()
    penalty <- 0
    while(tail(error,1)>tol)
    {
        beta <- c(numeric(d))
        f <- function(beta_){return(mean(-y*log(sigmoid(t(X)%*%beta_))-(1-y)*log(1- sigmoid(t(X)%*%beta_)))+lam*penalty)} 
        t <- backtrack(f, df, beta, eta, k) 
        y_hat <- sigmoid(t(X)%*%beta)
        out <- prox_l2(beta-t*X%*%(y_hat-y)/length(X), partition, lam*t)
        beta <- out[1]
        penalty <- out[2]
        print(beta)
        error <- c(error, norm(y_hat-y))
    }
    return (list('min_val'=tail(beta, 1), 'error_vector'=error[-1]))
}

In [78]:
n <- 1000
d <- 6
x <- matrix(rnorm(n*d), nrow=d)
y <- matrix(rbinom(n=n,prob=sigmoid(t(x)%*%rep(1/sqrt(d), d)), size=1))

In [79]:
fit(x, y, c(1/2, 1/2))

$beta
 [1]  0  0  0  0  0 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
[26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA



ERROR: Error in lam * penalty: non-numeric argument to binary operator


In [61]:
sigmoid <- function(z){return(1/(1+exp(-z)))}

fit <- function(X, y, lr=0.1, tol=10^-8)
{
    d <- length(X[, 1])
    beta <- rep(0, d)
    loss <- function(beta_){return(mean(-y*log(s)-(1-y)*log(1-s)))} 
    f <- function(beta_){return(loss(sigmoid(t(X)%*%beta_)))}
    df <- function(beta_){return(X%*%(sigmoid(t(X)%*%beta_)-y)/length(y))}
    return(armijo_gd(f, df, beta, rep(1/sqrt(d), d), eta=lr, tol=tol, partition=c(1/2,1/2)))
}

In [62]:
plot_log_error <- function(n, d)
{
    x <- matrix(rnorm(n*d), nrow=d)
    y <- matrix(rbinom(n=n,prob=sigmoid(t(x)%*%rep(1/sqrt(d), d)), size=1))
    result <- fit(x, y)
    plot(result[[3]], xlab="iteration", ylab='log error')
}

In [63]:
plot_log_error(2000, 6)
plot_log_error(5000, 6)

[1] 3
[1] 3


ERROR: Error in t(X) %*% beta_: non-conformable arguments
