In [58]:
library(glmnet)

# 测试 Lasso 回归函数
result <- robust_lasso(simulated_data$X, simulated_data$y, lambda = 0.05, method = "auto")
print(result)

$method
[1] "ISTA"

$fit
$fit$beta
           [,1]
 [1,] 1.4272367
 [2,] 3.0690189
 [3,] 0.7908428
 [4,] 3.4335464
 [5,] 2.8472139
 [6,] 2.2948153
 [7,] 2.5199850
 [8,] 2.1631920
 [9,] 1.4909331
[10,] 1.6115176
[11,] 0.0000000
[12,] 0.0000000
[13,] 0.0000000
[14,] 0.0000000
[15,] 0.0000000
[16,] 0.0000000
[17,] 0.0000000
[18,] 0.0000000
[19,] 0.0000000
[20,] 0.0000000
[21,] 0.0000000
[22,] 0.0000000
[23,] 0.0000000
[24,] 0.0000000
[25,] 0.0000000
[26,] 0.0000000
[27,] 0.0000000
[28,] 0.0000000
[29,] 0.0000000
[30,] 0.0000000
[31,] 0.0000000
[32,] 0.0000000
[33,] 0.0000000
[34,] 0.0000000
[35,] 0.0000000
[36,] 0.0000000
[37,] 0.0000000
[38,] 0.0000000
[39,] 0.0000000
[40,] 0.0000000
[41,] 0.0000000
[42,] 0.0000000
[43,] 0.0000000
[44,] 0.0000000
[45,] 0.0000000
[46,] 0.0000000
[47,] 0.0000000
[48,] 0.0000000
[49,] 0.0000000
[50,] 0.0000000

$fit$iter
[1] 79

$fit$convergence
[1] TRUE




In [5]:
set.seed(123)

# 模拟数据集生成函数
generate_simulated_data <- function(n_samples = 100, n_features = 50, n_informative = 10, noise = 0.1) {
  X <- matrix(rnorm(n_samples * n_features), nrow = n_samples, ncol = n_features)
  beta <- c(rnorm(n_informative, mean = 2), rep(0, n_features - n_informative)) # 稀疏系数
  y <- X %*% beta + rnorm(n_samples, sd = noise) # 响应变量加噪声
  list(X = X, y = as.vector(y), true_beta = beta)
}

# 生成模拟数据
simulated_data <- generate_simulated_data(n_samples = 100, n_features = 50, n_informative = 10, noise = 0.1)
X <- simulated_data$X
y <- simulated_data$y
true_beta <- simulated_data$true_beta


  
  


In [6]:
source("robust_lasso.R")

In [7]:
# 假设 robust_lasso 已实现，返回值包含 method 和 fit 两个部分
result <- robust_lasso(X, y, lambda = 0.05, method = "auto")
print(result$method) # 期望输出 "Lasso"
# 提取拟合系数和截距
estimated_beta <- as.vector(result$fit$beta[-1]) # 去掉截距项
intercept <- result$fit$a0                      # 截距

# 检查维度是否匹配
cat("Dimensions of X:", dim(X), "\n")
cat("Length of estimated_beta:", length(estimated_beta), "\n")

# 确保维度一致后计算预测值
if (ncol(X) != length(estimated_beta)) {
  stop("The dimensions of X and estimated_beta do not match!")
}
y_pred <- X %*% estimated_beta + intercept # 加入截距项



[1] "ISTA"
Dimensions of X: 100 50 
Length of estimated_beta: 50 


In [8]:
# --- 计算拟合质量指标 ---
compute_metrics <- function(true_beta, estimated_beta, y, y_pred) {
  mse <- mean((estimated_beta - true_beta)^2)
  relative_error <- sqrt(sum((estimated_beta - true_beta)^2)) / sqrt(sum(true_beta^2))
  sparsity_match <- sum((estimated_beta != 0) == (true_beta != 0)) / length(true_beta)
  
  # 计算 R^2
  rss <- sum((y - y_pred)^2) # Residual Sum of Squares
  tss <- sum((y - mean(y))^2) # Total Sum of Squares
  r_squared <- 1 - (rss / tss)
  list(MSE = mse, Relative_Error = relative_error, Sparsity_Match = sparsity_match, R_Squared = r_squared)
}

# 计算指标
metrics <- compute_metrics(true_beta, estimated_beta, y, y_pred)

# 显示结果
cat("Mean Squared Error (MSE):", metrics$MSE, "\n")
cat("Relative Error:", metrics$Relative_Error, "\n")
cat("Sparsity Match:", metrics$Sparsity_Match, "\n")
cat("R Squared (R^2):", metrics$R_Squared, "\n")

Mean Squared Error (MSE): 0.000652151 
Relative Error: 0.02424062 
Sparsity Match: 1 
R Squared (R^2): 1 


In [8]:
library(glmnet)

# 下载并加载测试数据集
download_data <- function() {
  # 使用 R 自带的 Boston Housing 数据集
  if (!requireNamespace("MASS", quietly = TRUE)) {
    install.packages("MASS")
  }
  data("Boston", package = "MASS")
  Boston
}


In [9]:
# 数据预处理
prepare_data <- function(data) {
  y <- data$medv  # 房价作为响应变量
  X <- as.matrix(data[, -which(names(data) == "medv")])  # 剔除响应变量列
  return(list(X = X, y = y))
}



In [42]:
# 测试 robust_lasso 和 glmnet
test_lasso <- function(X, y, lambda) {
  # Run custom algorithm
  result_custom <- robust_lasso(X, y, lambda = lambda, method = "auto")  # 调用你实现的函数
  print(result_custom$method)
  
  # Run glmnet for comparison
  fit_glmnet <- glmnet(X, y, alpha = 1, lambda = lambda, intercept = TRUE)
  beta_glmnet <- as.vector(coef(fit_glmnet, s = lambda))  # Includes intercept
  
  # Compute predictions
  y_pred_custom <- cbind(1, X) %*% result_custom$fit$beta  # Robust_lasso predictions
  y_pred_glmnet <- predict(fit_glmnet, X, s = lambda)      # glmnet predictions
  
  # Compute MSE
  mse_custom <- mean((y - y_pred_custom)^2)
  mse_glmnet <- mean((y - y_pred_glmnet)^2)
  
  return(list(
    custom_beta = result_custom$fit$beta,
    glmnet_beta = beta_glmnet,
    mse_custom = mse_custom,
    mse_glmnet = mse_glmnet
  ))
}


In [30]:

# 主测试函数
run_test <- function() {
  # 下载并准备数据
  data <- download_data()
  data_prepared <- prepare_data(data)
  X <- data_prepared$X
  y <- data_prepared$y
  
  # Set lambda for Lasso
  lambda <- 0.1
  
  # Compare robust_lasso and glmnet
  results <- test_lasso(X, y, lambda)
  
  # Print results
  cat("Custom Lasso Coefficients (including intercept):\n")
  print(results$custom_beta)
  cat("\nGLMNet Lasso Coefficients (including intercept):\n")
  print(results$glmnet_beta)
  
  cat("\nMean Squared Error (Custom Lasso):", results$mse_custom, "\n")
  cat("Mean Squared Error (GLMNet):", results$mse_glmnet, "\n")
}



In [36]:
source("../R/robust_lasso.R")



In [None]:
#TODO: 对于Boston Housing数据集，用Coordinate Descent时MSE最小，auto选择的ista第二小
# Run the test
run_test()

[1] "ISTA"
Custom Lasso Coefficients (including intercept):
 [1] 32.266893292 -0.100686904  0.052748641 -0.049005875  0.998965616
 [6] -0.519935233  2.932779546 -0.004849263 -1.200759399  0.298769746
[11] -0.015218729 -0.814947082  0.009751215 -0.618389041

GLMNet Lasso Coefficients (including intercept):
 [1]  29.738867187  -0.073641517   0.030471044   0.000000000   2.589928446
 [6] -13.634864366   4.021614153   0.000000000  -1.153964162   0.138963131
[11]  -0.005083611  -0.889610987   0.008359448  -0.522581066

Mean Squared Error (Custom Lasso): 23.2564 
Mean Squared Error (GLMNet): 22.33831 
