# 1. Help Functions

In [89]:
# Import relevant packages for splitting data
using LinearAlgebra, GLM, DataFrames, Statistics, Random, Distributions, Tables, TableOperations, StatsBase, FreqTables, DataFrames

In [90]:
# Define a function which turn a list or vector-like object into a proper two
# dimensional column vector

function cvec(a)
    """ Turn a list or vector-like object into a proper column vector
    Input
    a: List or vector-like object, has to be a potential input for np.array()
    Output
    vec: two dimensional NumPy array, with the first dimension weakly greater
         than the second (resulting in a column vector for a vector-like input)
    """
    
    # Conver input into a two dimensional NumPy array
    vec = cat([a], dims = 2) 

    # Check whether the second dimension is strictly greater than the first
    # (remembering Python's zero indexing)
    
    if size(vec)[1] < size(vec)[2]
        # If so, transpose the input vector
        vec = transpose(vec)
    end
   
    # Return the column vector
    return vec

end

cvec (generic function with 1 method)

In [91]:
import Statistics.cor
function corre(y, X)
    
    """ Return correlation coefficients between columns of matrices
    Inputs
    y: n by 1 NumPy array
    X: n by k NumPy array
    Outputs
    corr: list of length k, where the k-th element is the correlation
          coefficient between y and the k-th column of X
    """
    # Concatenate y and X into a single NumPy array
    yX = hcat(y, X)
    
    # Get the correlation coefficients between all columns of that array
    corr = cor(yX)
    
    # Get the first row, starting at the first off-diagonal element (these are
    # the correlation coefficients between y and each column of X
    corr = corr[1, :] 
    
    # Return the result
    return corr

end

corre (generic function with 1 method)

In [92]:
function init_values(X, y, number::Int64=5, intercetp::Bool=true)
    """ Return an initial parameter guess for a LASSO model
    Inputs
    y: n by 1 NumPy array, outcome variable
    X: n by k NumPy array, RHS variables
    Outputs
    residuals: n ny 1 NumPy array, residuals for initial parameter guess
    coefficients: k by 1 NumPy array, initial coefficient values
    """
    # Make sure y is a proper column vector
    #y = cvec(y)
    
    # Get the absolute value of correlations between y and X
    corr = broadcast(abs, cor(y, X)[1, :])
    
    # Get the number of columns of X
    kx = size(X)[2]
    
    # Make an index selecting the five columns of X which are most correlated
    # with y (since .argsort() always sorts in increasing order, selecting from
    # the back gets the most highly correlated columns)
    index = sortperm(corr, rev=true)[1: min(number, kx)]
    
    # Set up an array of coefficient guesses
    coefficients = zeros(kx)
    
    # Regress y on the five most correlated columns of X, including an intercept
    # if desired
   reg = lm(X[:, index], y)
    
    # Replace the guesses for the estimated coefficients (note that .coef_ does
    # not return the estimated intercept, if one was included in the model)
    
    coefficients[index] = GLM.coef(reg)
    
    # Replace any NANs as zeros
    replace!(coefficients, NaN=>0)
    
    # Get the regression residuals
    residuals = y - predict(reg, X[:, index])
    
    return residuals, reg, index, coefficients, corr
    #return index
    
end


init_values (generic function with 3 methods)

In [93]:
# function LassoShooting_fit( x, y, lmbda, maxIter::Int = 1000, optTol::Float64 = 10^(-5), zeroThreshold::Float64 = 10^(-6),
#                             XX = nothing, Xy = nothing, beta_start = nothing)

# 2. LassoShooting 

In [None]:
function LassoShooting_fit( x, y, lmbda, maxIter::Int = 1000, 
                            optTol::Float64 = 10^(-5), 
                            zeroThreshold::Float64 = 10^(-6),
                            XX = nothing, Xy = nothing, 
                            beta_start = nothing)
        

In [94]:
# function LassoShooting_fit( x, y, lmbda, control::control, 
#                             XX = nothing, Xy = nothing, beta_start = nothing)

function LassoShooting_fit( x, y, lmbda, maxIter::Int = 1000, optTol::Float64 = 10^(-5), zeroThreshold::Float64 = 10^(-6),
                            XX = nothing, Xy = nothing, beta_start = nothing)
        
     """ Shooting LASSO algorithm with variable dependent penalty weights
    Inputs
    x: n by p NumPy array, RHS variables
    y: n by 1 NumPy array, outcome variable
    lmbda: p by 1 NumPy array, variable dependent penalty terms. The j-th
           element is the penalty term for the j-th RHS variable.
    maxIter: integer, maximum number of shooting LASSO updated
    optTol: scalar, algorithm terminated once the sum of absolute differences
            between the updated and current weights is below optTol
    zeroThreshold: scalar, if any final weights are below zeroThreshold, they
                   will be set to zero instead
    XX: k by k NumPy array, pre-calculated version of x'x
    Xy: k by 1 NumPy array, pre-calculated version of x'y
    beta_start: k by 1 NumPy array, initial weights
    Outputs
    w: k by 1 NumPy array, final weights
    wp: k by m + 1 NumPy array, where m is the number of iterations the
        algorithm took. History of weight updates, starting with the initial
        weights.
    m: integer, number of iterations the algorithm took
    """
    n = size(x)[1]
    p = size(x)[2]
    
    # Check whether XX and Xy were provided, calculate them if not
    if (isnothing(XX))
        XX = x'*x
    end

    if (isnothing(Xy))
        Xy = x'*y
    end

    # Check whether an initial value for the intercept was provided

    if (isnothing(beta_start))
        # If not, use init_values from help_functions, which will return
        # regression estimates for the five variables in x which are most
        # correlated with y, and initialize all other coefficients as zero
        beta = init_values(x, y)[4]

    else
        # Otherwise, use the provided initial weights
        beta = beta_start
    end

    # Set up a history of weights over time, starting with the initial ones
    wp = beta

    # Keep track of the number of iterations
    m = 1

    # Create versions of XX and Xy which are just those matrices times two
    XX2 = XX * 2
    Xy2 = Xy * 2

    #@unpack maxIter, optTol, zeroThreshold = control()

    # Go through all iteration
    while m<maxIter

        # Save the last set of weights (the .copy() is important, otherwise
        # beta_old will be updated every time beta is changed during the
        # following loop)
        beta_old = copy(beta)

        # Go through all parameters
        for j in 1:p
            
            # Calculate the shoot
            S0 = sum( XX2[j, :].*beta ) - XX2[j, j].*beta[j] - Xy2[j]

            # Update the weights
            if sum(isnothing(XX)) >= 1
                beta[j] = 0

            elseif S0 >lmbda[j]
                beta[j] = (lmbda[j] - S0) / XX2[j,j]

            elseif S0 < -lmbda[j]
                beta[j] = (-lmbda[j] - S0) / XX2[j,j]

            elseif broadcast(abs, S0) <= lmbda[j]
                beta[j] = 0

            end
        end

        # Add the updated weights to the history of weights
        wp = hcat(wp, beta)

        # Check whether the weights are within tolerance
        if sum(broadcast(abs, beta - beta_old)) < optTol
            # If so, break the while loop
            break
        end

        # Increase the iteration counter
        m = m + 1
    end

    # Set the final weights to the last updated weights
    w = beta   

    # Set weights which are within zeroThreshold to zero
    w[broadcast(abs, w) .< zeroThreshold] .= 0
    
    #return beta,  w
    return Dict("coefficients" => w, "coef_list" => wp, "num_it" => m)
    #return w, wp, m
    #return XX2, Xy2
    

end
        

LassoShooting_fit (generic function with 7 methods)

# Test 1 

In [163]:
# We have to make sure that both variables are the same type (Integers or floats) to avoid errors when running the regression
n = 10
p = Int(n/2)

X = randn(n, p)
X1 = randn(n, p)

beta = randn(p)
lmbda = randn(p)
Y = randn(n)

10-element Vector{Float64}:
  0.37390574704730123
  0.017400077545956034
 -0.34079976027839404
  0.44819756064873767
 -0.48857866922016874
 -0.07140611979890271
 -0.5138089918840696
 -0.6153753930326106
 -0.7110416761287502
 -0.11921544705726758

In [207]:

X = X + ones(n, 1) * mean.(eachcol(X))'

10×5 Matrix{Float64}:
 -1.96127   -0.451894  -0.981906    0.0594374  -0.307612
  0.177074   0.372834   0.386475   -1.806      -1.7555
  1.48715    0.718511  -0.921566   -1.2937     -0.32001
 -0.158243   0.518559   0.115908   -0.0571268  -0.981779
 -0.73433   -0.411959   1.3431     -1.46352    -0.131019
 -0.709161   0.610106   2.50582    -0.506002    0.205923
  0.299098  -0.937119   0.782664    0.417899   -0.125403
  0.168237  -0.240982  -0.0608883  -2.15825     1.31538
 -0.37152   -2.01625   -0.166188   -1.52091     1.62224
  1.29525   -0.129278  -1.31099    -1.26134     0.256055

In [203]:
function ols(x, y)
    n = size(x)[1]
    reg = lm(x, y)
    coefT = coef( lm(x, y) )
    e1 = y - x*coef( lm(x, y) )
    Ups1 =  (1/sqrt(n)) * sqrt.((e1.^2)' * x.^2)
    return e1
end

ols (generic function with 2 methods)

In [204]:
ols(X, Y)

10-element Vector{Float64}:
 -0.19272054677245015
  0.08388625950445273
 -0.31687396364009834
  0.08079002533795965
 -0.1482481687590621
 -0.011020653003308664
 -0.2753880426845701
 -0.12009332364770431
  0.09506891938445605
  0.1005819966302729

In [197]:
function ols(x, y, post::Bool = True)

    if post   
        reg = lm(x, y)
        coefT = coef( lm(x, y) )
        #coefT[isnan.(coefT)] .= 0
        #e1 = y - x*coef( lm(x, y) )
    else 
        println("alex")
    end
end

ols (generic function with 2 methods)

In [206]:
s1 = sqrt.(var(Y, corrected = true, dims = 1))[1]

0.40209021666442357

In [198]:
ols(X, Y, true)

5-element Vector{Float64}:
 -0.19322825989190803
  0.30009593736425166
 -0.15793908814196286
  0.1972624748915335
 -0.0805863367606081

In [175]:
lm(X, Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────────
         Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────
x1  -0.193228    0.0940277  -2.06    0.0950  -0.434934   0.0484776
x2   0.300096    0.129294    2.32    0.0680  -0.0322649  0.632457
x3  -0.157939    0.0743706  -2.12    0.0871  -0.349115   0.0332367
x4   0.197262    0.0846895   2.33    0.0673  -0.0204388  0.414964
x5  -0.0805863   0.106431   -0.76    0.4831  -0.354176   0.193003
──────────────────────────────────────────────────────────────────


In [148]:
idn3 = broadcast(abs, Y) .> 0

10-element BitVector:
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1

In [155]:
alex = lm(X, Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
─────────────────────────────────────────────────────────────────
         Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────
x1  -0.146846     0.268945  -0.55    0.6085  -0.838192   0.544499
x2  -0.313381     0.214175  -1.46    0.2033  -0.863937   0.237174
x3  -0.0997749    0.310907  -0.32    0.7613  -0.898986   0.699436
x4   0.475867     0.320589   1.48    0.1978  -0.348234   1.29997
x5   0.232848     0.527196   0.44    0.6772  -1.12235    1.58805
─────────────────────────────────────────────────────────────────


In [158]:
coef( lm(X, Y) )

5-element Vector{Float64}:
 -0.14684619584657482
 -0.31338137546329686
 -0.09977492152583385
  0.47586687865370714
  0.23284846560065064

In [153]:
coefT

5-element Vector{Float64}:
 -0.14684619584657482
 -0.31338137546329686
 -0.09977492152583385
  0.47586687865370714
  0.23284846560065064

In [149]:
idn3

10-element BitVector:
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1

In [145]:
ind2 <-  broadcast(abs, Y) > 0

LoadError: UndefVarError: ind2 not defined

In [143]:
ind2

LoadError: UndefVarError: ind2 not defined

In [137]:
lmbda

5-element Vector{Float64}:
 -1.5529768893761908
 -0.27101660706558167
 -1.0111274610384793
  0.6646641746955977
  0.4103123489185525

In [96]:
beta

5-element Vector{Float64}:
 -0.6935209405221874
 -1.025657695122502
  0.7402057245618119
 -0.6332402746921071
 -0.050029723124666116

In [97]:
vcat(beta, lmbda)

10-element Vector{Float64}:
 -0.6935209405221874
 -1.025657695122502
  0.7402057245618119
 -0.6332402746921071
 -0.050029723124666116
 -0.786767414752743
  0.9075976833823601
  1.4476551889226137
  1.3642005089421603
 -1.052386642655459

In [98]:
Y

10-element Vector{Float64}:
 -1.7560846182744394
 -1.6140381293044501
 -0.9061861842298304
  0.10664325217207113
 -0.07019836681773313
  0.5345378609860756
 -0.7177565328892602
  0.2143227864843661
 -0.0005343284603812767
 -0.1739802374195094

In [99]:
lm(X, Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────
        Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────
x1   0.133538    0.558061   0.24    0.8204  -1.301      1.56808
x2  -0.483294    0.555726  -0.87    0.4243  -1.91183    0.945245
x3  -0.582753    0.53177   -1.10    0.3231  -1.94971    0.784206
x4   0.149774    0.380329   0.39    0.7100  -0.827893   1.12744
x5   0.251708    0.302041   0.83    0.4426  -0.524714   1.02813
────────────────────────────────────────────────────────────────


In [100]:
init_values(X, Y)
#

([-1.0592927022431002, -1.3559940631591747, -0.23275519877656015, 0.34324426305674494, -0.43650902902311245, 0.9876462044603046, -0.4973517996718823, -0.2869858288022875, -0.4719748344881069, 0.46703717098671826], LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────
        Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────
x1  -0.582753    0.53177   -1.10    0.3231  -1.94971    0.784206
x2   0.133538    0.558061   0.24    0.8204  -1.301      1.56808
x3   0.149774    0.380329   0.39    0.7100  -0.827893   1.12744
x4   0.251708    0.302041   0.83    0.4426  -0.524714   1.02813
x5  -0.483294    0.555726  -0.87    0.4243  -1.91183    0.945245
────────────────────────────────────────────────────────────────
, [3, 1, 4, 5, 2], [0.133537829189903, -0.4832941159913008, -0.5827533685672

In [101]:
LassoShooting_fit(X, Y, lmbda)

Dict{String, Any} with 3 entries:
  "coef_list"    => [0.197053 0.197053 … -0.212414 -0.212417; -0.423626 -0.4236…
  "num_it"       => 27
  "coefficients" => [-0.212417, -0.105611, -0.215399, 0.0, 0.27028]

# Test 2

In [102]:
using RData, LinearAlgebra, GLM, DataFrames, Statistics, Random, Distributions, DataStructures, NamedArrays, PrettyTables

In [103]:
# Importing .Rdata file
growth_read = load("../../data/GrowthData.RData")

# Since growth_read is a dictionary, we check if there is a key called "GrowthData", the one we need for our analyze
haskey(growth_read, "GrowthData")
# Now we save that dataframe with a new name
growth = growth_read["GrowthData"]
names(growth)

Y = growth[!, "Outcome"]
X_2 = select(growth, Not(["Outcome"]))
X_2 = convert(Matrix, Matrix(X_2[:, 2:5]))
lmbda = randn(size(X_2)[2])
lm(X_2, Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────────────
          Coef.  Std. Error      t  Pr(>|t|)    Lower 95%    Upper 95%
──────────────────────────────────────────────────────────────────────
x1   0.00289408  0.00239153   1.21    0.2295  -0.00186013   0.00764829
x2  -0.0572642   0.023258    -2.46    0.0158  -0.1035      -0.0110289
x3   0.132428    0.0744655    1.78    0.0789  -0.015604     0.280461
x4   0.0845833   0.246612     0.34    0.7324  -0.405665     0.574832
──────────────────────────────────────────────────────────────────────


In [104]:
LassoShooting_fit(X_2, Y, lmbda)["coefficients"]

4-element Vector{Float64}:
 -0.020884476902068058
 -0.1122183810658193
  1.009182059753555
  0.0

# Test 3

In [105]:
# using Pkg
# Pkg.add("CSV")
# Pkg.add("DataFrames")
# Pkg.add("Dates")
# Pkg.add("Plots")
using CSV
using DataFrames
using Dates
#using Plots

In [106]:
#Reading the CSV file into a DataFrame
#We have to set the category type for some variable
data = CSV.File("../../data/wage2015_subsample_inference.csv"; types = Dict("occ" => String,"occ2"=> String,"ind"=>String,"ind2"=>String)) |> DataFrame
println("Number of Rows : ", size(data)[1],"\n","Number of Columns : ", size(data)[2],) #rows
[eltype(col) for col = eachcol(data)]
n = size(data)[1]
z = select(data, Not([:rownames, :lwage, :wage]))
p = size(z)[2]
# remember y has to be a vector
y = data[!, "lwage"]

X_3 = convert(Matrix, Matrix(z[:, 1:5]))
Y_3 = y
lambda_3 = [0.1, 0.2, 0.3, 0.4, 0.5]
#lmbda = randn(size(X_3)[2])

Number of Rows : 5150
Number of Columns : 21


5-element Vector{Float64}:
 0.1
 0.2
 0.3
 0.4
 0.5

In [138]:
LassoShooting_fit(X_3, Y_3, lambda_3)

Dict{String, Any} with 3 entries:
  "coef_list"    => [0.822342 0.822342 … 0.822522 0.822524; 2.43027 2.43027 … 2…
  "num_it"       => 6
  "coefficients" => [0.822524, 2.43023, 2.4558, 2.47384, 2.73382]

# 3. rlasso, lambdaCalculation 

In [109]:
function lambdaCalculation( x, y, lmbda, maxIter::Int = 1000, optTol::Float64 = 10^(-5), zeroThreshold::Float64 = 10^(-6),
                            XX = nothing, Xy = nothing, beta_start = nothing)

end 

lambdaCalculation (generic function with 19 methods)

In [110]:
function lambdaCalculation(     homoskedastic::Bool=false, X_dependent_lambda::Bool=false,
                                lambda_start=nothing, c::Float64=1.1, gamma::Float64=0.1, 
                                numSim::Int=5000, y=nothing, x=nothing, par::Bool=true, 
                                corecap::Float64=Inf, fix_seed::Bool=true)
end

lambdaCalculation (generic function with 19 methods)

In [111]:
function lambdaCalculation(     homoskedastic::Bool=false, X_dependent_lambda::Bool=false,
                                lambda_start=nothing, c::Float64=1.1, gamma::Float64=0.1, 
                                numSim::Int=5000, y=nothing, x=nothing, par::Bool=true, 
                                corecap::Float64=Inf, fix_seed::Bool=true)
    # Get number of observations n and number of variables p
    n, p = size(x)

    # Get number of simulations to use (if simulations are necessary)
    R = numSim

    # Go through all possible combinations of homoskedasticy/heteroskedasticity
    # and X-dependent or independent error terms. The first two cases are
    # special cases: Handling the case there homoskedastic was set to None, and
    # where lambda_start was provided.
    #

    # 1) If homoskedastic was set to None (special case)
    if (isnothing(homoskedastic))

        # Initialize lambda
            lmbda0 = lambda_start

        Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))

        # Calculate the final vector of penalty terms
            lmbda = lmbda0 * Ups0

    # 2) If lambda_start was provided (special case)
    elseif (isnothing(lambda_start)) == 0
            # Check whether a homogeneous penalty term was provided (a scalar)
            if maximum(size(lambda_start)) == 1
                # If so, repeat that p times as the penalty term
                lmbda = ones(p,1).*lambda_start

            else
                # Otherwise, use the provided vector of penalty terms as is
                lmbda = lambda_start
            end

    # 3) Homoskedastic and X-independent
    elseif homoskedastic == true &  X_dependent_lambda == false

            # Initilaize lambda
            lmbda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0),1 - gamma/(2*p))

            # Use ddof=1(corrected = true in Julia) to be consistent with R's var() function (in Julia by defaul the DDF is N-1)
            Ups0 = sqrt(var(y, corrected = true))

            # Calculate the final vector of penalty terms
            lmbda = zeros(p,1) .+ lmbda0 * Ups0

    # 4) Homoskedastic and X-dependent
    elseif homoskedastic == true & X_dependent_lambda == true

            psi = mean.(eachcol(x.^2))
            tXtpsi = (x' ./ sqrt(psi))'

            R = 5000
            sim = zeros(R,1)

            for l in 1:R
                    g = reshape(repeat(randn(n), inner = p),(p, n))'
                    sim[l] = n * maximum(2*abs.(mean.(eachcol(tXtpsi.* g))))
            end

            # Initialize lambda based on the simulated quantiles
            lambda0 = c*quantile(vec(sim), 1 - gamma)

            Ups0 = sqrt(var(y, corrected = true))

            # Calculate the final vector of penalty terms
            lmbda = zeros(p,1) .+ lmbda0 * Ups0

    # 5) Heteroskedastic and X-independent
    elseif homoskedastic == false &  X_dependent_lambda == false

            # The original includes the comment, "1=num endogenous variables"
            lmbda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0),1 - gamma/(2*p*1))

            Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))
            lmbda = lmbda0 * Ups0

    # 6) Heteroskedastic and X-dependent
    elseif homoskedastic == false &  X_dependent_lambda == true

            eh = y
            ehat = reshape(repeat(eh, inner = p),(p, n))'

            xehat = x.*ehat
            psi = mean.(eachcol(xehat.^2))'
            tXehattpsi = (xehat./sqrt.(psi))

            R = 5000
            sim = zeros(R,1)

            for l in 1:R
                    g = reshape(repeat(randn(n), inner = p),(p, n))'
                    sim[l] = n * maximum(2*abs.(mean.(eachcol(tXtpsi.* g))))
            end

            # Initialize lambda based on the simulated quantiles
            lambda0 = c*quantile(vec(sim), 1 - gamma)

            Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))

            lmbda = lmbda0 * Ups0

    return Dict("lambda0" => lmbda0, "lambda" => lmbda, "Ups0" => Ups0) 
    end
end

lambdaCalculation (generic function with 19 methods)

In [112]:
#lambdaCalculation()["lambda0"]

In [113]:
# y
# eh = y
# ehat = reshape(repeat(eh, inner = p),(p, n))'
# xehat = x.*ehat
# psi = mean.(eachcol(xehat.^2))'
# tXehattpsi = (xehat./sqrt.(psi))

LoadError: UndefVarError: x not defined

# How to translate Python Class to Julia Function

In [61]:
mutable struct State
    foo::Int
    bar::Float64
end

function dosomething(s::State)
    s.foo + s.bar
end

function dosomethingelse(s::State, n::Int)
    n * s.bar
end


s = State(1, 10)
dosomething(s)  # returns 11
dosomethingelse(s, 10)  # returns 100

100.0

In [62]:
# mutable struct rlasso_arg
#     x::DataFrame
#     y::DataFrame
#     # colnames=nothing
#     rlasso_arg(colnames=nothing) = new(colnames)
# end

In [63]:
# mutable struct rlasso_arg_3
#     x::DataFrame
#     y::DataFrame
#     bar::Float64
# end

In [114]:
# mutable struct rlasso_arg_11
#     #x::DataFrame
#     #y::DataFrame
#     colnames::Nothing
#     #rlasso_arg_10(colnames=nothing) = new(colnames)
#     post::Bool=true
#     intercept::Bool=true
#     model::Bool=true
#     homoskedastic::Bool=false
#     X_dependent_lambda::Bool=false
#     #lambda_start=nothing
#     #rlasso_arg_10(lambda_start=nothing) = new(lambda_start)
#     c::Float64=1.1
#     #gamma=nothing
#     #rlasso_arg_10(lambda_start=nothing) = new(gamma)
#     numSim::Int=5000
#     numIter::Int=15
#     tol::Float64 = 10^(-5)
#     threshold::Float64=-Inf
#     par::Bool=true
#     corecap::Float64=Inf
#     fix_seed::Bool=true
# end

In [115]:
# mutable struct rlasso_arg_10
#     #x::DataFrame
#     #y::DataFrame
#     #colnames::nothing
#     rlasso_arg_10(colnames=nothing) = new(colnames)
#     post::Bool=true
#     intercept::Bool=true
#     model::Bool=true
#     homoskedastic::Bool=false
#     X_dependent_lambda::Bool=false
#     #lambda_start=nothing
#     rlasso_arg_10(lambda_start=nothing) = new(lambda_start)
#     c::Float64=1.1
#     #gamma=nothing
#     rlasso_arg_10(lambda_start=nothing) = new(gamma)
#     numSim::Int=5000
#     numIter::Int=15
#     tol::Float64 = 10^(-5)
#     threshold::Float64=-Inf
#     par::Bool=true
#     corecap::Float64=Inf
#     fix_seed::Bool=true
# end


# # function rlasso(self::rlasso_arg_4)
# #     return self.x
# # end 

In [116]:
mutable struct rlasso_arg_v6
    x::DataFrame
    y::DataFrame
    colnames::Nothing
    #rlasso_arg_v2(colnames=nothing) = new(colnames)
    post::Bool
    intercept::Bool
    model::Bool
    homoskedastic::Bool
    X_dependent_lambda::Bool
    lambda_start::Nothing
    #rlasso_arg_v2(lambda_start=nothing) = new(lambda_start)
    c::Float64
    gamma::Nothing
    #rlasso_arg_v2(lambda_start=nothing) = new(gamma)
    numSim::Int
    numIter::Int
    tol::Float64 
    threshold::Float64
    par::Bool
    corecap::Float64
    fix_seed::Bool
end


# function rlasso(self::rlasso_arg_4)
#     return self.x
# end 

In [121]:
X_4 = DataFrame(X_3, :auto)
Y_4 = DataFrame([y], [:y])
println("alex")

alex


In [130]:
r = rlasso_arg_v6( X_4, Y_4, nothing, true, true, true, false, false, 
                    nothing, 1.1, nothing, 5000, 15, 10^(-5), -Inf, true, Inf, true )

rlasso_arg_v6([1m5150×5 DataFrame[0m
[1m  Row [0m│[1m x1      [0m[1m x2      [0m[1m x3      [0m[1m x4      [0m[1m x5      [0m
[1m      [0m│[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m
──────┼─────────────────────────────────────────────
    1 │     1.0      0.0      0.0      0.0      1.0
    2 │     0.0      0.0      0.0      0.0      1.0
    3 │     0.0      0.0      1.0      0.0      0.0
    4 │     1.0      0.0      0.0      0.0      0.0
    5 │     1.0      0.0      0.0      0.0      1.0
    6 │     1.0      0.0      0.0      0.0      1.0
    7 │     1.0      0.0      1.0      0.0      0.0
    8 │     0.0      0.0      1.0      0.0      0.0
    9 │     1.0      0.0      1.0      0.0      0.0
   10 │     1.0      0.0      0.0      0.0      1.0
   11 │     1.0      0.0      1.0      0.0      0.0
  ⋮   │    ⋮        ⋮        ⋮        ⋮        ⋮
 5141 │     1.0      0.0      0.0      0.0      1.0
 5142 │     0.0      0.0    

In [127]:
# function rlasso(self::rlasso_arg_v6)
#     println(self.x)
# end

In [128]:
# rlasso(r)

In [134]:
function rlasso(self::rlasso_arg_v6)
     
    # Initialize internal variables
    if self.x isa DataFrame && isnothing(self.colnames)
        colnames = names(self.x)
        
    end
    
    x = Matrix(self.x)
    y = Matrix(self.y)
    
    n = size(x)[1]
    p = size(x)[2]
    
    if isnothing(self.colnames)
                
        for i in 1:p
            a = "V" * string(i)
            V = append!(V, [a])
        end
        
        colnames  = V
    else
        colnames = colnames
        
    end
    
    # Unused line in the original code
    # ind_names = np.arange(self.p) + 1
    
    post               = self.post
    intercept          = self.intercept
    model              = self.model
    homoskedastic      = self.homoskedastic
    X_dependent_lambda = self.X_dependent_lambda
    lambda_start       = self.lambda_start
    c                  = self.c

    if isnothing(self.gamma)
        gamma = .1 / log(n)
    
    else
        gamma = self.gamma
    end
    
    numSim    = self.numSim
    numIter   = self.numIter
    tol       = self.tol
    threshold = self.threshold

    par       = self.par
    corecap   = self.corecap
    fix_seed  = self.fix_seed
    
    if self.post == false & isnothing(self.c)
        c = 0.5
    end
    
    if ( (self.post == false) & (self.homoskedastic == false)
    & (self.X_dependent_lambda == false)
    & (isnothing(self.lambda_start)) 
    & (self.c == 1.1)
    & (self.gamma == .1 / log(n)) )
        
        c = .5
    end
    
    # For now, instantiate estimate as None
    est = nothing
    
    
    # Calculate robust LASSO coefficients
    if self.intercept == true
        meanx = mean.(eachcol(x))
        x = x - ones(n, 1) * mean.(eachcol(x))'
        mu = mean(y)
        y = y .- mu
        
    else
        meanx = zeros(p, 1)
        mu = 0
    end
    
    normx = sqrt.(var(x, corrected = true, dims = 2))
    Psi = mean.(eachcol(x.^2))
    ind = zeros(Bool, p)
    
    XX = x'*x
    Xy = x'*y
    
    startingval = init_values(x, y)[1]
    
    pen = lambdaCalculation(homoskedastic=self.homoskedastic,
                                X_dependent_lambda=X_dependent_lambda,
                                lambda_start=lambda_start, c=c,
                                gamma=gamma, numSim=numSim,
                                y=startingval, x=x, par=par,
                                corecap=corecap, fix_seed=fix_seed)

    lmbda = pen["lambda"]
    Ups0 = Ups1 = pen["Ups0"]
    lmbda0 = pen["lambda0"]
    
    mm = 1
    s0 = sqrt.(var(y, corrected = true, dims = 1))
    
    while mm <= numIter
        if mm == 1 & post
            coefTemp = LassoShooting_fit(x, y, lmbda/2, XX=XX,
                                      Xy=Xy)["coefficients"]
        else
            coefTemp = (
                    LassoShooting_fit(x, y, lmbda, XX=XX,
                                      Xy=Xy)["coefficients"]
                )
        end
        
        coefTemp[isnan.(coefTemp)] .= 0
            
        ind1 <-  broadcast(abs, coefTemp) > 0
        
        x1 = x[:, ind1]
        
        if size(x1)[2] == 0
            if intercept
                intercept_value = mean(y .+ mu)
                
                coef = zeroz(p+1, 1)
                coef = DataFrame([ append!(["Intercept"], colnames), coef ], :auto)
               
                #coef = 
            
            else
                intercept_value = mean(y)
                
                coef = zeroz(p, 1)
                
                coef = DataFrame([ colnames, coef ], :auto)
            end
            
            
            est = Dict("coefficients"=> coef,
                    "beta"=> zeroz(p, 1),
                    "intercept"=> intercept_value,
                    "index"=> DataFrame([ colnames, zeros(Bool, p) ], :auto),
                    "lambda"=> lmbda,
                    "lambda0"=> lmbda0,
                    "loadings"=> Ups0,
                    "residuals"=> y .- mean(y),
                    "sigma"=> var(y, corrected = true, dims = 1),
                    "iter"=> mm,
                    #"call"=> Not a Python option
                    "options"=> Dict("post"=> post, "intercept"=> intercept,
                                "ind.scale"=> ind, "mu"=> mu, "meanx"=> meanx)
                   )
                
            if self.model
                    est["model"] = x
            else
                est["model"] = nothing
            
            end 
            
            est["tss"] = sum((y .- mean(y)).^2)
            est["rss"] = sum((y .- mean(y)).^2)
            est["dev"] = y .- mean(y)
        
        end 
        
        # Refinement variance estimation
        if self.post
            
            reg = lm(x1, y)
            
            coefT = coef(lm(x1, y))
            
            coefT[isnan.(coefT)] .= 0
            
            e1 = y - x1*coef(lm(x, y))
            
            coefTemp[ind1] = coefT
            
        else
            e1 = y - x1*coefTemp[ind1]
            
        end
        
        s1 = sqrt.(var(y, corrected = true, dims = 1))
                
        # Homoskedastic and X-independent
        if (
                    (self.homoskedastic == true) 
                    & (self.X_dependent_lambda == false)
            )
            
            Ups1 = s1 * Psi
            lmbda = pen["lambda0"] * Ups1
            
        # Homoskedastic and X-dependent
        elseif (
                    (self.homoskedastic == true)
                    & (self.X_dependent_lambda == true)
            )

            Ups1 = s1 * Psi

            lmbda = pen["lambda0"] * Ups1

        # Heteroskedastic and X-independent
        elseif (
                (self.homoskedastic == false)
                & (self.X_dependent_lambda == false)
            )
                        
            Ups1 =  (1/sqrt(n)) * sqrt.((e1.^2) * x.^2)
            
            lmbda = pen["lambda0"] * Ups1
        
        # Heteroskedastic and X-dependent
        elseif (
                (self.homoskedastic == false)
                & (self.X_dependent_lambda == true)
                )
            
            lc = lambdaCalculation(homoskedastic=homoskedastic,
                       X_dependent_lambda=X_dependent_lambda,
                       lambda_start=lambda_start,
                       c=c, gamma=gamma,
                       numSim=numSim, y=e1, x=x,
                       par=par, corecap=corecap,
                       fix_seed=fix_seed)

            Ups1 = lc["Ups0"]

            lmbda = lc["lambda"]
        
        # If homoskedastic is set to None
        elseif isnothing(self.homoskedastic)
            
            Ups1 =  (
                    (1/sqrt(n)) * sqrt.((e1.^2) * x.^2)
                )
            
            lmbda = pen["lambda0"] * Ups1
        end
        
        mm = mm + 1
        
        if broadcast(abs, s0 - s1) < self.tol
            break
            
        end
        
        s0 = s1
        
    end
    
     if size(x1)[1] == 0
        #coefTemp = None
        ind1 = zeros(p, 1)
    end
    
    coefTemp = coefTemp
    
    coefTemp[broadcast(abs, coefTemp) .< threshold] = 0
    
    coefTemp = DataFrame([ colnames, coefTemp ], :auto)

    ind1 = ind1
    
    ind1 = DataFrame([ colnames, ind1 ], :auto)
    
    if self.intercept
        
        if isnothing(mu)
            mu = 0
        end
        
        if isnothing(meanx)
            meanx = zeros( size(coefTemp)[1], 1)
        end
        
        if sum(ind) == 0
            intercept_value = mu - sum(meanx * coefTemp)
        else
            intercept_value = mu - sum(meanx * coefTemp)
        end
    
    else
        intercept_value = NaN
    end
    
    #s1 = sqrt.(var(e1, corrected = true, dims = 1))
    
    if self.intercept
        beta = vcat(intercept_value, coefTemp.x2)
        
        beta = DataFrame([ append!(["Intercept"], colnames), beta ], :auto)
    
    else
        beta = coefTemp
        
    end
    
    s1 = sqrt.(var(e1, corrected = true, dims = 1))
    
    
    est = Dict(
    "coefficients"=> beta,
    "beta"=> DataFrame([ colnames, coefTemp ], :auto), 
    "intercept"=> intercept_value,
    "index"=> ind1,
    "lambda"=> DataFrame([ colnames, lmbda ], :auto),
    "lambda0"=> lmbda0,
    "loadings"=> Ups1,
    "residuals"=> e1,
    "sigma"=> s1,
    "iter"=> mm,
    #"call"=> Not a Python option
    "options"=> Dict("post"=> self.post, "intercept"=> self.intercept,
                "ind.scale"=> ind, "mu"=> mu, "meanx"=> meanx),
    "model"=> model
    )
    
    if model
        x = x + ones(n, 1) * mean.(eachcol(x))'
        
        est["model"] = x
        
    else
        est["model"] = nothing
        
    end
    
    est["tss"] = sum((y .- mean(y)).^2)
    est["rss"] = sum((y .- mean(y)).^2)
    est["dev"] = y .- mean(y)
end

rlasso (generic function with 1 method)

In [133]:
est = Dict()
est["alex"] = 125

125

In [71]:
X_2 = X - ones(n, 1) * mean.(eachcol(X))'

LoadError: DimensionMismatch("dimensions must match: a has dims (Base.OneTo(10), Base.OneTo(5)), b has dims (Base.OneTo(5150), Base.OneTo(5)), mismatch at 1")

In [72]:
vcat(beta, lmbda)

9-element Vector{Float64}:
 -0.6819308619005207
 -0.6801564958943725
  1.8474563821667511
 -0.6299737595373538
 -1.0155336331137883
  0.9317973382691673
 -0.08685797753730269
  0.8892244072892007
  1.3003293796679618

In [73]:
DataFrame([ beta, lmbda ], :auto).x2

LoadError: DimensionMismatch("column :x1 has length 5 and column :x2 has length 4")

In [74]:
X.^2

10×5 Matrix{Float64}:
 0.427841   0.102074    0.0339901   0.0638288    1.42107
 0.898827   1.06102     7.37136e-5  0.71534      0.0918065
 2.98238    0.0749167   0.51014     0.649773     1.86656
 3.00234    0.164921    2.32591     0.0299429    0.0282363
 2.51798    0.0487381   5.69287     0.000695903  0.403914
 0.0527992  3.76062     0.478894    0.333837     1.06878
 0.485929   1.71171     0.00367897  1.74633      0.96211
 0.251804   0.00861262  1.61532     0.062932     1.19706
 0.0207983  0.0928515   0.116479    1.6737       2.74736
 0.771301   0.131784    0.198429    3.19888      0.422412

In [75]:
sqrt.(var(Y, corrected = true, dims = 1))

1-element Vector{Float64}:
 0.05131416452031087

In [76]:
#ind1 = [true, true, false, false, true, false, false, false, false, true]
ind1 = [true, true, false, false, true]
coefTemp[ind1]

LoadError: UndefVarError: coefTemp not defined

In [77]:
coefTemp = coef(lm(X, Y))

LoadError: DimensionMismatch("second dimension of A, 10, does not match length of x, 90")

In [78]:
ols = lm(X, Y)

LoadError: DimensionMismatch("second dimension of A, 10, does not match length of x, 90")

In [79]:
X1

10×5 Matrix{Float64}:
 -0.232196  -0.52875      1.14067     0.532009  -0.27117
  1.22915    0.00327654   0.780061   -2.05658   -1.11411
 -1.54678   -1.37554      0.789872    0.70519   -2.34992
 -1.01527   -0.997256    -0.0964263   2.54386    1.36113
 -2.28876   -0.0816941   -0.386873   -1.60585   -1.03189
 -0.193157  -1.6776       0.723324    0.891942  -0.817945
 -1.87176   -0.00877358   1.31417     1.73964    1.11977
  1.56158   -0.0475094   -0.62577    -0.531155  -0.116328
 -0.666547   1.56711      2.46869    -0.964327   0.476674
 -0.190461  -0.633914    -0.9885     -0.990128   1.12453

In [80]:
Y - X1*coef(lm(X, Y))

LoadError: DimensionMismatch("second dimension of A, 10, does not match length of x, 90")

In [81]:
Y - predict(ols, X1)

LoadError: UndefVarError: ols not defined

In [82]:
y .- mean(y)

5150-element Vector{Float64}:
 -0.7074223224507943
  0.9020155899833062
 -0.5676603800756355
 -0.33585876601831144
  0.3911899662173153
 -0.5085714637056289
 -0.014275141890848886
 -0.014275141890848886
 -0.48588005250355826
 -0.014275141890848886
 -0.11963565754867522
 -0.4842787711365846
 -0.4842787711365846
  ⋮
  0.010417470699522369
  0.08103503791347588
  0.8479483687130305
  0.1469930057052733
 -0.14780653451537162
  0.20886840942336082
 -0.34277920886288493
 -0.27824068772531385
  0.16804641490310557
  0.6788720386690965
  0.524721358841838
 -0.11963565754867522

In [83]:
mean(Y)

0.04534949282222222

In [84]:
(Y .- mean(Y))

90-element Vector{Float64}:
 -0.06968524382222221
  0.05512307417777778
  0.021701989177777777
  0.018739673177777784
 -0.01741994482222222
  0.0010579461777777827
  0.021982847177777785
 -0.02437181282222222
 -0.011798256822222221
 -0.0062029698222222165
  0.030777014177777777
  0.08260171617777778
 -0.06967558182222222
  ⋮
  0.04775542017777778
  0.019879067177777786
 -0.00725446982222222
 -0.01113649082222222
  0.007409645177777778
 -0.006933854822222217
 -0.01345470282222222
 -0.01415350782222222
 -0.011253836822222217
  0.0015509691777777779
 -0.005576120822222218
 -0.004707954822222221

In [85]:
sum((Y .- mean(Y)).^2)

0.2343497697571603

In [86]:
d = Dict()

d["a"]=(1,2,3)

(1, 2, 3)

In [87]:
d["b"]=(7,9)

(7, 9)

In [88]:
d

Dict{Any, Any} with 2 entries:
  "b" => (7, 9)
  "a" => (1, 2, 3)

In [111]:
Y .+ 5

10-element Vector{Float64}:
 3.319558909091376
 4.93427542408587
 4.938764943089442
 6.571265589643371
 4.86780209537569
 4.00695840525545
 3.8012345930177553
 2.0223724144902326
 5.052896004215135
 5.48220271958761

In [108]:
X[:, [true, false, true, true, true]]

10×4 Matrix{Float64}:
 -1.37249     0.413456    1.4489     1.16042
  1.76501    -0.636814    0.770967  -0.333
 -0.0694807   0.431079    0.199816  -0.745393
 -0.177394    0.139683    3.06382    1.80425
  0.726266    1.04664    -1.38691   -0.0867943
  0.194283   -0.561051   -2.22438   -1.41627
  2.20072    -0.624947    0.284193  -1.13353
  0.793911    0.500419   -1.25645    0.0716802
  0.425668   -0.0600364   0.902487  -0.259147
  1.23246    -0.657438   -0.543199  -1.33069

In [103]:
corr = broadcast(abs, cor(Y, X)[1:[true, false], :])


LoadError: MethodError: no method matching (::Colon)(::Int64, ::Vector{Bool})
[0mClosest candidates are:
[0m  (::Colon)(::T, ::Any, [91m::T[39m) where T<:Real at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\range.jl:41
[0m  (::Colon)(::A, ::Any, [91m::C[39m) where {A<:Real, C<:Real} at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\range.jl:10
[0m  (::Colon)(::T, ::Any, [91m::T[39m) where T at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\range.jl:40
[0m  ...

In [98]:
broadcast(abs, X[1, :])

5-element Vector{Float64}:
 1.3724938687984305
 1.5418519220139395
 0.4134563475774925
 1.448896561688129
 1.160424749166399

In [99]:
X

10×5 Matrix{Float64}:
 -1.37249    -1.54185     0.413456    1.4489     1.16042
  1.76501    -1.92393    -0.636814    0.770967  -0.333
 -0.0694807   0.0921964   0.431079    0.199816  -0.745393
 -0.177394   -0.414105    0.139683    3.06382    1.80425
  0.726266    0.483002    1.04664    -1.38691   -0.0867943
  0.194283    0.0867137  -0.561051   -2.22438   -1.41627
  2.20072    -0.601646   -0.624947    0.284193  -1.13353
  0.793911   -0.524062    0.500419   -1.25645    0.0716802
  0.425668   -0.0799985  -0.0600364   0.902487  -0.259147
  1.23246    -1.26661    -0.657438   -0.543199  -1.33069

In [79]:
isnan.(X)

10×5 BitMatrix:
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0
 0  0  0  0  0

In [78]:
post = ture 

if post
    println("alex")
end

In [76]:
if 




sqrt.(var(Y, corrected = true, dims = 1))

1-element Vector{Float64}:
 0.05131416452031087

In [72]:
zeros(Bool, p)

18-element Vector{Bool}:
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0

In [62]:
Ups0 = sqrt(var(X, corrected = true))

0.9525827662949281

In [69]:
sqrt.(var(X, corrected = true, dims = 2))

10×1 Matrix{Float64}:
 0.8872304800676166
 1.28378375529363
 0.6822911784830161
 1.089947029012406
 0.8467359972262604
 0.8639820580162922
 0.9196825344835341
 1.0090973828808507
 1.5310211172177406
 0.556429603250096

In [60]:
X

10×5 Matrix{Float64}:
 -1.38106   -0.115624    0.0376426   0.893737     0.655332
  1.5352     0.543936   -0.264749   -1.6361      -1.18055
 -0.134788  -0.0948142   0.263126   -1.1845       0.643543
 -0.739486   0.945522   -0.395543    1.17396     -1.34242
 -0.247314  -0.354279    1.49503     1.26713      0.443634
  1.14562   -0.788751    0.261493   -0.999771    -0.252403
 -0.714057  -0.586036    0.500456    0.780652     1.44014
 -1.42593    0.243058   -0.0153428   1.10612     -0.997152
 -2.55791   -1.75044     0.603033    0.179438     0.879666
 -0.287261   0.768072   -0.0117123   0.00387802   0.999846

90-element Vector{Float64}:
 -0.06968524382222221
  0.05512307417777778
  0.021701989177777777
  0.018739673177777784
 -0.01741994482222222
  0.0010579461777777827
  0.021982847177777785
 -0.02437181282222222
 -0.011798256822222221
 -0.0062029698222222165
  0.030777014177777777
  0.08260171617777778
 -0.06967558182222222
  ⋮
  0.04775542017777778
  0.019879067177777786
 -0.00725446982222222
 -0.01113649082222222
  0.007409645177777778
 -0.006933854822222217
 -0.01345470282222222
 -0.01415350782222222
 -0.011253836822222217
  0.0015509691777777779
 -0.005576120822222218
 -0.004707954822222221

18×1 Matrix{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [38]:
mean(Y)

0.04534949282222222

In [34]:
mean.(eachcol(X))

5-element Vector{Float64}:
 -0.48069838929607134
 -0.11893522809546746
  0.24734294864612436
  0.15845491038505846
  0.12896378140007056

In [307]:
n = 10

10

In [308]:
ones(n, 1)

10×1 Matrix{Float64}:
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0
 1.0

In [321]:
X

10×5 Matrix{Float64}:
 -0.0489688  -0.275824    1.63593   -0.00648359   0.325701
  0.242576    0.906716   -0.547771   0.284369     1.25943
  1.19262    -0.202613   -0.537657  -0.556403     0.347462
  1.22757     1.32089    -0.783089  -0.427006     1.13879
 -1.38191    -1.34955    -0.305437   1.44057      0.677275
 -1.03997    -1.82096    -0.481649   0.789954    -1.53124
 -0.842654    0.701064   -0.120484   1.10418      0.0155075
  2.55307     0.0215592   0.47075   -0.632074     0.790734
  0.0491994   0.855299   -1.00134   -0.437576     0.533224
  0.150925   -1.09241     0.343678  -0.0364082    1.18734

In [319]:
mean.(eachcol(X))'

1×5 adjoint(::Vector{Float64}) with eltype Float64:
 0.210247  -0.0935832  -0.132707  0.152313  0.474422

In [37]:
X- ones(10, 1) * mean.(eachcol(X))'

10×5 Matrix{Float64}:
 -0.90036    0.00331095  -0.2097      0.735282    0.526368
  2.0159     0.662872    -0.512092   -1.79455    -1.30952
  0.34591    0.0241211    0.0157827  -1.34296     0.514579
 -0.258788   1.06446     -0.642886    1.0155     -1.47138
  0.233384  -0.235344     1.24768     1.10868     0.31467
  1.62632   -0.669816     0.0141496  -1.15823    -0.381366
 -0.233359  -0.467101     0.253113    0.622197    1.31118
 -0.94523    0.361993    -0.262686    0.947666   -1.12612
 -2.07721   -1.6315       0.35569     0.0209836   0.750702
  0.193438   0.887007    -0.259055   -0.154577    0.870882

In [330]:
ones(n, 1)*Matrix(mean(eachcol(X)))

LoadError: MethodError: no method matching (Matrix)(::Vector{Float64})
[0mClosest candidates are:
[0m  (Array{T, N} where T)(::AbstractArray{S, N}) where {S, N} at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\boot.jl:479
[0m  (Matrix)([91m::Union{QR, LinearAlgebra.QRCompactWY}[39m) at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\qr.jl:441
[0m  (Matrix)([91m::UpperTriangular{T, S} where S<:AbstractMatrix{T}[39m) where T at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\triangular.jl:34
[0m  ...

In [289]:
n = 100
gamma = 1 / log(n)

0.21714724095162588

In [201]:
colnames  = nothing



if X_4 isa DataFrame && (isnothing(nothing))
    println("alex")
end 

alex


In [123]:
a = ["alex", "maria"]
append!(a, ["carlos"])

3-element Vector{String}:
 "alex"
 "maria"
 "carlos"

In [275]:
a = "V" * "1"

"V1"

In [114]:
V = []

Any[]

In [115]:
for i in 1:5
    a = "V" * string(i)
    V = append!(V, [a])
end
    

In [116]:
V

5-element Vector{Any}:
 "V1"
 "V2"
 "V3"
 "V4"
 "V5"

In [124]:
append!(["Intercept"], V)

6-element Vector{String}:
 "Intercept"
 "V1"
 "V2"
 "V3"
 "V4"
 "V5"

In [118]:
V_2 = [1, 2, 3, 4, 5]

5-element Vector{Int64}:
 1
 2
 3
 4
 5

In [121]:
DataFrame([V, V_2 ], :auto)

Unnamed: 0_level_0,x1,x2
Unnamed: 0_level_1,Any,Any
1,V1,1
2,V2,2
3,V3,3
4,V4,4
5,V5,5


In [219]:
rlasso(r)

5


In [162]:
r = rlasso_arg_v2(  nothing, true, true, true, false, false, 
                    nothing, 1.1, nothing, 5000, 15, 10^(-5), -Inf, true, Inf, true )


# self, x, y, colnames=None, post=True, intercept=True,
#              model=True, homoskedastic=False, X_dependent_lambda=False,
#              lambda_start=None, c=1.1, gamma=None, numSim=5000, numIter=15,
#              tol=10**(-5), threshold=-np.inf, par=True, corecap=np.inf,
#              fix_seed=True)

LoadError: MethodError: no method matching rlasso_arg_v2(::Nothing, ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Nothing, ::Float64, ::Nothing, ::Int64, ::Int64, ::Float64, ::Float64, ::Bool, ::Float64, ::Bool)
[0mClosest candidates are:
[0m  rlasso_arg_v2(::Any) at In[161]:15

In [151]:
r = rlasso_arg_8(X_4,Y_4, true, true)


[1m5150×5 DataFrame[0m
[1m  Row [0m│[1m x1      [0m[1m x2      [0m[1m x3      [0m[1m x4      [0m[1m x5      [0m
[1m      [0m│[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m
──────┼─────────────────────────────────────────────
    1 │     1.0      0.0      0.0      0.0      1.0
    2 │     0.0      0.0      0.0      0.0      1.0
    3 │     0.0      0.0      1.0      0.0      0.0
    4 │     1.0      0.0      0.0      0.0      0.0
    5 │     1.0      0.0      0.0      0.0      1.0
    6 │     1.0      0.0      0.0      0.0      1.0
    7 │     1.0      0.0      1.0      0.0      0.0
    8 │     0.0      0.0      1.0      0.0      0.0
    9 │     1.0      0.0      1.0      0.0      0.0
   10 │     1.0      0.0      0.0      0.0      1.0
   11 │     1.0      0.0      1.0      0.0      0.0
   12 │     0.0      0.0      1.0      0.0      0.0
   13 │     1.0      0.0      0.0      1.0      0.0
   14 │     1.0      0.0      0.0      

 5150 │     0.0      0.0      0.0      0.0      0.0[1m5150×1 DataFrame[0m
[1m  Row [0m│[1m y       [0m
[1m      [0m│[90m Float64 [0m
──────┼─────────
    1 │ 2.26336
    2 │ 3.8728
    3 │ 2.40313
    4 │ 2.63493
    5 │ 3.36198
    6 │ 2.46222
    7 │ 2.95651
    8 │ 2.95651
    9 │ 2.48491
   10 │ 2.95651
   11 │ 2.85115
   12 │ 2.48651
   13 │ 2.48651
   14 │ 2.59984
   15 │ 2.79399
   16 │ 3.32808
   17 │ 3.07269
   18 │ 2.158
   19 │ 2.95651
   20 │ 2.57922
   21 │ 2.36872
   22 │ 2.44569
   23 │ 2.87855
   24 │ 2.95651
   25 │ 2.79143
   26 │ 2.95651
   27 │ 2.66883
   28 │ 2.48491
   29 │ 2.82298
   30 │ 2.97911
   31 │ 3.74497
   32 │ 2.63493
   33 │ 2.31215
   34 │ 2.64285
   35 │ 2.48205
   36 │ 2.44569
   37 │ 3.17966
   38 │ 2.78946
   39 │ 2.44569
   40 │ 3.71865
   41 │ 3.07429
   42 │ 2.41752
   43 │ 1.90669
   44 │ 2.31215
   45 │ 2.48651
   46 │ 2.64066
   47 │ 3.36198
   48 │ 1.97568
   49 │ 2.46631
   50 │ 3.29298
   51 │ 2.5601
   52 │ 2.57085
   53 │ 2.73

 5150 │ 2.85115


In [175]:
mutable struct rlasso_arg_8
    x::DataFrame
    y::DataFrame
    #colnames::nothing
    #rlasso_arg_7(colnames=nothing) = new(colnames)
    post::Bool
    intercept::Bool
end
    
function rlasso(self::rlasso_arg_8)
    println(self.x, self.y)
end

rlasso (generic function with 4 methods)

In [None]:
function rlasso(self, x, y, colnames=None, post=True, intercept=True,
             model=True, homoskedastic=False, X_dependent_lambda=False,
             lambda_start=None, c=1.1, gamma=None, numSim=5000, numIter=15,
             tol=10**(-5), threshold=-np.inf, par=True, corecap=np.inf,
             fix_seed=True)
end 

In [179]:
r = rlasso_arg_8(X_4, Y_4, true, true)

rlasso(r)

[1m5150×5 DataFrame[0m
[1m  Row [0m│[1m x1      [0m[1m x2      [0m[1m x3      [0m[1m x4      [0m[1m x5      [0m
[1m      [0m│[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m[90m Float64 [0m
──────┼─────────────────────────────────────────────
    1 │     1.0      0.0      0.0      0.0      1.0
    2 │     0.0      0.0      0.0      0.0      1.0
    3 │     0.0      0.0      1.0      0.0      0.0
    4 │     1.0      0.0      0.0      0.0      0.0
    5 │     1.0      0.0      0.0      0.0      1.0
    6 │     1.0      0.0      0.0      0.0      1.0
    7 │     1.0      0.0      1.0      0.0      0.0
    8 │     0.0      0.0      1.0      0.0      0.0
    9 │     1.0      0.0      1.0      0.0      0.0
   10 │     1.0      0.0      0.0      0.0      1.0
   11 │     1.0      0.0      1.0      0.0      0.0
   12 │     0.0      0.0      1.0      0.0      0.0
   13 │     1.0      0.0      0.0      1.0      0.0
   14 │     1.0      0.0      0.0      

 5150 │     0.0      0.0      0.0      0.0      0.0[1m5150×1 DataFrame[0m
[1m  Row [0m│[1m y       [0m
[1m      [0m│[90m Float64 [0m
──────┼─────────
    1 │ 2.26336
    2 │ 3.8728
    3 │ 2.40313
    4 │ 2.63493
    5 │ 3.36198
    6 │ 2.46222
    7 │ 2.95651
    8 │ 2.95651
    9 │ 2.48491
   10 │ 2.95651
   11 │ 2.85115
   12 │ 2.48651
   13 │ 2.48651
   14 │ 2.59984
   15 │ 2.79399
   16 │ 3.32808
   17 │ 3.07269
   18 │ 2.158
   19 │ 2.95651
   20 │ 2.57922
   21 │ 2.36872
   22 │ 2.44569
   23 │ 2.87855
   24 │ 2.95651
   25 │ 2.79143
   26 │ 2.95651
   27 │ 2.66883
   28 │ 2.48491
   29 │ 2.82298
   30 │ 2.97911
   31 │ 3.74497
   32 │ 2.63493
   33 │ 2.31215
   34 │ 2.64285
   35 │ 2.48205
   36 │ 2.44569
   37 │ 3.17966
   38 │ 2.78946
   39 │ 2.44569
   40 │ 3.71865
   41 │ 3.07429
   42 │ 2.41752
   43 │ 1.90669
   44 │ 2.31215
   45 │ 2.48651
   46 │ 2.64066
   47 │ 3.36198
   48 │ 1.97568
   49 │ 2.46631
   50 │ 3.29298
   51 │ 2.5601
   52 │ 2.57085
   53 │ 2.73

 5150 │ 2.85115


In [None]:
function rlasso( self::rlasso_arg)
    if self.x isa DataFrame & isnothing(self.colnames)
        colnames = names(self.x)
        
        
    
    
    
    
    
    
    
    
    
    
    
    
    
    
end 

In [56]:
names(X_4)

5-element Vector{String}:
 "x1"
 "x2"
 "x3"
 "x4"
 "x5"

In [101]:
Y_4 isa DataFrame

true

In [118]:
X_4 = DataFrame(X_3, :auto)
Y_4 = DataFrame([y], [:y])

Unnamed: 0_level_0,y
Unnamed: 0_level_1,Float64
1,2.26336
2,3.8728
3,2.40313
4,2.63493
5,3.36198
6,2.46222
7,2.95651
8,2.95651
9,2.48491
10,2.95651


In [23]:
    def __init__(self, x, y, colnames=None, post=True, intercept=True,
                 model=True, homoskedastic=False, X_dependent_lambda=False,
                 lambda_start=None, c=1.1, gamma=None, numSim=5000, numIter=15,
                 tol=10**(-5), threshold=-np.inf, par=True, corecap=np.inf,
                 fix_seed=True):

LoadError: syntax: extra token "State" after end of expression

In [154]:
xehat.^2

5×5 Matrix{Float64}:
  9.39943    9.39943    9.39943    9.39943    9.39943
 17.3247    17.3247    17.3247    17.3247    17.3247
  0.211723   0.211723   0.211723   0.211723   0.211723
  1.00107    1.00107    1.00107    1.00107    1.00107
  5.10129    5.10129    5.10129    5.10129    5.10129

In [143]:
xehat

5×5 Matrix{Float64}:
  3.06585    3.06585    3.06585    3.06585    3.06585
 -4.16229   -4.16229   -4.16229   -4.16229   -4.16229
 -0.460134  -0.460134  -0.460134  -0.460134  -0.460134
 -1.00054   -1.00054   -1.00054   -1.00054   -1.00054
  2.2586     2.2586     2.2586     2.2586     2.2586

In [147]:
sqrt.(psi)

1×5 Matrix{Float64}:
 2.57053  2.57053  2.57053  2.57053  2.57053

In [136]:
xehat

5×5 Matrix{Float64}:
  3.06585    3.06585    3.06585    3.06585    3.06585
 -4.16229   -4.16229   -4.16229   -4.16229   -4.16229
 -0.460134  -0.460134  -0.460134  -0.460134  -0.460134
 -1.00054   -1.00054   -1.00054   -1.00054   -1.00054
  2.2586     2.2586     2.2586     2.2586     2.2586

In [135]:
psi

1×5 adjoint(::Vector{Float64}) with eltype Float64:
 -0.0597025  -0.0597025  -0.0597025  -0.0597025  -0.0597025

In [134]:
sqrt(psi)

LoadError: MethodError: no method matching sqrt(::Adjoint{Float64, Vector{Float64}})
[0mClosest candidates are:
[0m  sqrt([91m::Union{Float32, Float64}[39m) at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\math.jl:566
[0m  sqrt([91m::StridedMatrix{T}[39m) where T<:Union{Real, Complex} at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\dense.jl:836
[0m  sqrt([91m::Diagonal[39m) at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\diagonal.jl:592
[0m  ...

In [128]:
psi = mean.(eachcol(xehat))'

1×5 adjoint(::Vector{Float64}) with eltype Float64:
 -0.0597025  -0.0597025  -0.0597025  -0.0597025  -0.0597025

In [116]:
y*ones(1, p)

10×5 Matrix{Float64}:
  0.638277   0.638277   0.638277   0.638277   0.638277
 -0.805969  -0.805969  -0.805969  -0.805969  -0.805969
 -0.399263  -0.399263  -0.399263  -0.399263  -0.399263
  0.203019   0.203019   0.203019   0.203019   0.203019
  0.46159    0.46159    0.46159    0.46159    0.46159
  1.1679     1.1679     1.1679     1.1679     1.1679
  0.561702   0.561702   0.561702   0.561702   0.561702
  0.737444   0.737444   0.737444   0.737444   0.737444
  0.224127   0.224127   0.224127   0.224127   0.224127
  0.584296   0.584296   0.584296   0.584296   0.584296

In [112]:
y.*ones(p,1)

LoadError: DimensionMismatch("arrays could not be broadcast to a common size; got a dimension with lengths 10 and 5")

In [126]:
x.^2

10×5 Matrix{Float64}:
 6.75773    0.015731    0.0767582    1.65955     1.00093
 0.0538316  3.58128     0.815611     5.85853     0.0883306
 0.788887   2.33335     0.00285879   0.331446    0.00152463
 0.191249   5.21704     4.22627      0.171546    0.0827277
 0.965387   0.132943    0.809392     2.37874     3.58377
 0.772758   0.00414472  0.000177881  0.00404976  4.88835
 0.607025   2.40655     0.04054      0.637564    0.519543
 0.183133   0.371022    0.0116339    0.0102402   0.0178766
 2.12905    2.47393     0.134531     3.28602     0.118159
 0.414669   3.62391     0.00735741   2.71472     2.94232

In [125]:
psi = mean(x.^2)

1.3888938829818727

In [109]:
Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))

1×5 Matrix{Float64}:
 0.687098  0.726379  0.306074  0.79703  0.953105

In [None]:
Ups0 <- 1/sqrt(n) * sqrt(t(t(y^2) %*% (x^2)))

In [None]:
Ups0 <- sqrt(var(y))

In [75]:
repeat(randn(10), inner = p)

50-element Vector{Float64}:
 -0.5426606288066931
 -0.5426606288066931
 -0.5426606288066931
 -0.5426606288066931
 -0.5426606288066931
  1.2813129314021943
  1.2813129314021943
  1.2813129314021943
  1.2813129314021943
  1.2813129314021943
 -1.4541396281032473
 -1.4541396281032473
 -1.4541396281032473
  ⋮
 -0.537725859397369
 -0.537725859397369
  0.6457751734360266
  0.6457751734360266
  0.6457751734360266
  0.6457751734360266
  0.6457751734360266
 -0.40593715325349267
 -0.40593715325349267
 -0.40593715325349267
 -0.40593715325349267
 -0.40593715325349267

In [78]:
g = reshape(repeat(randn(10), inner = p),(5, 10))'

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
  0.705075   0.705075   0.705075   0.705075   0.705075
  1.51667    1.51667    1.51667    1.51667    1.51667
 -0.728552  -0.728552  -0.728552  -0.728552  -0.728552
  0.125279   0.125279   0.125279   0.125279   0.125279
  1.42334    1.42334    1.42334    1.42334    1.42334
 -0.600246  -0.600246  -0.600246  -0.600246  -0.600246
 -0.893547  -0.893547  -0.893547  -0.893547  -0.893547
  0.236745   0.236745   0.236745   0.236745   0.236745
 -0.789944  -0.789944  -0.789944  -0.789944  -0.789944
 -0.147113  -0.147113  -0.147113  -0.147113  -0.147113

In [74]:
matrix(repeat(randn(10), inner = p))

LoadError: UndefVarError: matrix not defined

In [97]:
R = 100
sim = zeros(R,1)
for l in 1:R
    g = reshape(repeat(randn(10), inner = p),(5, 10))'
    sim[l] = maximum(2*abs.(mean.(eachcol(tXtpsi.* g))))
end




In [None]:
lambda0 = c*quantile(vec(sim), 1 - gamma)

In [81]:
g = reshape(repeat(randn(10), inner = p),(5, 10))'

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
 -0.651544  -0.651544  -0.651544  -0.651544  -0.651544
 -0.449609  -0.449609  -0.449609  -0.449609  -0.449609
 -1.22734   -1.22734   -1.22734   -1.22734   -1.22734
 -0.939588  -0.939588  -0.939588  -0.939588  -0.939588
  0.87259    0.87259    0.87259    0.87259    0.87259
 -0.557327  -0.557327  -0.557327  -0.557327  -0.557327
 -0.232092  -0.232092  -0.232092  -0.232092  -0.232092
  0.523636   0.523636   0.523636   0.523636   0.523636
 -0.377307  -0.377307  -0.377307  -0.377307  -0.377307
  1.73438    1.73438    1.73438    1.73438    1.73438

100-element Vector{Float64}:
 0.9058930636221756
 0.9974339887492555
 0.8915361518976817
 0.4059838910163601
 0.7794933188038025
 0.744080936762073
 0.4901078406889143
 1.604911856999283
 0.5387011545821052
 1.8805858054106046
 1.1352847145790745
 0.6435084321974089
 0.6898252417611304
 ⋮
 0.8961491292420162
 0.842828918284426
 1.2523321965848266
 1.2214504204417402
 0.5665618497153938
 0.7392078708070684
 1.1226014126091135
 1.1197690588942042
 0.8127075867966795
 0.4388331744185597
 0.6802999047321783
 0.9591691065536855

In [104]:
quantile(vec(sim), 0.5)

0.9904178448293439

In [106]:
vector = [6.52276955293844, 6.210569320588091, 4.7864301479581, 9.1187992502725, 8.42000515087866, 6.17048265730705]

6-element Vector{Float64}:
 6.52276955293844
 6.210569320588091
 4.7864301479581
 9.1187992502725
 8.42000515087866
 6.17048265730705

In [108]:
quantile(vector, 1 - gamma)

8.76940220057558

In [72]:
n * max(2 * abs(colMeans(tXtpsi * g)))

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
  2.2058     0.106425    0.235087    1.0931     -0.848923
 -0.196872   1.60577     0.766314    2.05381     0.252186
 -0.753655   1.29615     0.0453687   0.488508    0.033132
  0.371078  -1.93811    -1.74439    -0.351444    0.244057
  0.833712   0.309384    0.763387   -1.3087     -1.60633
  0.745911   0.0546277   0.011317    0.0539983   1.87606
 -0.661103  -1.31632    -0.170847    0.677528    0.611613
 -0.363119   0.516851    0.0915226   0.0858657  -0.113451
 -1.23811   -1.33463     0.311227   -1.53816     0.291675
  0.546407  -1.6153      0.0727827   1.39807     1.45549

In [86]:
tXtpsi

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
  2.2058     0.106425    0.235087    1.0931     -0.848923
 -0.196872   1.60577     0.766314    2.05381     0.252186
 -0.753655   1.29615     0.0453687   0.488508    0.033132
  0.371078  -1.93811    -1.74439    -0.351444    0.244057
  0.833712   0.309384    0.763387   -1.3087     -1.60633
  0.745911   0.0546277   0.011317    0.0539983   1.87606
 -0.661103  -1.31632    -0.170847    0.677528    0.611613
 -0.363119   0.516851    0.0915226   0.0858657  -0.113451
 -1.23811   -1.33463     0.311227   -1.53816     0.291675
  0.546407  -1.6153      0.0727827   1.39807     1.45549

0.40868440531922995

In [93]:
mean.(eachcol(tXtpsi.* g))

5-element Vector{Float64}:
  0.09175619074607241
 -0.20434220265961497
  0.18418159031061226
 -0.01841595885434937
 -0.006452323111749836

In [58]:
Ups0 = sqrt(var(y, corrected = true))

0.5723082480468004

In [45]:
alex = 45
mute = alex

45

In [48]:
if (isnothing(mute))  == 0
    println(alex)
end 

45


In [47]:
isnothing(mute)

false

In [4]:
# We have to make sure that both variables are the same type (Integers or floats) to avoid errors when running the regression
n = 10
p = Int(n/2)

x = randn(n, p)
beta = randn(p)
lmbda = randn(p)
y = randn(n)
gamma = 0.1
c = 1.1


1.1

In [9]:
psi = mean.(eachcol(x.^2))



tXtpsi = (x' ./ sqrt.(psi))'

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
 -0.276339    0.638309    0.44012    -1.99609    -0.696107
 -0.930588   -0.792754    1.56848     1.2595     -0.37923
  0.559535   -0.345888   -1.43638    -0.323765   -0.636522
  0.286299   -0.313657   -1.47366     0.0882716  -1.68812
  1.16986     0.0302914  -0.349372    1.23167     1.11022
 -2.53313     2.02852    -1.46518    -0.828138   -0.438008
  0.0846915  -0.327124   -0.641858    0.886998    0.800242
  0.814653   -0.0714077   0.302399    0.539086    0.211448
  0.210722    0.221536    0.0479571  -1.01492    -1.73667
 -0.402537    2.11403     0.580321   -0.0798286   0.99555

In [85]:
tXtpsi = (x' ./ sqrt(psi))'

10×5 adjoint(::Matrix{Float64}) with eltype Float64:
  2.2058     0.106425    0.235087    1.0931     -0.848923
 -0.196872   1.60577     0.766314    2.05381     0.252186
 -0.753655   1.29615     0.0453687   0.488508    0.033132
  0.371078  -1.93811    -1.74439    -0.351444    0.244057
  0.833712   0.309384    0.763387   -1.3087     -1.60633
  0.745911   0.0546277   0.011317    0.0539983   1.87606
 -0.661103  -1.31632    -0.170847    0.677528    0.611613
 -0.363119   0.516851    0.0915226   0.0858657  -0.113451
 -1.23811   -1.33463     0.311227   -1.53816     0.291675
  0.546407  -1.6153      0.0727827   1.39807     1.45549

In [54]:
quantile(Normal(0.0, 1.0),1 - gamma/(2*p))

2.326347874040846

In [55]:
# Initilaize lambda
lmbda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0),1 - gamma/(2*p))

16.184427406091057

In [32]:
sqrt((y.^2.0)'*(x.^2.0))

LoadError: MethodError: no method matching sqrt(::Adjoint{Float64, Vector{Float64}})
[0mClosest candidates are:
[0m  sqrt([91m::Union{Float32, Float64}[39m) at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\base\math.jl:566
[0m  sqrt([91m::StridedMatrix{T}[39m) where T<:Union{Real, Complex} at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\dense.jl:836
[0m  sqrt([91m::Diagonal[39m) at C:\Users\Alexander\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\diagonal.jl:592
[0m  ...

In [None]:
Ups0 = (1 / np.sqrt(n)) * np.sqrt((y**2).T @ (x**2)).T

In [None]:
lambda0 <- 2 * penalty$c * sqrt(n) * qnorm(1 - penalty$gamma/(2 *p))

In [None]:
intercetp::Bool=true

In [None]:
LassoShooting_fit(X, Y, lmbda)

In [None]:
init_values(X, Y)[4]

In [None]:
reg = lm(X, Y)

In [None]:
homoskedastic=False, X_dependent_lambda=False,
                      lambda_start=None, c=1.1, gamma=0.1, numSim=5000, y=None,
                      x=None, par=True, corecap=np.inf, fix_seed=True

In [None]:
init_values(X, Y)

In [None]:
X

In [1]:
import Pkg; Pkg.add("RData")
import Pkg; Pkg.add("CodecBzip2")
import Pkg; Pkg.add("DataStructures")
import Pkg; Pkg.add("NamedArrays")
import Pkg; Pkg.add("PrettyTables")
import Pkg; Pkg.add("Lasso")

[32m[1m    Updating[22m[39m registry at `C:\Users\Alexander\.julia\registries\General.toml`
│   exception = Downloads.RequestError("https://pkg.julialang.org/registries", 35, "schannel: failed to receive handshake, SSL/TLS connection failed", Downloads.Response("https", "https://sa.pkg.julialang.org/registries", 301, "HTTP/1.1 301 SA internal redirect trigger", ["connection" => "close", "content-length" => "0", "server" => "Varnish", "retry-after" => "0", "location" => "https://sa.pkg.julialang.org/registries", "x-geo-continent" => "SA", "x-geo-country" => "PE", "x-geo-region" => "CAL", "accept-ranges" => "bytes", "date" => "Sun, 13 Mar 2022 15:56:16 GMT", "via" => "1.1 varnish", "x-served-by" => "cache-lim12125-LIM", "x-cache" => "HIT", "x-cache-hits" => "0", "x-timer" => "S1647186976.287239,VS0,VE0"]))
└ @ Pkg.Registry C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.7\Pkg\src\Registry\Registry.jl:82
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  N

LoadError: Unable to automatically install 'Bzip2' from 'C:\Users\Alexander\.julia\packages\Bzip2_jll\iOonP\Artifacts.toml'

In [2]:
using Pkg

Pkg.PlatformEngines.probe_platform_engines!()

Pkg.PlatformEngines.download("https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2020.0.166%2B0/MKL.v2020.0.166.x86_64-apple-darwin14.tar.gz", "MKL_jll.tar.gz"; verbose=true)

LoadError: HTTP/1.1 302 Found (Send failure: Connection was reset) while requesting https://github.com/JuliaBinaryWrappers/MKL_jll.jl/releases/download/MKL-v2020.0.166%2B0/MKL.v2020.0.166.x86_64-apple-darwin14.tar.gz

In [3]:
curl --version

LoadError: syntax: invalid operator "--"

In [5]:
using RData, LinearAlgebra, GLM, DataFrames, Statistics, Random, Distributions, DataStructures, NamedArrays, PrettyTables

In [13]:
# Importing .Rdata file
growth_read = load("../../data/GrowthData.RData")

# Since growth_read is a dictionary, we check if there is a key called "GrowthData", the one we need for our analyze
haskey(growth_read, "GrowthData")
# Now we save that dataframe with a new name
growth = growth_read["GrowthData"]
names(growth)

Y = growth[!, "Outcome"]
Y_2 = DataFrame([Y], [:Y])
X_2 = select(growth, Not(["Outcome"]))

Dict{String, Any} with 1 entry:
  "GrowthData" => [1m90×63 DataFrame[0m…

Unnamed: 0_level_0,intercept,gdpsh465,bmp1l,freeop,freetar,h65,hm65,hf65,p65
Unnamed: 0_level_1,Int32,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,1,6.59167,0.2837,0.153491,0.043888,0.007,0.013,0.001,0.29
2,1,6.82979,0.6141,0.313509,0.061827,0.019,0.032,0.007,0.91
3,1,8.89508,0.0,0.204244,0.009186,0.26,0.325,0.201,1.0
4,1,7.56528,0.1997,0.248714,0.03627,0.061,0.07,0.051,1.0
5,1,7.1624,0.174,0.299252,0.037367,0.017,0.027,0.007,0.82
6,1,7.21891,0.0,0.258865,0.02088,0.023,0.038,0.006,0.5
7,1,7.8536,0.0,0.182525,0.014385,0.039,0.063,0.014,0.92
8,1,7.70391,0.2776,0.215275,0.029713,0.024,0.035,0.013,0.69
9,1,9.06346,0.0,0.109614,0.002171,0.402,0.488,0.314,1.0
10,1,8.15191,0.1484,0.110885,0.028579,0.145,0.173,0.114,1.0


In [28]:
X_2 = convert(Matrix, Matrix(X_2[:, 2:5]))
Y_2 = convert(Matrix, Matrix(Y_2))
lmbda = randn(size(X_2)[2])

4-element Vector{Float64}:
  0.2453949358186934
 -0.08642608748746254
 -0.6178751708887436
 -0.23957066264789037

In [29]:
lm(X_2, Y_2)

LoadError: MethodError: no method matching fit(::Type{LinearModel}, ::Matrix{Float64}, ::Matrix{Float64}, ::Nothing)
[0mClosest candidates are:
[0m  fit(::Type{LinearModel}, ::AbstractMatrix{<:Real}, [91m::AbstractVector{<:Real}[39m, ::Union{Nothing, Bool}; wts, dropcollinear) at C:\Users\Alexander\.julia\packages\GLM\gt3bb\src\lm.jl:161
[0m  fit([91m::Type{StatsBase.Histogram}[39m, ::Any...; kwargs...) at C:\Users\Alexander\.julia\packages\StatsBase\pJqvO\src\hist.jl:383
[0m  fit(::Type{T}, [91m::FormulaTerm[39m, ::Any, ::Any...; contrasts, kwargs...) where T<:RegressionModel at C:\Users\Alexander\.julia\packages\StatsModels\57Kc9\src\statsmodel.jl:78
[0m  ...

In [30]:
X_2

90×4 Matrix{Float64}:
 6.59167  0.2837  0.153491  0.043888
 6.82979  0.6141  0.313509  0.061827
 8.89508  0.0     0.204244  0.009186
 7.56528  0.1997  0.248714  0.03627
 7.1624   0.174   0.299252  0.037367
 7.21891  0.0     0.258865  0.02088
 7.8536   0.0     0.182525  0.014385
 7.70391  0.2776  0.215275  0.029713
 9.06346  0.0     0.109614  0.002171
 8.15191  0.1484  0.110885  0.028579
 6.92952  0.0296  0.165784  0.020115
 7.23778  0.2151  0.078488  0.011581
 8.11582  0.4318  0.137482  0.026547
 ⋮                          
 7.89469  0.1062  0.247626  0.037392
 7.17549  0.0     0.179933  0.046376
 9.03097  0.0     0.293138  0.005517
 8.99554  0.0     0.30472   0.011658
 8.23483  0.0363  0.288405  0.011589
 8.33255  0.0     0.345485  0.006503
 8.64559  0.0     0.28844   0.005995
 8.99106  0.0     0.371898  0.014586
 8.02519  0.005   0.296437  0.013615
 9.03014  0.0     0.265778  0.008629
 8.86531  0.0     0.282939  0.005048
 8.91234  0.0     0.150366  0.024377

In [172]:
# # Get number of observations n and number of variables p
# n, p = size(X)

# # Get number of simulations to use (if simulations are necessary)
# R = numSim

# # Go through all possible combinations of homoskedasticy/heteroskedasticity
# # and X-dependent or independent error terms. The first two cases are
# # special cases: Handling the case there homoskedastic was set to None, and
# # where lambda_start was provided.
# #

# # 1) If homoskedastic was set to None (special case)
# if (isnothing(homoskedastic))
    
#     # Initialize lambda
#         lmbda0 = lambda_start
    
#     Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))
            
#     # Calculate the final vector of penalty terms
#         lmbda = lmbda0 * Ups0
    
# # 2) If lambda_start was provided (special case)
# elseif (isnothing(lambda_start)) == 0
    
#     # Check whether a homogeneous penalty term was provided (a scalar)
#     if maximum(size(lambda_start)) == 1
#         # If so, repeat that p times as the penalty term
#         lmbda = ones(p,1).*lambda_start
    
#     else:
#         # Otherwise, use the provided vector of penalty terms as is
#         lmbda = lambda_start
#     end
        
# # 3) Homoskedastic and X-independent
# elseif homoskedastic == true &  X_dependent_lambda == false
    
#     # Initilaize lambda
#     lmbda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0),1 - gamma/(2*p))
    
#     # Use ddof=1(corrected = true in Julia) to be consistent with R's var() function (in Julia by defaul the DDF is N-1)
#     Ups0 = sqrt(var(y, corrected = true))
    
#     # Calculate the final vector of penalty terms
#     lmbda = zeros(p,1) .+ lmbda0 * Ups0

# # 4) Homoskedastic and X-dependent
# elseif homoskedastic == true & X_dependent_lambda == true
#     psi = mean.(eachcol(x.^2))
#     tXtpsi = (x' ./ sqrt.(psi))'
    
#     R = 5000
#     sim = zeros(R,1)
    
#     for l in 1:R
#         g = reshape(repeat(randn(10), inner = p),(p, n))'
#         sim[l] = n * maximum(2*abs.(mean.(eachcol(tXtpsi.* g))))
#     end
    
#     # Initialize lambda based on the simulated quantiles
#     lambda0 = c*quantile(vec(sim), 1 - gamma)
    
#     Ups0 = sqrt(var(y, corrected = true))
    
#     # Calculate the final vector of penalty terms
#     lmbda = zeros(p,1) .+ lmbda0 * Ups0
 
# # 5) Heteroskedastic and X-independent
# elseif homoskedastic == false &  X_dependent_lambda == false
    
#     # The original includes the comment, "1=num endogenous variables"
#     lmbda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0),1 - gamma/(2*p*1))
    
#     Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))
#     lmbda = lmbda0 * Ups0

# # 6) Heteroskedastic and X-dependent
# elseif homoskedastic == false &  X_dependent_lambda == true
    
#     eh = y
#     ehat = reshape(repeat(eh, inner = p),(p, n))'
    
#     xehat = x.*ehat
#     psi = mean.(eachcol(xehat.^2))'
#     tXehattpsi = (xehat./sqrt.(psi))
    
#     R = 5000
#     sim = zeros(R,1)
    
#     for l in 1:R
#         g = reshape(repeat(randn(10), inner = p),(p, n))'
#         sim[l] = n * maximum(2*abs.(mean.(eachcol(tXtpsi.* g))))
#     end
    
#     # Initialize lambda based on the simulated quantiles
#     lambda0 = c*quantile(vec(sim), 1 - gamma)
    
#     Ups0 = (1 /sqrt(n)) * sqrt.((y.^2)'*(x.^2))
    
#     lmbda = lmbda0 * Ups0

# return Dict("lambda0" => lmbda0, "lambda" => lmbda, "Ups0" => Ups0) 
    
# end

LoadError: UndefVarError: numSim not defined