In [3]:
# import Pkg; Pkg.add("Distributions")
# import Pkg; Pkg.add("Tables")
# import Pkg; Pkg.add("TableOperations")
# import Pkg; Pkg.add("StatsBase")
# import Pkg; Pkg.add("FreqTables")
# import Pkg; Pkg.add("Plots")

In [4]:
# Import relevant packages for splitting data
using LinearAlgebra, GLM, DataFrames, Statistics, Random, Distributions, Tables, TableOperations, StatsBase, FreqTables, DataFrames

In [5]:
# Define a function which turn a list or vector-like object into a proper two
# dimensional column vector

function cvec(a)
    """ Turn a list or vector-like object into a proper column vector
    Input
    a: List or vector-like object, has to be a potential input for np.array()
    Output
    vec: two dimensional NumPy array, with the first dimension weakly greater
         than the second (resulting in a column vector for a vector-like input)
    """
    
    # Conver input into a two dimensional NumPy array
    vec = cat([a], dims = 2) 

    # Check whether the second dimension is strictly greater than the first
    # (remembering Python's zero indexing)
    
    if size(vec)[1] < size(vec)[2]
        # If so, transpose the input vector
        vec = transpose(vec)
    end
   
    # Return the column vector
    return vec

end
    

cvec (generic function with 1 method)

In [6]:
import Statistics.cor
function corre(y, X)
    
    """ Return correlation coefficients between columns of matrices
    Inputs
    y: n by 1 NumPy array
    X: n by k NumPy array
    Outputs
    corr: list of length k, where the k-th element is the correlation
          coefficient between y and the k-th column of X
    """
    # Concatenate y and X into a single NumPy array
    yX = hcat(y, X)
    
    # Get the correlation coefficients between all columns of that array
    corr = cor(yX)
    
    # Get the first row, starting at the first off-diagonal element (these are
    # the correlation coefficients between y and each column of X
    corr = corr[1, :] 
    
    # Return the result
    return corr

end
    
    

corre (generic function with 1 method)

In [7]:
function init_values(X, y, number::Int64=5, intercetp::Bool=true)
    """ Return an initial parameter guess for a LASSO model
    Inputs
    y: n by 1 NumPy array, outcome variable
    X: n by k NumPy array, RHS variables
    Outputs
    residuals: n ny 1 NumPy array, residuals for initial parameter guess
    coefficients: k by 1 NumPy array, initial coefficient values
    """
    # Make sure y is a proper column vector
    #y = cvec(y)
    
    # Get the absolute value of correlations between y and X
    corr = broadcast(abs, corre(y, X))
    
    # Get the number of columns of X
    kx = size(X)[2]
    
    # Make an index selecting the five columns of X which are most correlated
    # with y (since .argsort() always sorts in increasing order, selecting from
    # the back gets the most highly correlated columns)
    index = sortperm(corr, rev=true)[1: min(number, kx)]
    
    # Set up an array of coefficient guesses
    coefficients = zeros(kx)
    
    # Regress y on the five most correlated columns of X, including an intercept
    # if desired
    reg = lm(X[:, index], y)
    
    # Replace the guesses for the estimated coefficients (note that .coef_ does
    # not return the estimated intercept, if one was included in the model)
    
    coefficients[index] = GLM.coef(reg)
    
    # Replace any NANs as zeros
    replace!(coefficients, NaN=>0)
    
    # Get the regression residuals
    residuals = y - predict(reg, X[:, index])
    
    return residuals, reg, index, coefficients 
    
end
    

init_values (generic function with 3 methods)

In [8]:
using Parameters
@with_kw struct control
           maxIter::Int = 1000
           optTol::Float64 = 10^(-5)
           zeroThreshold::Float64 = 10^(-6)
       end

control

In [16]:
control(maxIter)

LoadError: UndefVarError: maxIter not defined

In [55]:
@unpack maxIter, optTol, zeroThreshold = control()

control
  maxIter: Int64 1000
  optTol: Float64 1.0000000000000006e-5
  zeroThreshold: Float64 1.0000000000000006e-6


In [56]:
optTol

1.0000000000000006e-5

In [64]:
function LassoShooting_fit( x, y, lambda, control::control, 
                            XX = nothing, Xy = nothing, beta_start = nothing)

     """ Shooting LASSO algorithm with variable dependent penalty weights
    Inputs
    x: n by p NumPy array, RHS variables
    y: n by 1 NumPy array, outcome variable
    lmbda: p by 1 NumPy array, variable dependent penalty terms. The j-th
           element is the penalty term for the j-th RHS variable.
    maxIter: integer, maximum number of shooting LASSO updated
    optTol: scalar, algorithm terminated once the sum of absolute differences
            between the updated and current weights is below optTol
    zeroThreshold: scalar, if any final weights are below zeroThreshold, they
                   will be set to zero instead
    XX: k by k NumPy array, pre-calculated version of x'x
    Xy: k by 1 NumPy array, pre-calculated version of x'y
    beta_start: k by 1 NumPy array, initial weights
    Outputs
    w: k by 1 NumPy array, final weights
    wp: k by m + 1 NumPy array, where m is the number of iterations the
        algorithm took. History of weight updates, starting with the initial
        weights.
    m: integer, number of iterations the algorithm took
    """
    n = size(x)[1]
    p = size(x)[2]
    
    # Check whether XX and Xy were provided, calculate them if not
    if (isnothing(XX))
        XX = x'*x
    end

    if (isnothing(Xy))
        Xy = x'*y
    end

    # Check whether an initial value for the intercept was provided

    if (isnothing(beta_start))
        # If not, use init_values from help_functions, which will return
        # regression estimates for the five variables in x which are most
        # correlated with y, and initialize all other coefficients as zero
        beta = init_values(x, y)['4']

    else
        # Otherwise, use the provided initial weights
        beta = beta_start
    end

    # Set up a history of weights over time, starting with the initial ones
    wp = beta

    # Keep track of the number of iterations
    m = 1

    # Create versions of XX and Xy which are just those matrices times two
    XX2 = XX * 2
    Xy2 = Xy * 2

    @unpack maxIter, optTol, zeroThreshold = control()

    # Go through all iteration
    while m<maxIter

        # Save the last set of weights (the .copy() is important, otherwise
        # beta_old will be updated every time beta is changed during the
        # following loop)
        beta_old = beta

        # Go through all parameters
        for j in range(p)
            
            # Calculate the shoot
            S0 = sum( XX2[j, :].*beta ) - XX2[j, j].*beta[j] - Xy2[j]

            # Update the weights
            if sum(isnothing(XX)) >= 1
                beta[j] = 0

            elseif S0 >lmbda[j]
                beta[j] = (lmbda[j] - S0) / XX2[j,j]

            elseif S0 < -lmbda[j]
                beta[j] = (-lmbda[j] - S0) / XX2[j,j]

            elseif broadcast(abs, S0) <= lmbda[j]
                beta[j] = 0

            end
        end

        # Add the updated weights to the history of weights
        wp = hcat(wp, beta)

        # Check whether the weights are within tolerance
        if sum(broadcast(abs, beta - beta_old)) < optTol
            # If so, break the while loop
            break
        end

        # Increase the iteration counter
        m = m + 1
    end

    # Set the final weights to the last updated weights
    w = beta   

    # Set weights which are within zeroThreshold to zero
    w[broadcast(abs, w) < zeroThreshold] = 0

    return w, wp, m

end
        

LassoShooting_fit (generic function with 4 methods)

In [39]:
# We have to make sure that both variables are the same type (Integers or floats) to avoid errors when running the regression
n = 1000
p = Int(n/2)

# Create a nxp matrix of standard Gaussians
X = randn(n, p)
beta = randn(p)

# Create a nx1 matrix of standard Gaussians
Y = randn(n)
corre(Y, X)

501-element Vector{Float64}:
  1.0
 -0.00012087924502604373
  0.06536263065884267
  0.026443391977236774
  0.013250340179950392
 -0.02944616205008811
  0.03810430920579904
 -0.005794179730374956
  0.04015394279216659
  0.013799537830154434
  0.018434724961286196
  0.020115313081845283
  0.06792043206202021
  ⋮
 -0.06470986382771099
  0.04175377754921327
  0.015953161692376264
 -0.0602188844070578
  0.03415835409904763
 -0.037088771537956366
  0.019041828558869142
  0.019938157772601835
 -0.015452294468559884
  0.021875869249325847
 -0.06823516957686443
 -0.002940132592505015

In [22]:
XX = nothing

if (isnothing(XX))
    XX = x'*x
end

if (isnothing(Xy))
    Xy = x'*y
end

# Check whether an initial value for the intercept was provided

if (isnothing(beta_start))
    # If not, use init_values from help_functions, which will return
    # regression estimates for the five variables in x which are most
    # correlated with y, and initialize all other coefficients as zero
    beta = init_values(x, y)['4']

else: 
    # Otherwise, use the provided initial weights
    beta = beta_start
    
# Set up a history of weights over time, starting with the initial ones
wp = beta

# Keep track of the number of iterations
m = 1

# Create versions of XX and Xy which are just those matrices times two
XX2 = XX * 2
Xy2 = Xy * 2

@unpack maxIter, optTol, zeroThreshold = control()

# Go through all iteration
while m<maxIter
        
        # Save the last set of weights (the .copy() is important, otherwise
        # beta_old will be updated every time beta is changed during the
        # following loop)
        beta_old = beta
        

LoadError: UndefVarError: x not defined

In [50]:
Xy2 = X'*Y*2
XX2 = X'*X*2
#beta = 3
# Calculate the shoot

S0 = sum( XX2[1, :].*beta ) - XX2[1, 1].*beta[1] - Xy2[1]

# Update the weights
if sum(isnothing(XX)) >= 1
    beta[j] = 0
    
elseif S0 >lmbda[j]
    beta[j] = (lmbda[j] - S0) / XX2[j,j]

elseif S0 < -lmbda[j]:
    beta[j] = (-lmbda[j] - S0) / XX2[j,j]
    
elseif np.abs(S0) <= lmbda[j]:
    beta[j] = 0

end

# Add the updated weights to the history of weights
wp = hcat(wp, beta)
    
# Check whether the weights are within tolerance
if sum(broadcast(abs, beta - beta_old)) < optTol:
    # If so, break the while loop
    break
end
    
# Increase the iteration counter
m = m + 1
    
# Set the final weights to the last updated weights
w = beta   

# Set weights which are within zeroThreshold to zero
w[broadcast(abs, w) < zeroThreshold] = 0

return w, wp, m
    

1321.9380098997306

500×500 Matrix{Float64}:
 1979.73        8.02888   -82.6735   …    43.3222     -58.9106    -43.9855
    8.02888  1939.32      -74.0397         7.15659    -10.7143     48.9423
  -82.6735    -74.0397   1897.21         -35.2187     -12.4275     10.6417
  -61.0868     49.5196     46.6044        -3.32645     33.6416    -32.6863
   35.7663     32.3199    -67.933        -62.7472     -53.2171      9.49623
   87.593      -3.45052    49.003    …   -44.9189     -17.4115    -14.9525
   15.9204   -104.76      -86.6438       -54.1746       5.59023    91.3983
   17.3425     19.3986    105.376         92.0661     215.451     -70.1305
  -28.2022     32.4509     13.6365        25.7939     -75.1109     -2.96491
   -3.52713   -32.802      80.299         -3.53892    -33.1873    -40.3107
 -103.601     112.519       2.35945  …   -15.5076     -59.0865     60.2993
   29.8319     21.3044    -65.9596       -61.1746      57.5209    -55.4077
  -23.4833     24.2646     71.4643        14.1607      60.3983    -66.415

In [52]:
XX = nothing
sum(isnothing(XX))

1

In [44]:
if (isnothing(XX))

500-element Vector{Float64}:
  649.5323052732912
    5.488395420644084
   68.06280186582002
   35.91118007550056
   15.022199146677965
  -21.83215950015125
  -16.662695985475953
    2.2346768400890653
   38.75416519685707
    1.1517941395510007
   92.20647568976926
   33.607653648216655
    8.600767642364477
    ⋮
    0.32767303634894845
  -12.690046912812408
   20.808326021875228
   12.01369693783245
   -0.2775459243551829
   -2.6269671418771945
   45.9147543678218
 -102.02473552924786
   11.808810183880468
   34.96758862833006
   72.88516969444596
   55.54044720609002

500-element Vector{Float64}:
  0.32809168011734535
  0.6835819551819734
 -0.8232725883428411
 -0.5878714316653364
  0.4200103773408705
 -0.2492456100964521
 -1.04662472573756
  0.12885549331263702
 -1.374153618957612
 -0.3265529417174109
 -0.8900152948782618
  1.1265675985160934
 -0.3662501641360256
  ⋮
 -0.0072689043331505155
  0.2714118950864144
 -0.3321927503121732
 -0.20158628574070084
 -0.11618582286436409
 -0.5096769984240165
  0.938592744112984
 -1.09940160378307
  0.23867894247854457
  0.8071514799031426
 -1.237216927989428
 -1.262698340448738

In [30]:
sum(XX[1, :] * beta)

3898.9223872346697

In [237]:
    
    5+5
else 
    4+4
end

10

In [219]:
a = list(maxIter = 1000, optTol = 10^(-5), zeroThreshold = 10^(-6))

LoadError: UndefVarError: list not defined

In [223]:
import Pkg; Pkg.add("Parameters")

[32m[1m    Updating[22m[39m registry at `C:\Users\Alexander\.julia\registries\General.toml`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m   Installed[22m[39m Parameters ─ v0.12.3
[32m[1m   Installed[22m[39m UnPack ───── v1.0.2
[32m[1m    Updating[22m[39m `C:\Users\Alexander\.julia\environments\v1.7\Project.toml`
 [90m [d96e819e] [39m[92m+ Parameters v0.12.3[39m
[32m[1m    Updating[22m[39m `C:\Users\Alexander\.julia\environments\v1.7\Manifest.toml`
 [90m [d96e819e] [39m[92m+ Parameters v0.12.3[39m
 [90m [3a884ed6] [39m[92m+ UnPack v1.0.2[39m
[32m[1mPrecompiling[22m[39m project...
[32m  ✓ [39m[90mUnPack[39m
[32m  ✓ [39mParameters
  2 dependencies successfully precompiled in 1 seconds (68 already precompiled)


In [227]:
using Parameters
@with_kw struct A
           a::Int = 6
           b::Float64 = -1.1
           c::UInt8
       end

A

In [224]:
using Parameters

type Params
    a::TypeOfA
    b::TypeOfB
    c::TypeOfC
end

LoadError: syntax: extra token "Params" after end of expression

In [228]:
A(c=4)

A
  a: Int64 6
  b: Float64 -1.1
  c: UInt8 0x04


In [229]:
A(c=4, a = 2)


A
  a: Int64 2
  b: Float64 -1.1
  c: UInt8 0x04


In [225]:
function dxdt(x, p::Params)
    p.a*x^2 + p.b*x + p.c
end

LoadError: UndefVarError: Params not defined

In [241]:
cross(Y, Y)

LoadError: DimensionMismatch("cross product is only defined for vectors of length 3")

In [238]:
X

1000×500 Matrix{Float64}:
  0.320504    1.14625     0.298372   …   0.527972     1.17221    -0.375504
  0.577118   -1.15784     1.52036       -0.400193    -1.54773     0.0994018
 -0.326732   -0.291426   -1.11486       -0.269266     0.4892      1.21931
 -0.672119    1.69133     0.471579       0.490546     0.284411    0.593264
 -0.398246    0.506194    0.195175      -0.466362    -1.41399     0.349464
 -0.570435    1.50233     0.337755   …   0.0225933    0.651143   -0.882149
  0.88426     0.0615439  -0.798528      -0.611395    -0.551933    0.299965
  0.0695875  -0.0737481   1.9851         0.519225     0.228208    0.374031
 -0.165538    1.12471     0.113689       0.300726     2.0811      0.745988
  0.229808    1.62616    -0.514148      -0.787419    -1.3068      0.610543
  0.0447416  -0.668053   -0.782319   …  -1.28375     -1.31779    -1.45768
 -0.942964   -0.63967     0.888524      -0.671943     0.770762    0.0628972
  0.579881    0.958627    0.131638       0.102862     1.27528    -1.18949


In [209]:
init_values(X,Y)[3]

5-element Vector{Int64}:
   1
 251
 246
  42
 115

In [210]:
cvec(X)

1×1 Matrix{Matrix{Float64}}:
 [0.3205038540809568 1.1462460602530362 … 1.1722062368332917 -0.3755041837937341; 0.5771176089449284 -1.1578371393750535 … -1.547729519036273 0.09940177005469272; … ; 0.9610560229135284 -0.05650135793843897 … -0.07341403741028152 1.0031426031995534; 0.2654207694072223 -0.7121275221457716 … -1.4756293553765882 -0.012545777760569515]

In [211]:
corre(Y, X)

501-element Vector{Float64}:
  1.0
 -0.003886119431972953
  0.03706782749294737
  0.03494361517156133
  0.012419207517462444
 -0.02730509293031313
 -0.004814898460052241
 -0.025987877930942885
  0.03760908249609779
 -0.01958537824382861
  0.010535119000819186
  0.03744267153954585
  0.001426247728775916
  ⋮
  0.048185048214358824
 -0.021221444240598384
  0.0023730809067254084
  0.008513807964957315
 -0.01107407506240078
 -0.01906917318260618
  0.013079792496754079
 -0.02371831145875313
 -0.02095867341069991
  0.07836177941848728
 -0.07834830154906036
  0.0038602863764652545

In [192]:
init_values(X, Y)

LoadError: ArgumentError: number of rows of each array must match (got (1, 1000))

In [190]:
    # Make sure y is a proper column vector
    Y = cvec(Y)
    
    # Get the absolute value of correlations between y and X
    corr = broadcast(abs, corre(Y, X))
    

LoadError: ArgumentError: number of rows of each array must match (got (1, 1000))

In [196]:
# We have to make sure that both variables are the same type (Integers or floats) to avoid errors when running the regression
n = 1000
p = Int(n/2)

# Create a nxp matrix of standard Gaussians
X = randn(n, p)

# Create a nx1 matrix of standard Gaussians
Y = randn(n)
corre(Y, X)

501-element Vector{Float64}:
  1.0
 -0.003886119431972953
  0.03706782749294737
  0.03494361517156133
  0.012419207517462444
 -0.02730509293031313
 -0.004814898460052241
 -0.025987877930942885
  0.03760908249609779
 -0.01958537824382861
  0.010535119000819186
  0.03744267153954585
  0.001426247728775916
  ⋮
  0.048185048214358824
 -0.021221444240598384
  0.0023730809067254084
  0.008513807964957315
 -0.01107407506240078
 -0.01906917318260618
  0.013079792496754079
 -0.02371831145875313
 -0.02095867341069991
  0.07836177941848728
 -0.07834830154906036
  0.0038602863764652545

In [189]:
    # Get the number of columns of X
    kx = size(X)[2]
    
    # Make an index selecting the five columns of X which are most correlated
    # with y (since .argsort() always sorts in increasing order, selecting from
    # the back gets the most highly correlated columns)
    index = sortperm(corr, rev=true)[1: min(number, kx)]
    

LoadError: ArgumentError: number of rows of each array must match (got (1, 1000))

In [188]:
    # Set up an array of coefficient guesses
    coefficients = zeros(kx)
    
    # Regress y on the five most correlated columns of X, including an intercept
    # if desired
    reg = lm(X[:, index], Y)
    
    # Replace the guesses for the estimated coefficients (note that .coef_ does
    # not return the estimated intercept, if one was included in the model)
    
    coefficients[index] = GLM.coef(reg)
    
    # Replace any NANs as zeros
    replace!(coefficients, NaN=>0)
    
    # Get the regression residuals
    residuals = Y - predict(reg, X[:, index])

LoadError: ArgumentError: number of rows of each array must match (got (1, 1000))

In [185]:
size(X)[2]

500

In [158]:
ols = lm(X[:, index], Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────────
         Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────
x1  -0.0203985   0.0307064  -0.66    0.5066  -0.0806552  0.0398581
x2  -0.0247873   0.0312072  -0.79    0.4272  -0.0860265  0.036452
x3  -0.0423043   0.030425   -1.39    0.1647  -0.102009   0.0174001
──────────────────────────────────────────────────────────────────


In [168]:
a = [1 2 3; 4 5 Inf]
replace!(a, Inf=>NaN)

2×3 Matrix{Float64}:
 1.0  2.0    3.0
 4.0  5.0  NaN

In [169]:
a

2×3 Matrix{Float64}:
 1.0  2.0    3.0
 4.0  5.0  NaN

In [174]:
mean(Y - predict(ols, X[:, index]))

0.014763077531358108

In [166]:
a = [1 1 1 4 5 6]

1×6 Matrix{Int64}:
 1  1  1  4  5  6

In [171]:
replace!(a, NaN=>0)

2×3 Matrix{Float64}:
 1.0  2.0  3.0
 4.0  5.0  0.0

In [160]:

GLM.coef(ols)

3-element Vector{Float64}:
 -0.020398505457739254
 -0.024787273154710084
 -0.04230432198737893

In [147]:
a = [1 2 3 4 5 6]
b = [50 6 10 8 11 15; 15 10 40 20 18 19; 1 5 9 7 5 88; 3 5 7 8 8 11; 4 8 9 7 5 3 ]
correl = corre(transpose(a), transpose(b))
correl

6-element Vector{Float64}:
  1.0
 -0.521352401935652
  0.1246802315106248
  0.6842412124629703
  0.9694584179118517
 -0.3614031611621005

In [148]:
# We have to make sure that both variables are the same type (Integers or floats) to avoid errors when running the regression
n = 1000
p = Int(n/2)

# Create a nxp matrix of standard Gaussians
X = randn(n, p)

# Create a nx1 matrix of standard Gaussians
Y = randn(n)

1000-element Vector{Float64}:
  0.31406422831316066
 -1.0993980457316037
  0.8904762854086176
 -1.3362181789299465
 -1.2709415829941277
 -0.5700644770111263
  1.8975500839259551
  0.867329064150445
 -0.37871911493251176
  0.4019643214773681
  1.756269018284203
  1.497051192242682
  0.4349924710384791
  ⋮
  0.2920256281470452
 -0.7501161547891307
  0.4996084216019428
 -0.04407197393989204
  1.176801049227538
 -1.4525410822778444
 -1.0842708152155545
  0.07515026558261212
  0.6580326270741598
  1.0600394433444684
  0.24687194870050067
  1.3831861222026514

In [151]:
lm(X[:, index], Y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────────
         Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────
x1  -0.0203985   0.0307064  -0.66    0.5066  -0.0806552  0.0398581
x2  -0.0247873   0.0312072  -0.79    0.4272  -0.0860265  0.036452
x3  -0.0423043   0.030425   -1.39    0.1647  -0.102009   0.0174001
──────────────────────────────────────────────────────────────────


In [156]:
x = [1,2,3]; y = [2,5,7]
lm(reshape(x, length(x), 1), y)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────
      Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────
x1  2.35714   0.0874818  26.94    0.0014    1.98074    2.73355
──────────────────────────────────────────────────────────────


In [153]:
index

3-element Vector{Int64}:
 1
 2
 3

In [142]:
zeros(10)

10-element Vector{Float64}:
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0
 0.0

In [77]:
corr = broadcast(abs, correl)

3-element Vector{Float64}:
 1.0
 0.747748938368323
 0.4417926243064343

In [83]:
minimum(corr)

0.4417926243064343

In [87]:
index = sort(correl, rev=true)[1:min(5,2)]

2-element Vector{Float64}:
 1.0
 0.9694584179118517

In [140]:
index = sortperm(corr, rev=true)

3-element Vector{Int64}:
 1
 2
 3

In [141]:
corr[index]

3-element Vector{Float64}:
 1.0
 0.747748938368323
 0.4417926243064343

In [134]:
index = sortperm(corr)[1:min(5,3)]

3-element Vector{Int64}:
 3
 2
 1

In [132]:
corr

3-element Vector{Float64}:
 1.0
 0.747748938368323
 0.4417926243064343

In [123]:
r = rand(0:9, 5)

5-element Vector{Int64}:
 8
 6
 3
 8
 5

In [121]:
a = [3 6 1 3 9]

1×5 Matrix{Int64}:
 3  6  1  3  9

In [122]:
r

5-element Vector{Int64}:
 0
 7
 1
 8
 4

In [130]:
i = sortperm(corr )

3-element Vector{Int64}:
 3
 2
 1

In [129]:
r[i]

5-element Vector{Int64}:
 3
 5
 6
 8
 8

In [75]:
correl = corre(transpose(a), transpose(b))

3-element Vector{Float64}:
  1.0
 -0.747748938368323
  0.4417926243064343

In [64]:
yX = hcat(transpose(y), transpose(X))



4×3 Matrix{Int64}:
 1  50  15
 2   6  10
 3  10  40
 4   8  20

In [46]:
corr = cor(yX)

3×3 Matrix{Float64}:
  1.0       -0.747749   0.441793
 -0.747749   1.0       -0.243701
  0.441793  -0.243701   1.0

In [48]:
corr = corr[1, :] 

3-element Vector{Float64}:
  1.0
 -0.747748938368323
  0.4417926243064343

In [None]:
function OLSestimator(Y, X)

    β = inv(X'*X)*(X'*Y)
    # β = X\Y
    errors = Y - X*β
    R_squared = 1.0 - sum(errors.^2.0)/sum((Y .- mean(Y)).^2.0)
    R_squared_adj =  1.0 - ( 1.0 - R_squared )*( size(Y)[1] - 1.0 )/( size(Y)[1]- size(X)[2] - 1.0 )    
    
    return β, R_squared, R_squared_adj
end

In [6]:
a = [1 2 3]
vec = cat(a, dims = 2) 

1×3 Matrix{Int64}:
 1  2  3

In [8]:
 size(a)[1]

1

In [10]:
transpose(a)

3×1 transpose(::Matrix{Int64}) with eltype Int64:
 1
 2
 3

In [None]:
if x < y
    println("x is less than y")
elseif x > y
    println("x is greater than y")
else
    println("x is equal to y")
end