https://github.com/JuliaStats/StatsModels.jl/issues/220

## Reproduce the slow-down (1)

In [1]:
using DataFrames
using GLM
using StatsBase: sample

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
@time df = DataFrame(rand(100, n+1), [:y; x_symbols]);

  0.484371 seconds (1.89 M allocations: 109.199 MiB, 18.43% gc time, 99.95% compilation time)


In [2]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm)

  0.145674 seconds (279.83 k allocations: 18.107 MiB, 41.95% gc time, 100.84% compilation time)
  0.223910 seconds (653.37 k allocations: 41.294 MiB, 3.32% gc time, 43.23% compilation time)
  1.575161 seconds (5.94 M allocations: 373.246 MiB, 4.93% gc time)
  0.297392 seconds (1.20 M allocations: 75.759 MiB, 2.81% gc time)
  0.006010 seconds (3.78 k allocations: 283.598 KiB, 99.76% compilation time)
  1.181900 seconds (5.13 M allocations: 290.800 MiB, 7.37% gc time, 0.11% compilation time)
  0.003476 seconds (3.76 k allocations: 276.488 KiB, 96.21% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x15 + x17 + x8 + x16 + x4 + x12 + x10 + x19 + x13 + x18

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   0.211663     0.16661     1.27    0.2072  -0.119388    0.542714
x15          -0.177842     0.103339   -1.72    0.0887  -0.383175    0.0274904
x17           0.15279      0.108186    1.41    0.1614  -0.0621732   0.367753
x8           -0.00321576   0.10775    -0.03    0.9763  -0.217313    0.210882
x16           0.180192     0.102892    1.75    0.0833  -0.0242534   0.384637
x4           -0.0121941    0.117127   -0.10    0.9173  -0.244923    0.220535
x12          -0.089938     0.0931882  -0.97   

In [3]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm)

  0.081865 seconds (79.69 k allocations: 5.129 MiB, 14.22% gc time, 99.51% compilation time)
  0.000025 seconds (29 allocations: 1.766 KiB)
  0.059309 seconds (136.59 k allocations: 8.852 MiB, 10.10% compilation time)
  0.062542 seconds (80.30 k allocations: 5.017 MiB, 99.66% compilation time)
  0.006906 seconds (3.77 k allocations: 282.848 KiB, 99.77% compilation time)
  0.000027 seconds (21 allocations: 23.188 KiB)
  0.004031 seconds (2.75 k allocations: 194.801 KiB, 96.95% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x4 + x2 + x10 + x16 + x14 + x18 + x19 + x17 + x8 + x1

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error     t  Pr(>|t|)     Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)  0.0621125     0.178754  0.35    0.7291  -0.293068      0.417293
x4           0.0193336     0.120093  0.16    0.8725  -0.219289      0.257957
x2           0.0132758     0.101459  0.13    0.8962  -0.188322      0.214873
x10          0.176226      0.102386  1.72    0.0887  -0.0272116     0.379665
x16          0.207085      0.104168  1.99    0.0499   0.000106052   0.414065
x14          0.0727129     0.101191  0.72    0.4743  -0.128351      0.273777
x18          0.0353622     0.10267   0.34    0.7313 

## Reproduce the slow-down (2)

In [4]:
using DataFrames
using GLM
using StatsBase: sample

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
df = DataFrame(rand(100, n+1), [:y; x_symbols]);

result = []
for _ in 1:10
    x_vars = sample(x_symbols, r; replace=false)
    F = term(:y) ~ sum(term(x) for x in x_vars)
    @time regmodel = lm(F, df)
    push!(result, regmodel)
end
result

  1.958667 seconds (6.72 M allocations: 457.123 MiB, 14.14% gc time)
  0.147574 seconds (223.50 k allocations: 14.361 MiB, 2.97% compilation time)
  0.144807 seconds (223.50 k allocations: 14.362 MiB, 3.12% compilation time)
  0.136062 seconds (223.50 k allocations: 14.362 MiB, 5.44% gc time, 3.65% compilation time)
  0.126303 seconds (223.51 k allocations: 14.373 MiB, 3.08% compilation time)
  0.129163 seconds (223.51 k allocations: 14.364 MiB, 3.09% compilation time)
  0.155178 seconds (223.51 k allocations: 14.369 MiB, 5.60% gc time, 2.54% compilation time)
  0.134527 seconds (223.50 k allocations: 14.361 MiB, 3.60% compilation time)
  0.127977 seconds (223.51 k allocations: 14.371 MiB, 3.06% compilation time)
  0.153585 seconds (223.50 k allocations: 14.364 MiB, 5.72% gc time, 2.96% compilation time)


10-element Vector{Any}:
 StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x13 + x2 + x6 + x18 + x14 + x20 + x16 + x3 + x11 + x7

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.21445      0.179862   1.19    0.2363  -0.142931    0.571831
x13           0.0644428    0.118492   0.54    0.5879  -0.170999    0.299884
x2            0.140088     0.108391   1.29    0.1996  -0.0752826   0.355459
x6            0.199099     0.115614   1.72    0.0885  -0.0306241   0.428823
x18           0.0670641    0.102695   0.65    0.5154  -0.136989    0.271117
x14          -0.0556884    0.109835  -0.51    0.6134  -0.273928    0.162551
x20          -0.0228469    0.113678 

## Solution

In [5]:
using DataFrames
using GLM
using StatsBase: sample

struct MyLinearModel{T, Y, X}
    linmodel::T
    y_var::Y
    x_vars::X
end

function my_lm(y_var::Symbol, x_vars::AbstractVector{Symbol}, df::DataFrame)
    y = df[!, y_var]
    X = [ones(nrow(df)) Matrix(df[!, x_vars])]
    linmodel = lm(X, y)
    MyLinearModel(linmodel, y_var, x_vars)
end

get_y_var(F::FormulaTerm) = F.lhs.sym
get_x_vars(F::FormulaTerm) = collect((t -> t.sym).(F.rhs))
my_lm(F::FormulaTerm, df::DataFrame) = my_lm(get_y_var(F), get_x_vars(F), df)

function Base.show(io::IO, mylm::MyLinearModel)
    linmodel, y_var, x_vars = getfield.(Ref(mylm), (:linmodel, :y_var, :x_vars))
    ct = coeftable(linmodel)
    ct.rownms .= string.((Symbol("(Intercept)"), x_vars...,))
    print(io, typeof(mylm), "\n\n")
    print(io, y_var, " ~ 1")
    for x in x_vars print(io, " + ", x) end
    print(io, "\n\nCoefficients:\n")
    show(io, ct)
    print(io, "\n")
end

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
df = DataFrame(rand(100, n+1), [:y; x_symbols]);

myresult = []
for _ in 1:10
    x_vars = sample(x_symbols, r; replace=false)
    F = term(:y) ~ sum(term(x) for x in x_vars)
    @time mylinmodel = my_lm(F, df)
    push!(myresult, mylinmodel)
end
myresult

  0.293451 seconds (770.34 k allocations: 48.130 MiB, 2.63% gc time)
  0.000060 seconds (63 allocations: 43.859 KiB)
  0.000039 seconds (63 allocations: 43.859 KiB)
  0.000026 seconds (63 allocations: 43.859 KiB)
  0.000026 seconds (63 allocations: 43.859 KiB)
  0.000056 seconds (63 allocations: 43.859 KiB)
  0.000035 seconds (63 allocations: 43.859 KiB)
  0.000027 seconds (63 allocations: 43.859 KiB)
  0.000027 seconds (63 allocations: 43.859 KiB)
  0.000046 seconds (63 allocations: 43.859 KiB)


10-element Vector{Any}:
 MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x14 + x17 + x16 + x20 + x8 + x1 + x7 + x2 + x3 + x19

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.523437     0.157095    3.33    0.0013   0.211293   0.835581
x14          -0.0275003    0.0961445  -0.29    0.7755  -0.218537   0.163537
x17          -0.18369      0.110731   -1.66    0.1007  -0.40371    0.0363297
x16          -0.0908091    0.096161   -0.94    0.3476  -0.281879   0.100261
x20           0.0355128    0.105439    0.34    0.7371  -0.173991   0.245017
x8            0.00252197   0.0966541   0.03    0.9792  -0.189528   0.194572
x1            0.0851432    0.111864    0.76  

## Comparison

In [6]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x7 + x2 + x3)
@time lm(F, df)

  0.448955 seconds (1.05 M allocations: 65.980 MiB, 25.70% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x7 + x2 + x3

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)  Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.502396     0.138674    3.62    0.0005   0.226896   0.777896
x9           -0.237931     0.0929719  -2.56    0.0122  -0.422635  -0.0532258
x5            0.149981     0.0928846   1.61    0.1099  -0.034551   0.334512
x1            0.0136653    0.106448    0.13    0.8981  -0.197812   0.225143
x4            0.074822     0.0970895   0.77    0.4429  -0.118063   0.267707
x6            0.0340805    0.0930273   0.37    0.7150  -0.150734   0.218895
x8            0.0261769    0.0910603   0.29    0.7744  -0.15473    0

In [7]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x7 + x3 + x2)
@time my_lm(F, df)

  0.025260 seconds (156.59 k allocations: 10.167 MiB, 99.28% compilation time)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x7 + x3 + x2

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)  Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.502396     0.138674    3.62    0.0005   0.226896   0.777896
x9           -0.237931     0.0929719  -2.56    0.0122  -0.422635  -0.0532258
x5            0.149981     0.0928846   1.61    0.1099  -0.034551   0.334512
x1            0.0136653    0.106448    0.13    0.8981  -0.197812   0.225143
x4            0.074822     0.0970895   0.77    0.4429  -0.118063   0.267707
x6            0.0340805    0.0930273   0.37    0.7150  -0.150734   0.218895
x8            0.0261769    0.0910603   0.29    0.7744  -0.15473    0.207084
x7  

In [8]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x3 + x2 + x7)
y_var, x_vars = get_y_var(F), get_x_vars(F)
@time my_lm(y_var, x_vars, df)

  0.000147 seconds (55 allocations: 39.828 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x3 + x2 + x7

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)  Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.502396     0.138674    3.62    0.0005   0.226896   0.777896
x9           -0.237931     0.0929719  -2.56    0.0122  -0.422635  -0.0532258
x5            0.149981     0.0928846   1.61    0.1099  -0.034551   0.334512
x1            0.0136653    0.106448    0.13    0.8981  -0.197812   0.225143
x4            0.074822     0.0970895   0.77    0.4429  -0.118063   0.267707
x6            0.0340805    0.0930273   0.37    0.7150  -0.150734   0.218895
x8            0.0261769    0.0910603   0.29    0.7744  -0.15473    0.207084
x3  

## Analysis of the slow-down

In [9]:
x_vars = sample(x_symbols, r; replace=false)
@show x_vars
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time lm(F, df)

x_vars = [:x7, :x20, :x5, :x1, :x18, :x17, :x3, :x6, :x19, :x12]
  0.061240 seconds (79.67 k allocations: 5.126 MiB, 99.45% compilation time)
  0.134871 seconds (223.50 k allocations: 14.362 MiB, 6.27% gc time, 3.08% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x7 + x20 + x5 + x1 + x18 + x17 + x3 + x6 + x19 + x12

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.353652    0.153175    2.31    0.0233   0.0492953  0.658008
x7           -0.0327077   0.106467   -0.31    0.7594  -0.244255   0.17884
x20           0.0340616   0.106244    0.32    0.7493  -0.177043   0.245166
x5            0.16345     0.0976176   1.67    0.0976  -0.0305147  0.357414
x1            0.0109484   0.111433    0.10    0.9220  -0.210467   0.232364
x18          -0.122705    0.105102   -1.17    0.2461  -0.33154    0.0861295
x17          -0.198469    0.109435   -1.81    0.0731  -0.415914   0.

In [10]:
@time my_lm(F, df)

  0.000059 seconds (57 allocations: 43.672 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x7 + x20 + x5 + x1 + x18 + x17 + x3 + x6 + x19 + x12

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.353652    0.153175    2.31    0.0233   0.0492953  0.658008
x7           -0.0327077   0.106467   -0.31    0.7594  -0.244255   0.17884
x20           0.0340616   0.106244    0.32    0.7493  -0.177043   0.245166
x5            0.16345     0.0976176   1.67    0.0976  -0.0305147  0.357414
x1            0.0109484   0.111433    0.10    0.9220  -0.210467   0.232364
x18          -0.122705    0.105102   -1.17    0.2461  -0.33154    0.0861295
x17          -0.198469    0.109435   -1.81    0.0731  -0.415914   0.0189767
x3  

In [11]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time my_lm(F, df)

  0.071065 seconds (79.68 k allocations: 5.130 MiB, 99.45% compilation time)
  0.000051 seconds (57 allocations: 43.672 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x15 + x9 + x13 + x14 + x5 + x6 + x17 + x2 + x4 + x12

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)  Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.639627     0.155678    4.11    <1e-04   0.330298   0.948957
x15          -0.028043     0.0902542  -0.31    0.7567  -0.207376   0.15129
x9           -0.245535     0.0936314  -2.62    0.0103  -0.431579  -0.0594914
x13           0.0309668    0.0928319   0.33    0.7395  -0.153488   0.215422
x14           0.00250496   0.0941011   0.03    0.9788  -0.184472   0.189482
x5            0.155966     0.0918486   1.70    0.0930  -0.026535   0.338467
x6            0.0455136    0.0917293   0.50    0.6210  -0.136751   0.22

In [12]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm);

  0.072341 seconds (79.68 k allocations: 5.143 MiB, 99.46% compilation time)
  0.000025 seconds (29 allocations: 1.766 KiB)
  0.075051 seconds (136.64 k allocations: 8.856 MiB, 15.63% gc time, 6.46% compilation time)
  0.064080 seconds (80.29 k allocations: 5.017 MiB, 99.44% compilation time)
  0.006485 seconds (3.77 k allocations: 282.191 KiB, 99.73% compilation time)
  0.000027 seconds (21 allocations: 23.188 KiB)
  0.003819 seconds (2.75 k allocations: 194.645 KiB, 96.56% compilation time)


In [13]:
typeof(cols)

NamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}}

In [14]:
typeof(mf)

ModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel}

In [15]:
typeof(mm) |> x -> (fieldnames(x), fieldtypes(x))

((:m, :assign), (Matrix{Float64}, Vector{Int64}))

In [16]:
typeof(y)

Vector{Float64} (alias for Array{Float64, 1})

In [17]:
typeof(linmodel)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}

In [18]:
typeof(regmodel)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

In [19]:
regmodel.model == linmodel

true

In [20]:
regmodel.mf == mf

true

In [21]:
regmodel.mm == mm

true

In [22]:
@which lm(F, df)

In [23]:
@which fit(LinearModel, F, df)

In [24]:
@which fit(LinearModel, mm.m, y)

In [25]:
@code_warntype term(:y) ~ sum(term(x) for x in x_vars)

MethodInstance for ~(::[0mTerm, ::[0mNTuple{10, Term})
  from ~(lhs::Union{Tuple{AbstractTerm, Vararg{AbstractTerm}}, AbstractTerm}, rhs::Union{Tuple{AbstractTerm, Vararg{AbstractTerm}}, AbstractTerm}) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\terms.jl:401
Arguments
  #self#[36m::Core.Const(~)[39m
  lhs[36m::Term[39m
  rhs[36m::NTuple{10, Term}[39m
Body[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
[90m1 ─[39m %1 = StatsModels.FormulaTerm(lhs, rhs)[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
[90m└──[39m      return %1



In [26]:
@code_warntype Tables.columntable(df)

MethodInstance for Tables.columntable(::[0mDataFrame)
  from columntable(itr::T) where T in Tables at D:\.julia\packages\Tables\gg6Id\src\namedtuples.jl:170
Static Parameters
  T = [36mDataFrame[39m
Arguments
  #self#[36m::Core.Const(Tables.columntable)[39m
  itr[36m::DataFrame[39m
Locals
  cols[36m::DataFrames.DataFrameColumns{DataFrame}[39m
Body[91m[1m::NamedTuple[22m[39m
[90m1 ─[39m      (cols = Tables.columns(itr))
[90m│  [39m %2 = (cols isa Tables.ColumnTable)[36m::Core.Const(false)[39m
[90m└──[39m      goto #3 if not %2
[90m2 ─[39m      Core.Const(:(return cols))
[90m3 ┄[39m %5 = Tables.schema(cols)[91m[1m::Tables.Schema[22m[39m
[90m│  [39m %6 = Tables.columntable(%5, cols)[91m[1m::NamedTuple[22m[39m
[90m└──[39m      return %6



In [27]:
@code_warntype ModelFrame(F, cols, model=LinearModel)

MethodInstance for (::Core.var"#Type##kw")(::[0mNamedTuple{(:model,), Tuple{UnionAll}}, ::[0mType{ModelFrame}, ::[0mFormulaTerm{Term, NTuple{10, Term}}, ::[0mNamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}})
  from (::Core.var"#Type##kw")(::Any, ::Type{ModelFrame}, f::FormulaTerm, data::NamedTuple{names, T} where {N, D, names, T<:Tuple{Vararg{AbstractArray{S, D} where S, N}}}) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:72
Arguments
  _[36m::Core.Const(Core.var"#Type##kw"())[39m
  @_2[36m::NamedTuple{(:model,), Tuple{UnionAll}}[39m
  @_3[36m::Type{ModelFrame}[39m
  f[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
  data[36m::NamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}}[39m
Locals
  model[91m[1m::UnionAll[22m[39m
  contrasts[

In [28]:
@code_warntype ModelMatrix(mf)

MethodInstance for ModelMatrix(::[0mModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel})
  from ModelMatrix(mf::ModelFrame) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:222
Arguments
  #self#[36m::Type{ModelMatrix}[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel}[39m
Body[91m[1m::ModelMatrix[22m[39m
[90m1 ─[39m %1 = Core.apply_type(StatsModels.Matrix, StatsModels.Float64)[36m::Core.Const(Matrix{Float64})[39m
[90m│  [39m %2 = Core.apply_type(StatsModels.ModelMatrix, %1)[36m::Core.Const(ModelMatrix{Matrix{Float64}})[39m
[90m│  [39m %3 = (%2)(mf)[91m[1m::ModelMatrix[22m[39m
[90m└──[39m      return %3



In [29]:
@code_warntype response(mf)

MethodInstance for StatsBase.response(::[0mModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel})
  from response(mf::ModelFrame; data) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:148
Arguments
  #self#[36m::Core.Const(StatsBase.response)[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel}[39m
Locals
  data[36m::NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}[39m
Body[91m[1m::Any[22m[39m
[90m1 ─[39m %1 = Base.getproperty(mf, :data)[36m::NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}[39m
[90m│  [39m      (data = %1)
[90m│  [39m %3 = StatsModels.:(var"#response#64")(data, #self#, mf)[91m[1m::Any[22m[39m
[90m└──[39m      return %3



In [30]:
@code_warntype fit(LinearModel, mm.m, y)

MethodInstance for StatsBase.fit(::[0mType{LinearModel}, ::[0mMatrix{Float64}, ::[0mVector{Float64})
  from fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) in GLM at D:\.julia\packages\GLM\5CcRd\src\lm.jl:156
Arguments
  #self#[36m::Core.Const(StatsBase.fit)[39m
  @_2[36m::Type{LinearModel}[39m
  X[36m::Matrix{Float64}[39m
  y[36m::Vector{Float64}[39m
Body[91m[1m::Union{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}}[22m[39m
[90m1 ─[39m %1 = (#self#)(@_2, X, y, GLM.nothing)[91m[1m::Union{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}}[22m[39m


In [31]:
@code_warntype StatsModels.TableRegressionModel(linmodel, mf, mm)

MethodInstance for StatsModels.TableRegressionModel(::[0mLinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, ::[0mModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20, :x7, :x18, :x15, :x3, :x19), NTuple{11, Vector{Float64}}}, LinearModel}, ::[0mModelMatrix{Matrix{Float64}})
  from StatsModels.TableRegressionModel(model::M, mf::ModelFrame, mm::ModelMatrix{T}) where {M, T} in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\statsmodel.jl:70
Static Parameters
  M = [36mLinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}[39m
  T = [36mMatrix{Float64}[39m
Arguments
  #self#[36m::Type{StatsModels.TableRegressionModel}[39m
  model[36m::LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x11, :x6, :x14, :x8, :x20

In [32]:
@code_warntype my_lm(:y, x_vars, df)

MethodInstance for my_lm(::[0mSymbol, ::[0mVector{Symbol}, ::[0mDataFrame)
  from my_lm(y_var::Symbol, x_vars::AbstractVector{Symbol}, df::DataFrame) in Main at In[5]:11
Arguments
  #self#[36m::Core.Const(my_lm)[39m
  y_var[36m::Symbol[39m
  x_vars[36m::Vector{Symbol}[39m
  df[36m::DataFrame[39m
Locals
  linmodel[91m[1m::LinearModel[22m[39m
  X[91m[1m::Matrix[22m[39m
  y[91m[1m::AbstractVector[22m[39m
Body[91m[1m::MyLinearModel{_A, Symbol, Vector{Symbol}} where _A[22m[39m
[90m1 ─[39m      (y = Base.getindex(df, Main.:!, y_var))
[90m│  [39m %2 = Main.nrow(df)[36m::Int64[39m
[90m│  [39m %3 = Main.ones(%2)[36m::Vector{Float64}[39m
[90m│  [39m %4 = Base.getindex(df, Main.:!, x_vars)[36m::DataFrame[39m
[90m│  [39m %5 = Main.Matrix(%4)[91m[1m::Matrix[22m[39m
[90m│  [39m      (X = Base.hcat(%3, %5))
[90m│  [39m      (linmodel = Main.lm(X, y))
[90m│  [39m %8 = Main.MyLinearModel(linmodel, y_var, x_vars)[91m[1m::MyLinearModel{_A, Symbol, Ve