https://github.com/JuliaStats/StatsModels.jl/issues/220

## Reproduce the slow-down (1)

In [1]:
using DataFrames
using GLM
using StatsBase: sample

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
@time df = DataFrame(rand(100, n+1), [:y; x_symbols]);

  0.503047 seconds (1.89 M allocations: 109.199 MiB, 23.70% gc time, 99.95% compilation time)


In [2]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm)

  0.092762 seconds (279.83 k allocations: 18.107 MiB, 8.61% gc time, 101.50% compilation time)
  0.224474 seconds (653.37 k allocations: 41.294 MiB, 3.27% gc time, 43.61% compilation time)
  1.559864 seconds (5.94 M allocations: 373.246 MiB, 4.94% gc time)
  0.307471 seconds (1.20 M allocations: 75.759 MiB, 5.61% gc time)
  0.006348 seconds (3.78 k allocations: 283.598 KiB, 99.78% compilation time)
  1.156184 seconds (5.13 M allocations: 290.800 MiB, 7.08% gc time, 0.11% compilation time)
  0.003436 seconds (3.76 k allocations: 276.488 KiB, 95.85% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x18 + x19 + x2 + x10 + x5 + x7 + x9 + x13 + x17 + x20

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.504175     0.178804   2.82    0.0059   0.148895    0.859454
x18          -0.0576534    0.101695  -0.57    0.5722  -0.25972     0.144413
x19           0.0994866    0.115147   0.86    0.3899  -0.129307    0.328281
x2            0.104482     0.116558   0.90    0.3725  -0.127117    0.336081
x10          -0.0957074    0.113078  -0.85    0.3996  -0.320392    0.128977
x5           -0.0366147    0.114829  -0.32    0.7506  -0.264777    0.191547
x7           -0.0677515    0.113873  -0.59    0.5534  -0.2940

In [3]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm)

  0.090717 seconds (79.69 k allocations: 5.129 MiB, 15.95% gc time, 99.42% compilation time)
  0.000044 seconds (29 allocations: 1.766 KiB)
  0.055377 seconds (136.59 k allocations: 8.852 MiB, 7.11% compilation time)
  0.061358 seconds (80.30 k allocations: 5.017 MiB, 99.59% compilation time)
  0.006075 seconds (3.77 k allocations: 282.848 KiB, 99.78% compilation time)
  0.000030 seconds (21 allocations: 23.188 KiB)
  0.003412 seconds (2.75 k allocations: 194.801 KiB, 95.94% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x2 + x19 + x6 + x1 + x11 + x15 + x17 + x12 + x20 + x4

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.30595      0.163644    1.87    0.0648  -0.0192076  0.631108
x2            0.112064     0.111616    1.00    0.3181  -0.109714   0.333843
x19           0.105054     0.116221    0.90    0.3685  -0.125875   0.335983
x6           -0.0355344    0.0997205  -0.36    0.7224  -0.233677   0.162608
x1            0.00261201   0.112775    0.02    0.9816  -0.221469   0.226693
x11           0.065533     0.103998    0.63    0.5302  -0.141109   0.272175
x15           0.0604833    0.110128    0.55    0.5842  -0.

## Reproduce the slow-down (2)

In [4]:
using DataFrames
using GLM
using StatsBase: sample

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
df = DataFrame(rand(100, n+1), [:y; x_symbols]);

result = []
for _ in 1:10
    x_vars = sample(x_symbols, r; replace=false)
    F = term(:y) ~ sum(term(x) for x in x_vars)
    @time regmodel = lm(F, df)
    push!(result, regmodel)
end
result

  1.784460 seconds (6.72 M allocations: 457.123 MiB, 11.22% gc time)
  0.126404 seconds (223.50 k allocations: 14.361 MiB, 3.38% compilation time)
  0.127837 seconds (223.50 k allocations: 14.362 MiB, 3.01% compilation time)
  0.134096 seconds (223.50 k allocations: 14.362 MiB, 7.22% gc time, 2.87% compilation time)
  0.125043 seconds (223.51 k allocations: 14.373 MiB, 3.14% compilation time)
  0.126464 seconds (223.51 k allocations: 14.364 MiB, 3.28% compilation time)
  0.136653 seconds (223.51 k allocations: 14.368 MiB, 5.89% gc time, 3.34% compilation time)
  0.123660 seconds (223.50 k allocations: 14.361 MiB, 3.08% compilation time)
  0.122886 seconds (223.51 k allocations: 14.364 MiB, 3.05% compilation time)
  0.147046 seconds (223.50 k allocations: 14.364 MiB, 5.33% gc time, 6.86% compilation time)


10-element Vector{Any}:
 StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x19 + x5 + x3 + x16 + x6 + x18 + x7 + x14 + x17 + x8

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.566172     0.17108     3.31    0.0014   0.22624    0.906104
x19          -0.0457204    0.0938366  -0.49    0.6273  -0.232172   0.140731
x5            0.0956336    0.0971749   0.98    0.3277  -0.0974508  0.288718
x3           -0.173468     0.0978875  -1.77    0.0798  -0.367969   0.0210319
x16           0.0243709    0.0997933   0.24    0.8076  -0.173916   0.222658
x6           -0.0300946    0.103512   -0.29    0.7719  -0.235772   0.175582
x18          -0.121171     0.1030

## Solution

In [5]:
using DataFrames
using GLM
using StatsBase: sample

struct MyLinearModel{T, Y, X}
    linmodel::T
    y_var::Y
    x_vars::X
end

function my_lm(y_var::Symbol, x_vars::AbstractVector{Symbol}, df::DataFrame)
    y = df[!, y_var]
    X = [ones(nrow(df)) Matrix(df[!, x_vars])]
    linmodel = lm(X, y)
    MyLinearModel(linmodel, y_var, x_vars)
end

get_y_var(F::FormulaTerm) = F.lhs.sym
get_x_vars(F::FormulaTerm) = collect((t -> t.sym).(F.rhs))
my_lm(F::FormulaTerm, df::DataFrame) = my_lm(get_y_var(F), get_x_vars(F), df)

function Base.show(io::IO, mylm::MyLinearModel)
    linmodel, y_var, x_vars = getfield.(Ref(mylm), (:linmodel, :y_var, :x_vars))
    ct = coeftable(linmodel)
    ct.rownms .= string.((Symbol("(Intercept)"), x_vars...,))
    print(io, typeof(mylm), "\n\n")
    print(io, y_var, " ~ 1")
    for x in x_vars print(io, " + ", x) end
    print(io, "\n\nCoefficients:\n")
    show(io, ct)
    print(io, "\n")
end

n = 20
r = 10
x_symbols = [Symbol("x$i") for i in 1:n]
df = DataFrame(rand(100, n+1), [:y; x_symbols]);

myresult = []
for _ in 1:10
    x_vars = sample(x_symbols, r; replace=false)
    F = term(:y) ~ sum(term(x) for x in x_vars)
    @time mylinmodel = my_lm(F, df)
    push!(myresult, mylinmodel)
end
myresult

  0.278681 seconds (770.34 k allocations: 48.130 MiB, 3.17% gc time)
  0.000040 seconds (63 allocations: 43.859 KiB)
  0.000026 seconds (63 allocations: 43.859 KiB)
  0.000024 seconds (63 allocations: 43.859 KiB)
  0.000055 seconds (63 allocations: 43.859 KiB)
  0.000041 seconds (63 allocations: 43.859 KiB)
  0.000065 seconds (63 allocations: 43.859 KiB)
  0.000047 seconds (63 allocations: 43.859 KiB)
  0.000032 seconds (63 allocations: 43.859 KiB)
  0.000032 seconds (63 allocations: 43.859 KiB)


10-element Vector{Any}:
 MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x12 + x16 + x8 + x5 + x9 + x3 + x10 + x1 + x20 + x11

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.568588     0.196582    2.89    0.0048   0.177984   0.959192
x12           0.0123412    0.114897    0.11    0.9147  -0.215956   0.240639
x16           0.0712865    0.0978897   0.73    0.4684  -0.123218   0.265791
x8           -0.00384715   0.108042   -0.04    0.9717  -0.218525   0.210831
x5            0.11771      0.100797    1.17    0.2460  -0.0825707  0.317991
x9            3.64984e-5   0.111158    0.00    0.9997  -0.220832   0.220905
x3           -0.0391247    0.106687   -0.37   

## Comparison

In [6]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x7 + x2 + x3)
@time lm(F, df)

  0.463994 seconds (1.05 M allocations: 65.980 MiB, 3.88% gc time, 26.35% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x7 + x2 + x3

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.401332      0.173235   2.32    0.0228   0.0571705   0.745493
x9            0.00853425    0.11292    0.08    0.9399  -0.215802    0.23287
x5            0.142828      0.105807   1.35    0.1804  -0.067377    0.353033
x1           -0.0644377     0.102795  -0.63    0.5323  -0.268657    0.139782
x4            0.123616      0.103691   1.19    0.2363  -0.0823846   0.329617
x6            0.110805      0.10741    1.03    0.3050  -0.102584    0.324194
x8           -0.0567779     0.110607  -0.51    0.6090  -0.276518

In [7]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x7 + x3 + x2)
@time my_lm(F, df)

  0.028297 seconds (156.59 k allocations: 10.167 MiB, 99.41% compilation time)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x7 + x3 + x2

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.401332      0.173235   2.32    0.0228   0.0571705   0.745493
x9            0.00853425    0.11292    0.08    0.9399  -0.215802    0.23287
x5            0.142828      0.105807   1.35    0.1804  -0.067377    0.353033
x1           -0.0644377     0.102795  -0.63    0.5323  -0.268657    0.139782
x4            0.123616      0.103691   1.19    0.2363  -0.0823846   0.329617
x6            0.110805      0.10741    1.03    0.3050  -0.102584    0.324194
x8           -0.0567779     0.110607  -0.51    0.6090  -0.276518    0.162962

In [8]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x3 + x7 + x2)
@time my_lm(F, df)

  0.000064 seconds (56 allocations: 39.984 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x3 + x7 + x2

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.401332      0.173235   2.32    0.0228   0.0571705   0.745493
x9            0.00853425    0.11292    0.08    0.9399  -0.215802    0.23287
x5            0.142828      0.105807   1.35    0.1804  -0.067377    0.353033
x1           -0.0644377     0.102795  -0.63    0.5323  -0.268657    0.139782
x4            0.123616      0.103691   1.19    0.2363  -0.0823846   0.329617
x6            0.110805      0.10741    1.03    0.3050  -0.102584    0.324194
x8           -0.0567779     0.110607  -0.51    0.6090  -0.276518    0.162962

In [9]:
F = @formula(y ~ x9 + x5 + x1 + x4 + x6 + x8 + x3 + x2 + x7)
y_var, x_vars = get_y_var(F), get_x_vars(F)
@time my_lm(y_var, x_vars, df)

  0.000067 seconds (55 allocations: 39.828 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x9 + x5 + x1 + x4 + x6 + x8 + x3 + x2 + x7

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.401332      0.173235   2.32    0.0228   0.0571705   0.745493
x9            0.00853425    0.11292    0.08    0.9399  -0.215802    0.23287
x5            0.142828      0.105807   1.35    0.1804  -0.067377    0.353033
x1           -0.0644377     0.102795  -0.63    0.5323  -0.268657    0.139782
x4            0.123616      0.103691   1.19    0.2363  -0.0823846   0.329617
x6            0.110805      0.10741    1.03    0.3050  -0.102584    0.324194
x8           -0.0567779     0.110607  -0.51    0.6090  -0.276518    0.162962

## Analysis of the slow-down

In [10]:
x_vars = sample(x_symbols, r; replace=false)
@show x_vars
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time lm(F, df)

x_vars = [:x13, :x19, :x5, :x11, :x2, :x8, :x16, :x17, :x10, :x12]
  0.064104 seconds (79.67 k allocations: 5.126 MiB, 99.49% compilation time)
  0.124985 seconds (223.50 k allocations: 14.364 MiB, 3.20% compilation time)


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x13 + x19 + x5 + x11 + x2 + x8 + x16 + x17 + x10 + x12

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.655442    0.179835    3.64    0.0004   0.298114   1.01277
x13          -0.0387303   0.111195   -0.35    0.7284  -0.259672   0.182211
x19           0.0350278   0.105879    0.33    0.7416  -0.175351   0.245406
x5            0.105083    0.0978979   1.07    0.2860  -0.0894382  0.299604
x11          -0.0919563   0.0995684  -0.92    0.3582  -0.289797   0.105884
x2            0.0210218   0.11058     0.19    0.8497  -0.198698   0.240742
x8           -0.0207182   0.110514   -0.19    0.8517  -0.240308   0

In [11]:
@time my_lm(F, df)

  0.000062 seconds (57 allocations: 43.672 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x13 + x19 + x5 + x11 + x2 + x8 + x16 + x17 + x10 + x12

Coefficients:
───────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)   0.655442    0.179835    3.64    0.0004   0.298114   1.01277
x13          -0.0387303   0.111195   -0.35    0.7284  -0.259672   0.182211
x19           0.0350278   0.105879    0.33    0.7416  -0.175351   0.245406
x5            0.105083    0.0978979   1.07    0.2860  -0.0894382  0.299604
x11          -0.0919563   0.0995684  -0.92    0.3582  -0.289797   0.105884
x2            0.0210218   0.11058     0.19    0.8497  -0.198698   0.240742
x8           -0.0207182   0.110514   -0.19    0.8517  -0.240308   0.198871
x16 

In [12]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time my_lm(F, df)

  0.075675 seconds (79.68 k allocations: 5.130 MiB, 99.10% compilation time)
  0.000055 seconds (57 allocations: 43.672 KiB)


MyLinearModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Symbol, Vector{Symbol}}

y ~ 1 + x17 + x10 + x19 + x5 + x6 + x4 + x18 + x12 + x7 + x9

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%    Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   0.308478    0.187583    1.64    0.1036  -0.0642461   0.681201
x17          -0.200713    0.0982167  -2.04    0.0440  -0.395868   -0.00555905
x10          -0.0956124   0.106189   -0.90    0.3703  -0.306608    0.115383
x19           0.0533096   0.101913    0.52    0.6022  -0.149189    0.255809
x5            0.123523    0.0956868   1.29    0.2001  -0.0666042   0.313651
x6            0.077276    0.104575    0.74    0.4619  -0.130512    0.285064
x4            0.134067    0.0996468   1.35    0.1819  -0.0639295  

In [13]:
x_vars = sample(x_symbols, r; replace=false)
@time F = term(:y) ~ sum(term(x) for x in x_vars)
@time cols = Tables.columntable(df)
@time mf = ModelFrame(F, cols, model=LinearModel)
@time mm = ModelMatrix(mf)
@time y = response(mf)
@time linmodel = fit(LinearModel, mm.m, y)
@time regmodel = StatsModels.TableRegressionModel(linmodel, mf, mm);

  0.069556 seconds (79.68 k allocations: 5.143 MiB, 99.47% compilation time)
  0.000023 seconds (29 allocations: 1.766 KiB)
  0.068029 seconds (136.64 k allocations: 8.856 MiB, 13.39% gc time, 6.09% compilation time)
  0.061614 seconds (80.29 k allocations: 5.017 MiB, 99.57% compilation time)
  0.006189 seconds (3.77 k allocations: 282.191 KiB, 99.75% compilation time)
  0.000027 seconds (21 allocations: 23.188 KiB)
  0.003430 seconds (2.75 k allocations: 194.645 KiB, 96.01% compilation time)


In [14]:
typeof(cols)

NamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}}

In [15]:
typeof(mf)

ModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel}

In [16]:
typeof(mm) |> x -> (fieldnames(x), fieldtypes(x))

((:m, :assign), (Matrix{Float64}, Vector{Int64}))

In [17]:
typeof(y)

Vector{Float64} (alias for Array{Float64, 1})

In [18]:
typeof(linmodel)

LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}

In [19]:
typeof(regmodel)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

In [20]:
regmodel.model == linmodel

true

In [21]:
regmodel.mf == mf

true

In [22]:
regmodel.mm == mm

true

In [23]:
@which lm(F, df)

In [24]:
@which fit(LinearModel, F, df)

In [25]:
@which fit(LinearModel, mm.m, y)

In [26]:
@code_warntype term(:y) ~ sum(term(x) for x in x_vars)

MethodInstance for ~(::[0mTerm, ::[0mNTuple{10, Term})
  from ~(lhs::Union{Tuple{AbstractTerm, Vararg{AbstractTerm}}, AbstractTerm}, rhs::Union{Tuple{AbstractTerm, Vararg{AbstractTerm}}, AbstractTerm}) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\terms.jl:401
Arguments
  #self#[36m::Core.Const(~)[39m
  lhs[36m::Term[39m
  rhs[36m::NTuple{10, Term}[39m
Body[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
[90m1 ─[39m %1 = StatsModels.FormulaTerm(lhs, rhs)[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
[90m└──[39m      return %1



In [27]:
@code_warntype Tables.columntable(df)

MethodInstance for Tables.columntable(::[0mDataFrame)
  from columntable(itr::T) where T in Tables at D:\.julia\packages\Tables\gg6Id\src\namedtuples.jl:170
Static Parameters
  T = [36mDataFrame[39m
Arguments
  #self#[36m::Core.Const(Tables.columntable)[39m
  itr[36m::DataFrame[39m
Locals
  cols[36m::DataFrames.DataFrameColumns{DataFrame}[39m
Body[91m[1m::NamedTuple[22m[39m
[90m1 ─[39m      (cols = Tables.columns(itr))
[90m│  [39m %2 = (cols isa Tables.ColumnTable)[36m::Core.Const(false)[39m
[90m└──[39m      goto #3 if not %2
[90m2 ─[39m      Core.Const(:(return cols))
[90m3 ┄[39m %5 = Tables.schema(cols)[91m[1m::Tables.Schema[22m[39m
[90m│  [39m %6 = Tables.columntable(%5, cols)[91m[1m::NamedTuple[22m[39m
[90m└──[39m      return %6



In [28]:
@code_warntype ModelFrame(F, cols, model=LinearModel)

MethodInstance for (::Core.var"#Type##kw")(::[0mNamedTuple{(:model,), Tuple{UnionAll}}, ::[0mType{ModelFrame}, ::[0mFormulaTerm{Term, NTuple{10, Term}}, ::[0mNamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}})
  from (::Core.var"#Type##kw")(::Any, ::Type{ModelFrame}, f::FormulaTerm, data::NamedTuple{names, T} where {N, D, names, T<:Tuple{Vararg{AbstractArray{S, D} where S, N}}}) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:72
Arguments
  _[36m::Core.Const(Core.var"#Type##kw"())[39m
  @_2[36m::NamedTuple{(:model,), Tuple{UnionAll}}[39m
  @_3[36m::Type{ModelFrame}[39m
  f[36m::FormulaTerm{Term, NTuple{10, Term}}[39m
  data[36m::NamedTuple{(:y, :x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17, :x18, :x19, :x20), NTuple{21, Vector{Float64}}}[39m
Locals
  model[91m[1m::UnionAll[22m[39m
  contrasts[

In [29]:
@code_warntype ModelMatrix(mf)

MethodInstance for ModelMatrix(::[0mModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel})
  from ModelMatrix(mf::ModelFrame) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:222
Arguments
  #self#[36m::Type{ModelMatrix}[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel}[39m
Body[91m[1m::ModelMatrix[22m[39m
[90m1 ─[39m %1 = Core.apply_type(StatsModels.Matrix, StatsModels.Float64)[36m::Core.Const(Matrix{Float64})[39m
[90m│  [39m %2 = Core.apply_type(StatsModels.ModelMatrix, %1)[36m::Core.Const(ModelMatrix{Matrix{Float64}})[39m
[90m│  [39m %3 = (%2)(mf)[91m[1m::ModelMatrix[22m[39m
[90m└──[39m      return %3



In [30]:
@code_warntype response(mf)

MethodInstance for StatsBase.response(::[0mModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel})
  from response(mf::ModelFrame; data) in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\modelframe.jl:148
Arguments
  #self#[36m::Core.Const(StatsBase.response)[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel}[39m
Locals
  data[36m::NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}[39m
Body[91m[1m::Any[22m[39m
[90m1 ─[39m %1 = Base.getproperty(mf, :data)[36m::NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}[39m
[90m│  [39m      (data = %1)
[90m│  [39m %3 = StatsModels.:(var"#response#64")(data, #self#, mf)[91m[1m::Any[22m[39m
[90m└──[39m      return %3



In [31]:
@code_warntype fit(LinearModel, mm.m, y)

MethodInstance for StatsBase.fit(::[0mType{LinearModel}, ::[0mMatrix{Float64}, ::[0mVector{Float64})
  from fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}) in GLM at D:\.julia\packages\GLM\5CcRd\src\lm.jl:156
Arguments
  #self#[36m::Core.Const(StatsBase.fit)[39m
  @_2[36m::Type{LinearModel}[39m
  X[36m::Matrix{Float64}[39m
  y[36m::Vector{Float64}[39m
Body[91m[1m::Union{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}}[22m[39m
[90m1 ─[39m %1 = (#self#)(@_2, X, y, GLM.nothing)[91m[1m::Union{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}}[22m[39m


In [32]:
@code_warntype StatsModels.TableRegressionModel(linmodel, mf, mm)

MethodInstance for StatsModels.TableRegressionModel(::[0mLinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, ::[0mModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3, :x8, :x1, :x7, :x18, :x4), NTuple{11, Vector{Float64}}}, LinearModel}, ::[0mModelMatrix{Matrix{Float64}})
  from StatsModels.TableRegressionModel(model::M, mf::ModelFrame, mm::ModelMatrix{T}) where {M, T} in StatsModels at D:\.julia\packages\StatsModels\JZLpf\src\statsmodel.jl:70
Static Parameters
  M = [36mLinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}[39m
  T = [36mMatrix{Float64}[39m
Arguments
  #self#[36m::Type{StatsModels.TableRegressionModel}[39m
  model[36m::LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}[39m
  mf[36m::ModelFrame{NamedTuple{(:y, :x10, :x16, :x17, :x14, :x3

In [33]:
@code_warntype my_lm(:y, x_vars, df)

MethodInstance for my_lm(::[0mSymbol, ::[0mVector{Symbol}, ::[0mDataFrame)
  from my_lm(y_var::Symbol, x_vars::AbstractVector{Symbol}, df::DataFrame) in Main at In[5]:11
Arguments
  #self#[36m::Core.Const(my_lm)[39m
  y_var[36m::Symbol[39m
  x_vars[36m::Vector{Symbol}[39m
  df[36m::DataFrame[39m
Locals
  linmodel[91m[1m::LinearModel[22m[39m
  X[91m[1m::Matrix[22m[39m
  y[91m[1m::AbstractVector[22m[39m
Body[91m[1m::MyLinearModel{_A, Symbol, Vector{Symbol}} where _A[22m[39m
[90m1 ─[39m      (y = Base.getindex(df, Main.:!, y_var))
[90m│  [39m %2 = Main.nrow(df)[36m::Int64[39m
[90m│  [39m %3 = Main.ones(%2)[36m::Vector{Float64}[39m
[90m│  [39m %4 = Base.getindex(df, Main.:!, x_vars)[36m::DataFrame[39m
[90m│  [39m %5 = Main.Matrix(%4)[91m[1m::Matrix[22m[39m
[90m│  [39m      (X = Base.hcat(%3, %5))
[90m│  [39m      (linmodel = Main.lm(X, y))
[90m│  [39m %8 = Main.MyLinearModel(linmodel, y_var, x_vars)[91m[1m::MyLinearModel{_A, Symbol, Ve