## Symbolic Regression 

### The XOR Problem

In [1]:
using SymbolicRegression, MLJ

In [5]:
x = (x1 = Float64[1, 1, 0, 0], x2 = Float64[1, 0, 1, 0])
y = Float64[0, 1, 1, 0];

In [3]:
x

(x1 = [1.0, 1.0, 0.0, 0.0],
 x2 = [1.0, 0.0, 1.0, 0.0],)

In [4]:
y

4-element Vector{Float64}:
 0.0
 1.0
 1.0
 0.0

In [19]:
model = SRRegressor(
           niterations=50,
           binary_operators = [+, -, *],
           unary_operators = [abs],
           should_simplify = true,
           save_to_file = false
);

In [20]:
mach = machine(model, x, y)

untrained Machine; caches model-specific representations of data
  model: SRRegressor(binary_operators = Function[+, -, *], …)
  args: 
    1:	Source @629 ⏎ Table{AbstractVector{Continuous}}
    2:	Source @749 ⏎ AbstractVector{Continuous}


In [21]:
fit!(mach, verbosity = 0)

[33m[1m└ [22m[39m[90m@ SymbolicRegression ~/.julia/packages/SymbolicRegression/XKtla/src/SymbolicRegression.jl:546[39m


trained Machine; caches model-specific representations of data
  model: SRRegressor(binary_operators = Function[+, -, *], …)
  args: 
    1:	Source @629 ⏎ Table{AbstractVector{Continuous}}
    2:	Source @749 ⏎ AbstractVector{Continuous}


In [22]:
report(mach)

(best_idx = 2,
 equations = DynamicExpressions.EquationModule.Node{Float64}[0.49999999999953443, abs(x2 - x1)],
 equation_strings = ["0.49999999999953443", "abs(x2 - x1)"],
 losses = [0.25, 0.0],
 complexities = [1, 4],
 scores = [36.04365338911715, 12.014551129705717],)

In [23]:
predict(mach, x)

4-element Vector{Float64}:
 0.0
 1.0
 1.0
 0.0

In [24]:
y

4-element Vector{Float64}:
 0.0
 1.0
 1.0
 0.0

_________________________________

### Multiple Linear Regression

In [25]:
n = 100
x1 = rand(n)
x2 = rand(n)
e  = randn(n)
y = @. 5.0 + 5.0 * x1 + 5.0 * x2 + e 
hcat(x1, x2, e, y)

100×4 Matrix{Float64}:
 0.138917   0.682485   -0.58824    8.51877
 0.531597   0.679788    0.475486  11.5324
 0.675932   0.789144    1.55958   13.885
 0.152677   0.973508   -0.581972  10.049
 0.881541   0.62497    -1.86629   10.6663
 0.707281   0.499442    0.770999  11.8046
 0.239886   0.815299   -2.10495    8.17097
 0.964792   0.493836   -0.12281   12.1703
 0.42657    0.219206   -0.948479   7.2804
 0.200197   0.66679    -1.56679    7.76814
 0.423647   0.609216    0.228466  10.3928
 0.886924   0.989866    1.78651   16.1705
 0.104968   0.776594   -0.476318   8.93149
 ⋮                                
 0.563341   0.766334    0.580689  12.2291
 0.789549   0.757934    0.18192   12.9193
 0.0965075  0.0169687   0.732317   6.2997
 0.946819   0.782775    0.139021  13.787
 0.741317   0.400415    1.31655   12.0252
 0.95654    0.873976    0.484256  14.6368
 0.237202   0.762834   -0.928244   9.07193
 0.888619   0.174577   -0.739033   9.57695
 0.900598   0.933838    0.458701  14.6309
 0.260953   0.0

In [26]:
x = (x1 = x1, x2 = x2);

In [27]:
model = SRRegressor(
           niterations=50,
           binary_operators = [+, -, *, /, ^],
           unary_operators = [],
           should_simplify = true,
           save_to_file = false,
           should_optimize_constants = true
);

In [28]:
mach = machine(model, x, y)

untrained Machine; caches model-specific representations of data
  model: SRRegressor(binary_operators = Function[+, -, *, /, ^], …)
  args: 
    1:	Source @842 ⏎ Table{AbstractVector{Continuous}}
    2:	Source @664 ⏎ AbstractVector{Continuous}


In [29]:
fit!(mach, verbosity = 0)

[33m[1m└ [22m[39m[90m@ SymbolicRegression ~/.julia/packages/SymbolicRegression/XKtla/src/SymbolicRegression.jl:546[39m


trained Machine; caches model-specific representations of data
  model: SRRegressor(binary_operators = Function[+, -, *, /, ^], …)
  args: 
    1:	Source @842 ⏎ Table{AbstractVector{Continuous}}
    2:	Source @664 ⏎ AbstractVector{Continuous}


In [30]:
report(mach)

(best_idx = 4,
 equations = DynamicExpressions.EquationModule.Node{Float64}[10.232315007067802, (x1 + 9.746550911010067), ((x1 * 6.310149195723144) + 7.167071080353323), (((x1 + x2) * 5.054068687237679) + 5.018626666960289), (((x1 * 5.180283069347846) + 5.03230998980253) + (x2 * 4.91667179728503)), (((x1 * 4.605820339957924) + 5.288226281086519) + (x2 * (x1 + 4.437620668971425))), (((((x1 + x2) * 5.428241379008618) + (x2 ^ x1)) + 4.428128295392362) - x2), ((((x1 * 5.0313576105489926) + 5.544142572391885) + (x2 * (x1 + (3.9585695406457484 + x1)))) - x1), ((((x1 * 5.0313576105489926) + 5.544142572391885) + (x2 * (((x1 ^ x2) + x1) + 3.9585695406457484))) - x1), ((((x1 * 5.0313576105489926) + 5.544142572391885) + (x2 * (((x1 ^ x2) + x1) + 3.9585695406457484))) - (x1 ^ 0.9009793611948865))],
 equation_strings = ["10.232315007067802", "(x1 + 9.746550911010067)", "((x1 * 6.310149195723144) + 7.167071080353323)", "(((x1 + x2) * 5.054068687237679) + 5.018626666960289)", "(((x1 * 5.1802830693478

```julia
(((x1 + x2) * 5.054068687237679) + 5.018626666960289)
```