In [2]:
using Random, Distributions

In [3]:
function draw(a, b; N=100000)
    
    x = rand(Uniform(0,1), N)

    ϵ = rand(Normal(0.0, 0.5), N)

    cst = b*ones(N)
    y = cst + a*x + ϵ

    return (;x, y)
end

draw (generic function with 1 method)

In [4]:
d = draw(2.5, 0.4)

(x = [0.4350151469510595, 0.08755034603683709, 0.7899973864385258, 0.874088056801624, 0.5390167090357885, 0.05268300781460711, 0.6849597032810697, 0.5311955089564023, 0.9306257211897302, 0.32248421501998736  …  0.2588353894645916, 0.7417784023896656, 0.8092200504615876, 0.5190211606478282, 0.6000380119119957, 0.5648923596703321, 0.0015548807316835278, 0.4521861812769513, 0.4812921764078668, 0.6066125513888361], y = [1.2256866668714648, 0.2873909810504014, 2.634126516050436, 2.9683244531366575, 1.4618986157618603, 0.442396654137024, 2.062893880898062, 2.3547164496024235, 2.727925334162077, 1.2383591211327187  …  0.9671189696726858, 1.6844711756478992, 2.788742524047944, 2.548515550216168, 1.2136192348849302, 1.5791053251490803, -0.06762960681892466, 1.5619694453249342, 1.6916089126075584, 2.528741548802491])

In [5]:
using ForwardDiff

In [6]:
function loss(d, θ)

    N = length(d.x)
    x = d.x
    y = d.y
    a = θ[1]
    b = θ[2]
    
    ll = sum( (a*x[i] + b - y[i])^2 for i in 1:N) / N
    return ll 

end

loss (generic function with 1 method)

In [7]:
loss2min = θ -> loss(d, θ)

#14 (generic function with 1 method)

In [8]:
θ0 = [2.5; 0.4]
loss2min(θ0)
abs(loss2min(θ0))

0.2507676879915558

In [9]:
function gd(f, θ0, λ; N=1000, τ_η=1e-10, τ_ϵ=1e-10)
    
    θ1 = θ0
    local θ1
    
    for n in 1:N
        θ1 = θ0 - λ*ForwardDiff.gradient(f, θ0)

        η = maximum(abs, θ1 - θ0)
        ϵ = abs(f(θ0))

        # @show(n, ϵ, η)
        
        if η < τ_η || ϵ < τ_ϵ
            break
        end

        θ0 = θ1
    end
    return θ1
end

gd (generic function with 1 method)

In [10]:
θ0 = [2, 0.5]
λ = 0.1
gd(loss2min, θ0, λ)

2-element Vector{Float64}:
 2.49663379758633
 0.4012365762766747

In [11]:
function bestL(θstar, θ0)
    λstar = 0.0  # Start with a default
    for j in 0.1:0.05:1
        θ = gd(loss2min, θ0, j)
        η = maximum(abs, θstar - θ)
        if η > 1e-1 
            λstar = j 
            break
        end
    end
    return λstar
end


bestL (generic function with 1 method)

In [12]:
θstar = [2.5; 0.4]
bestL(θstar, θ0)

0.8

In [13]:
rand(d.x) 

0.7432706835733192

In [14]:
import ForwardDiff: derivative

In [15]:
function ξfun(d, a, b)

    ii = rand(1:length(d.x))
    x = d.x
    y = d.y
    emploss(a, b, x, y) = (a*x[ii] + b - y[ii])^2

    ξ = emploss(a, b, x, y) 
    A = derivative(u->emploss(u, b, x, y), a)
    B = derivative(u->emploss(a, u, x, y), b)

    return ξ, A, B
end

ξfun (generic function with 1 method)

In [16]:
ξfun(d, 2.5, 0.4)

(0.17158079780681698, 0.6597242955925863, 0.8284462512603143)

In [17]:
ξfun

ξfun (generic function with 1 method)

In [37]:
function sgd(d, a0, b0; λ=0.01, N=100000, τ_η=1e-10)

    a1 = a0
    b1 = b0
    nend = 0

    for n in 1:N
            
        ξ, A, B = ξfun(d, a0, b0)

        a1 = a0 - λ*A
        b1 = b0 - λ*B

        ηa = maximum(abs, a1 - a0)
        ηb = maximum(abs, b1 - b0)
        η = max(ηa, ηb)

        # @show n η
        
        if η < τ_η
            break
        end

        a0 = a1
        b0 = b1
        nend = n

    end
    return a1, b1, nend
end

sgd (generic function with 1 method)

In [38]:
sgd(d, 0.0, 0.0)

(2.417446929230463, 0.3974855172219214, 100000)