In [None]:
#= Machine Learning Algorithms in Julia
This notebook contains supervised and (some) unsupervised machine learning algorithms. 
=#

In [1]:
"""
    knn(X, Y, x, k)

Predict the output at query point `x` by averaging the outputs of its `k` nearest
training examples, where "nearest" is measured by squared Euclidean distance.

# Arguments
- `X`: matrix whose rows are the training inputs.
- `Y`: training outputs; row `i` of `Y` belongs to row `i` of `X`.
- `x`: query point, broadcast-compatible with a row of `X`.
- `k`: number of neighbors to average; must satisfy `1 <= k <= size(X, 1)`.

# Returns
A one-row array holding the mean of the `k` selected rows of `Y`.

# Throws
- `ArgumentError` if `k` is outside `1:size(X, 1)`.
"""
function knn(X, Y, x, k)
    n = size(X, 1)
    # k == 0 used to return NaN silently (0/0) and k > n failed with an opaque BoundsError.
    1 <= k <= n || throw(ArgumentError("k must be between 1 and $n, got $k"))
    dists_sq = [sum((X[i, :] .- x) .^ 2) for i in 1:n]
    # Only the k smallest distances matter, so skip the full sort of all n entries.
    nearest = partialsortperm(dists_sq, 1:k)
    return sum(Y[nearest, :], dims=1) / k
end

knn (generic function with 1 method)

In [2]:
"""
    softknn(X, Y, x, rho)

Predict the output at query point `x` as a weighted average of all training outputs,
with weights proportional to `exp(-d_i / rho^2)`, where `d_i` is the squared Euclidean
distance from `x` to row `i` of `X`.

# Arguments
- `X`: matrix whose rows are the training inputs.
- `Y`: training outputs; row `i` of `Y` belongs to row `i` of `X`.
- `x`: query point, broadcast-compatible with a row of `X`.
- `rho`: length scale of the weighting; smaller values concentrate weight on closer rows.

# Returns
A one-row array holding the weighted combination of the rows of `Y`.
"""
function softknn(X, Y, x, rho)
    n = size(X, 1)
    scaled = [sum((X[i, :] .- x) .^ 2) / rho^2 for i in 1:n]
    # Subtracting the smallest exponent leaves the normalised weights unchanged but keeps
    # the largest weight at exp(0) = 1, so a far-away query no longer underflows every
    # weight to zero and produces 0/0 = NaN.
    shift = isempty(scaled) ? 0 : minimum(scaled)
    exp_weights = exp.(-(scaled .- shift))
    w = exp_weights / sum(exp_weights)
    return sum(w .* Y, dims=1)
end

softknn (generic function with 1 method)

In [3]:
"""
    ridgeregressionconstfeature(X, Y, lambda)

Solve a ridge-regression problem in which the first coefficient is left unpenalised
(intended for an `X` whose first column is a constant feature).

The penalty is expressed as extra least-squares rows: a selector with a zero first column
and an identity block for the remaining columns is scaled by `sqrt(lambda * n)` and
stacked under `X`, with matching zero rows stacked under `Y`.

# Arguments
- `X`: `n`-by-`d` input matrix.
- `Y`: outputs with `n` rows.
- `lambda`: regularisation weight.

# Returns
The least-squares solution of the stacked system, one column per column of `Y`.
"""
function ridgeregressionconstfeature(X, Y, lambda)
    nsamples, nfeatures = size(X)
    noutputs = size(Y, 2)
    npenalised = nfeatures - 1
    # [0 | I]: picks out every coefficient except the first
    selector = [zeros(npenalised, 1) I(npenalised)]
    stacked_inputs = [X; sqrt(lambda * nsamples) * selector]
    stacked_targets = [Y; zeros(npenalised, noutputs)]
    return stacked_inputs \ stacked_targets
end

ridgeregressionconstfeature (generic function with 1 method)

In [4]:
"""
    tiltedLoss(y_hat, y, tao)

Tilted (pinball) loss of predicting `y_hat` when the true value is `y`.

An under-prediction (`y_hat < y`) costs `tao * (y - y_hat)`; otherwise the cost is
`(1 - tao) * (y_hat - y)`.
"""
function tiltedLoss(y_hat, y, tao)
    residual = y_hat - y
    return residual < 0 ? -tao * residual : (1 - tao) * residual
end

tiltedLoss (generic function with 1 method)

In [5]:
using Flux
using LinearAlgebra

function regression_fit(X, Y, l, r, lambda; numiters = 500)
    # Fit the linear predictor theta'*x by training on a regularised per-example cost.
    #
    #     Inputs:
    #         X: input data; each row is one example (rows are walked with eachrow)
    #         Y: output data; row i is paired with row i of X, and only its first entry is used
    #         l: loss function, called as l(yhat, y)
    #         r: regulariser, called as r(theta)
    #         lambda: regularization hyper-parameter (multiplies r(theta))
    #         numiters (optional): repeat count handed to Flux.@epochs
    #     Returns:
    #         theta: the fitted parameter vector, of length size(X, 2)
        
        # (input row, output row) pairs
        data = zip(eachrow(X), eachrow(Y))
        n,d = size(X)
        # parameters start at all ones; there is no separate offset term
        theta = ones(d)
        predicty(x) = theta'*x
        # y is a row of Y, so y[1] picks its first (scalar) entry
        loss(x, y) = l(predicty(x), y[1])
        # the regulariser is added to every example's cost
        cost(x,y) = loss(x,y) + lambda*r(theta)
        # average cost over the data set; the generator variable `d` is local to the
        # generator and does not touch the column count `d` above
        risk() = sum((cost(d...) for d in data))/n
        opt = Flux.ADAGrad()
        # NOTE(review): `losses` is untyped, is only ever filled by `tracker`, and is not
        # returned, so the recorded risk history is discarded when the function exits
        losses = []
        tracker() = push!(losses, risk())
        # NOTE(review): presumably Flux.throttle(tracker, 10) limits the callback to once
        # every 10 seconds -- confirm against the Flux version in use
        Flux.@epochs numiters Flux.train!(cost, Flux.params(theta), data, opt, cb = Flux.throttle(tracker,10))
        return theta
    end

regression_fit (generic function with 1 method)

In [6]:
function nnregression(X, Y, lambda; numiters=40)
    # Train a small fully connected network on the rows of X and Y and return the model.
    #
    #     Inputs:
    #         X: input data; each row is one example
    #         Y: output data; row i is paired with row i of X
    #         lambda: multiplier on the weight regulariser reg()
    #         numiters (optional): repeat count handed to Flux.@epochs
    #     Returns:
    #         model: the Flux Chain after training

    # d = number of input columns, m = number of output columns
    d = size(X,2); m = size(Y,2)
    # three relu layers of width 10 followed by a linear (identity) output layer of width m
    model = Chain(
        Dense(d, 10, relu),
        Dense(10, 10, relu),
        Dense(10, 10, relu),
        Dense(10, m, identity))
    # (input row, output row) pairs
    data = zip(eachrow(X), eachrow(Y))
    
    normsquared(x) = sum(x.*x)
    
    # sum of squared entries of each layer's `W` field; no other layer field is included.
    # The comprehension variable `m` is a layer and is local to the comprehension, so it
    # does not touch the output width `m` above.
    reg() = sum([normsquared(m.W) for m in model])
    predicty(x) = model(x)
    
    # squared error of the prediction for one example
    loss(x,y) = normsquared(predicty(x)-y)
    # the regulariser is added to every example's cost
    cost(x,y) = loss(x,y) + lambda*reg()    
    # Flux.Descent optimiser constructed with 0.001
    opt = Flux.Descent(0.001)
    Flux.@epochs numiters Flux.train!(cost, Flux.params(model), data, opt)
    return model
end

nnregression (generic function with 1 method)

In [7]:
function multi_logistic(X, Y, reps)
    # Fit a linear predictor theta'*x by training on a sum-of-exponentials margin loss.
    #
    #     Inputs:
    #         X: input data; each row is one example
    #         Y: output data; row i is paired with row i of X
    #         reps: collection iterated inside the loss; presumably the class
    #               representative vectors that rows of Y are drawn from -- confirm
    #     Returns:
    #         (predicty, theta): the prediction closure and the fitted d-by-m parameter matrix

    # data sizes
    # NOTE(review): `n` is assigned here but never used in this function
    n,d = size(X)
    m = size(Y,2)
    
    # linear predictor parameter
    theta = zeros(d,m)

    # predictor
    predicty(x) = theta'*x
    # The numerator equals |y - pj|^2 - |y - pi|^2, so the value is positive when the point
    # y lies closer to pi than to pj. The 1e-10 keeps the denominator non-zero when
    # pi == pj. The argument named `pi` is local and hides the constant `pi` only here.
    margin(pi, pj, y) = (2*dot(pi-pj,y) + dot(pj,pj) - dot(pi,pi)) / (2*norm(pi-pj) + 1e-10)
    # Each representative r is passed as `pi`, the true output y as `pj`, and the prediction
    # yhat as the point. When r equals y the margin is 0 and the term contributes exp(0) = 1.
    # The sum of exponentials is returned as is; no log is applied.
    multilogisticloss(yhat, y) = sum([exp(margin(r, y, yhat)) for r in reps])
    loss(x,y) = multilogisticloss(predicty(x), y)

    # (input row, output row) pairs
    data = zip(eachrow(X), eachrow(Y))
    # NOTE(review): `ADAMW` and `params` are used unqualified here, whereas the other
    # training functions in this file write `Flux.`-qualified names; the epoch count is
    # fixed at 500 rather than exposed as a keyword
    opt = ADAMW()
    Flux.@epochs 500 Flux.train!(loss, params(theta), data, opt)
    return predicty, theta
end

multi_logistic (generic function with 1 method)

In [8]:
"""
    square_I(x, theta)

Return the Euclidean (2-norm) distance between `x` and `theta`.

NOTE(review): despite the name, the distance is not squared -- confirm this is what
callers expect.
"""
square_I(x, theta) = norm(x - theta, 2)

"""
    abslt_I(x, theta)

Return the 1-norm of `x - theta`, i.e. the sum of the absolute coordinate differences.
"""
function abslt_I(x, theta)
    offset = x - theta
    return norm(offset, 1)
end

"""
    k_means(k, x, thetas)

Return the Euclidean distance from `x` to the closest of the first `k` rows of `thetas`.

# Arguments
- `k`: number of rows of `thetas` to consider; for `k <= 1` only the first row is used.
- `x`: the point being measured.
- `thetas`: matrix whose rows are the candidate centres.
"""
function k_means(k, x, thetas)
    # `best` rather than `min`, so Base.min is not hidden inside this function
    best = norm(x - thetas[1, :])
    for i in 2:k
        # compute each distance once instead of once for the test and again for the update
        dist = norm(x - thetas[i, :])
        if dist < best
            best = dist
        end
    end
    return best
end

k_means (generic function with 1 method)

In [9]:
# Regularizer functions
"""
    con_r(theta)

Return the mean of `theta` taken along its first dimension.

The result keeps the reduced dimension: a matrix gives a one-row matrix of column means,
and a vector gives a one-element vector.
"""
function con_r(theta)
    # `mean(theta, 1)` is the pre-1.0 positional-dims call and fails on Julia 1.x, where
    # the dimension must be passed as `dims`. Building the mean from `sum` matches the
    # `dims=1` style used elsewhere in this file and needs no `Statistics` import.
    return sum(theta, dims=1) / size(theta, 1)
end

"""
    quad_r(theta)

Quadratic regulariser: the sum of the squared entries of `theta`.
"""
function quad_r(theta)
    squares = theta .^ 2
    return sum(squares)
end

"""
    abs_r(theta)

Absolute-value regulariser: the sum of the absolute values of the entries of `theta`.
"""
function abs_r(theta)
    # sum(abs, theta) folds abs into the reduction, so no temporary `abs.(theta)` array
    # is allocated.
    return sum(abs, theta)
end

"""
    squ_r(theta)

Return the square root of the sum of the absolute values of the entries of `theta`.

NOTE(review): this is the root of the summed magnitudes, not a sum of per-entry square
roots -- confirm which one was intended.
"""
function squ_r(theta)
    total_magnitude = sum(abs.(theta))
    return sqrt(total_magnitude)
end

squ_r (generic function with 1 method)

In [None]:
# Loss functions
"""
    l_quad(y_hat, y)

Quadratic loss: the square of the prediction error `y_hat - y`.
"""
function l_quad(y_hat, y)
    residual = y_hat - y
    return residual^2
end

"""
    l_abs(y_hat, y)

Absolute loss: the magnitude of the prediction error `y_hat - y`.
"""
function l_abs(y_hat, y)
    residual = y_hat - y
    return abs(residual)
end

# Huber penalty of residual `r` with threshold `alpha`: quadratic (`r^2`) while
# `abs(r) <= alpha`, linear (`alpha * (2 * abs(r) - alpha)`) beyond it. Shared by the
# three fixed-threshold losses below, which previously each carried their own copy.
function _huber(r, alpha)
    if abs(r) <= alpha
        return r^2
    end
    return alpha * (2 * abs(r) - alpha)
end

"""
    l_hub1(y_hat, y)

Huber loss of the prediction error `y_hat - y` with threshold `alpha = 0.5`.
"""
l_hub1(y_hat, y) = _huber(y_hat - y, 0.5)

"""
    l_hub2(y_hat, y)

Huber loss of the prediction error `y_hat - y` with threshold `alpha = 1`.
"""
l_hub2(y_hat, y) = _huber(y_hat - y, 1)

"""
    l_hub3(y_hat, y)

Huber loss of the prediction error `y_hat - y` with threshold `alpha = 2`.
"""
l_hub3(y_hat, y) = _huber(y_hat - y, 2)

# Log-Huber penalty of residual `r` with threshold `alpha`: quadratic (`r^2`) while
# `abs(r) <= alpha`, logarithmic growth beyond it. Shared by the three fixed-threshold
# losses below, which previously each carried their own copy.
function _loghuber(r, alpha)
    if abs(r) <= alpha
        return r^2
    end
    # The outer factor must be alpha^2: at abs(r) == alpha the bracket equals 1, so the
    # branch evaluates to alpha^2 and meets the quadratic piece. Scaling by alpha alone
    # made the loss jump at the threshold whenever alpha != 1.
    return alpha^2 * (1 - 2 * log(alpha) + log(r^2))
end

"""
    l_loghub1(y_hat, y)

Log-Huber loss of the prediction error `y_hat - y` with threshold `alpha = 0.5`.
"""
l_loghub1(y_hat, y) = _loghuber(y_hat - y, 0.5)

"""
    l_loghub2(y_hat, y)

Log-Huber loss of the prediction error `y_hat - y` with threshold `alpha = 1`.
"""
l_loghub2(y_hat, y) = _loghuber(y_hat - y, 1)

"""
    l_loghub3(y_hat, y)

Log-Huber loss of the prediction error `y_hat - y` with threshold `alpha = 2`.
"""
l_loghub3(y_hat, y) = _loghuber(y_hat - y, 2)