# Exercise 13.3: Dual Perceptron

In [None]:
using Random
using Plots
using RDatasets
using COSMO
using JuMP
import LinearAlgebra
LA = LinearAlgebra

In [None]:
# Fetch the iris flower table from the "datasets" collection and preview its first five rows
iris = RDatasets.dataset("datasets", "iris")
first(iris, 5)

In [None]:
# Row positions of each species inside the full iris table
setosa_indices = findall(==("setosa"), iris.Species)
virginica_indices = findall(==("virginica"), iris.Species);
versicolor_indices = findall(==("versicolor"), iris.Species);

In [None]:
# Rows of the two classes to separate: all virginica rows followed by all setosa rows
species_indices = [virginica_indices; setosa_indices]

# Data matrix: one row per flower, columns are the features PetalWidth and PetalLength
data = Matrix(iris[species_indices, ["PetalWidth", "PetalLength"]])

# Single-column table containing the species label of each data point
labels = iris[species_indices, ["Species"]]

# Convert the string labels into numeric labels: setosa -> -1, any other species -> +1
binary_labels = [species == "setosa" ? -1 : 1 for species in labels.Species];

In [None]:
"""
    dual_perceptron(x, l, max_iters)

Train a linear classifier with the dual form of the perceptron algorithm.

In every step one data point is drawn at random from the currently misclassified
points (margin `l[i] * (w'x[i, :] + b) <= 0`); its update counter is incremented and
the offset is shifted by its label. Training stops as soon as no point is misclassified
or after `max_iters` steps.

# Arguments
- `x`: input data, one data point per row.
- `l`: labels in -1 and +1 format, one label per row of `x`.
- `max_iters`: maximum number of iterations within which the algorithm should converge.

# Returns
A tuple `(b, α)`: the offset `b` and the vector `α` whose entry `α[i]` counts the
updates triggered by data point `i`. The corresponding weight vector is
`sum(α[j] * l[j] * x[j, :] for j in 1:size(x, 1))`.
"""
function dual_perceptron(x, l, max_iters)
    n = size(x, 1)
    α = zeros(Int64, n)
    b = 0.0
    # Primal weights w = Σ_j α[j] * l[j] * x[j, :]. They are maintained incrementally,
    # so an iteration no longer rebuilds the full sum and empty input is well defined.
    w = zeros(size(x, 2))

    for _ in 1:max_iters
        # indices of wrongly classified data points
        misclassified = filter(i -> l[i] * (w' * x[i, :] + b) <= 0.0, 1:n)

        # stop as soon as all data points are properly classified
        isempty(misclassified) && break

        # update count, offset and weights for a randomly selected data point
        index = rand(misclassified)
        α[index] += 1
        b += l[index]
        w .+= l[index] .* view(x, index, :)
    end
    return b, α
end;

In [None]:
# Train on the selected data, allowing at most 10000 iterations
b, α = dual_perceptron(data, binary_labels, 10000)
println("Total number of updates: ", sum(α))

# Recover the weight vector of the decision boundary from the dual variables
w = sum([α[row] * binary_labels[row] * data[row, :] for row in 1:size(data, 1)]);

In [None]:
# Scatter plot of both species in the petal width / petal length plane
Plots.scatter(
    iris.PetalWidth[setosa_indices],
    iris.PetalLength[setosa_indices];
    label="setosa",
    size=(350, 350),
)
Plots.scatter!(
    iris.PetalWidth[virginica_indices],
    iris.PetalLength[virginica_indices];
    label="virginica",
)

# Decision boundary w[1]*x + w[2]*y + b = 0, solved for y and drawn as a line.
# Run dual_perceptron several times and observe the different decision boundaries!
x_axis = collect(0:3)
slope = -w[1] / w[2]
y_intercept = -b / w[2]
a = slope .* x_axis .+ y_intercept
plot!(
    x_axis,
    a;
    label="decision boundary",
    xlabel="petal width/cm",
    ylabel="petal length/cm",
    color=:red,
    legend=:bottomright,
)
ylims!((0, 8))

# Exercise 13.4: Dual Support Vector Machine

In [None]:
# define kernel functions

"""
    exponential_kernel(x, y; σ=1.0)

Return the exponential kernel `exp(-‖x - y‖ / (2σ²))` of the points `x` and `y`.

# Keywords
- `σ`: width parameter of the kernel; any real number is accepted, so integer
  values such as `σ=2` work as well.
"""
function exponential_kernel(x, y; σ::Real=1.0)
    return exp(-LA.norm(x - y) / (2 * σ^2))
end


"""
    gaussian_kernel(x, y; σ=1.0)

Return the Gaussian kernel `exp(-‖x - y‖² / (2σ²))` of the points `x` and `y`.

# Keywords
- `σ`: Gaussian standard deviation; any real number is accepted, so integer
  values such as `σ=2` work as well.
"""
function gaussian_kernel(x, y; σ::Real=1.0)
    # The squared distance is summed directly, which avoids taking a square root
    # only to square it again.
    return exp(-sum(abs2, x .- y) / (2 * σ^2))
end

"""
    polynomial_kernel(x, y; d=2, α=1., c=1.)

Polynomial kernel of degree `d`: the product `y' * x` is scaled by `α`, shifted by `c`
and raised to the power `d` (all three steps are applied element-wise).
"""
function polynomial_kernel(x, y; d=2, α=1., c=1.)
    inner = y' * x
    return @. (α * inner + c)^d
end

In [None]:
"""
    svm(x, l; k=LA.dot)

Solve the dual of the hard-margin SVM with the COSMO solver and return the values of
the dual variables.

# Arguments
- `x`: input data, one data point per row.
- `l`: labels in -1 and +1 format, one label per row of `x`.

# Keywords
- `k`: kernel function taking two data points. The default is the ordinary dot
  product, referenced as `LA.dot` because this file only imports `LinearAlgebra`
  under the alias `LA` and never brings a bare `dot` into scope.
"""
function svm(x, l; k=LA.dot)
    n = length(l)

    # build matrix K from kernels and labels
    K = zeros(n, n)
    for i in 1:n, j in 1:n
        K[i, j] = l[i] * l[j] * k(x[i, :], x[j, :])
    end

    # ensure positive semi-definiteness of K that might be violated by eigenvalues
    # close to zero due to numerical imprecision
    for i in 1:n
        K[i, i] += 0.001
    end

    # dual problem: maximise sum(μ) - 0.5 * μ'Kμ subject to μ >= 0 and μ'l == 0
    model = Model(COSMO.Optimizer);
    set_optimizer_attributes(model, "max_iter" => 1000000)
    @variable(model, μ[1:n]);
    @constraint(model, 0 .<= μ);
    @constraint(model, μ'*l == 0);
    @objective(model, Max, sum(μ) - 0.5 * μ'*K*μ)

    optimize!(model)

    # return the values of the dual variable
    return value.(μ)
end;

In [None]:
"""
    svm_classify(y, x, l, μ; k=LA.dot)

Use the output of `svm()` to evaluate the decision function `<w | y> + b` at `y`.

# Arguments
- `y`: test data point.
- `x`: training data, one data point per row.
- `l`: training labels in -1 and +1 format.
- `μ`: dual variables, one per training point.

# Keywords
- `k`: kernel function taking two data points. The default is the ordinary dot
  product, referenced as `LA.dot` because this file only imports `LinearAlgebra`
  under the alias `LA` and never brings a bare `dot` into scope.

# Throws
- `ArgumentError` if no entry of `μ` exceeds `0.0001`, because the offset `b` is
  averaged over exactly those entries.
"""
function svm_classify(y, x, l, μ; k=LA.dot)
    # support vectors: training points with a noticeably positive dual variable
    A = findall(μ .> 0.0001)
    isempty(A) && throw(
        ArgumentError("no support vectors: every entry of μ is <= 0.0001"),
    )

    # offset b: average of l[a] - <w | x[a, :]> over the support vectors
    offsets = [
        l[a] .- sum(l[i] * μ[i] * k(x[i, :], x[a, :]) for i in eachindex(l)) for a in A
    ]
    b = 1 / length(A) * sum(offsets)

    # <w | y> + b, with w expanded in the training points
    result = b
    for i in eachindex(μ)
        result = result .+ μ[i] .* l[i] .* k(x[i, :], y)
    end
    return result
end

## Classify Versicolor & Virginica

In [None]:
# Rows of the two classes to separate: all virginica rows followed by all versicolor rows
species_indices = [virginica_indices; versicolor_indices]

# Data matrix of the virginica and versicolor flowers: one row per flower,
# columns are the measured features PetalWidth and PetalLength
data = Matrix(iris[species_indices, ["PetalWidth", "PetalLength"]])

# Single-column table containing the species label of each data point
labels = iris[species_indices, ["Species"]]

# Convert the string labels into numeric labels: versicolor -> -1, any other species -> +1
binary_labels = [species == "versicolor" ? -1 : 1 for species in labels.Species];

In [None]:
# Gaussian kernel with the standard deviation fixed to σ = 0.7
gauss_kernel_closure(x, y) = gaussian_kernel(x, y; σ=0.7)

# Polynomial kernel with the degree fixed to d = 5
poly_kernel_closure(x, y) = polynomial_kernel(x, y; d=5)

# Exponential kernel with the width fixed to σ = 1.0
exp_kernel_closure(x, y) = exponential_kernel(x, y; σ=1.0)

# Select the kernel by leaving exactly one of the following assignments active
#kernel_function = poly_kernel_closure
kernel_function = gauss_kernel_closure
#kernel_function = exp_kernel_closure

# Train the SVM on the selected data with the chosen kernel
μ = svm(data, binary_labels; k=kernel_function);

In [None]:
# Dual variables above 0.01 are counted as support vectors
println("Number of support vectors: ", count(>(0.01), μ))

In [None]:
# Evaluate the SVM trained above (dual variables μ) at the point (y1, y2);
# taking two scalar arguments simplifies plotting
svm_classify_closure(y1, y2) =
    svm_classify([y1, y2], data, binary_labels, μ; k=kernel_function)

In [None]:
n_points = 50

# Evaluation grid: xs spans the petal width axis, ys the petal length axis.
# `range` is a Base function, so it is called directly instead of through `LA`.
ys = range(2, 8, length = n_points)
xs = range(0, 3, length = n_points)

# Evaluate the classifier exactly once per grid point. Rows follow ys and columns
# follow xs, which is the matrix layout Plots uses for contour(x, y, z); passing the
# matrix avoids evaluating the classifier a second time inside the contour call.
svm_result = [svm_classify_closure(xi, yi) for yi in ys, xi in xs]

# Contour levels symmetric around 0, the value of the decision boundary
max_level = maximum(abs, svm_result)
levels = range(-max_level, max_level, length = 20)
Plots.contour(xs, ys, svm_result, fill=true, c=:bluesreds, levels=levels)
scatter!(iris.PetalWidth[versicolor_indices], iris.PetalLength[versicolor_indices], label="versicolor")
scatter!(iris.PetalWidth[virginica_indices], iris.PetalLength[virginica_indices], label="virginica")

ylims!((2,8))
xlims!((0,3))
plot!(size=(500,300), xlabel = "petal width/cm", ylabel= "petal length/cm",)

## SVM self-made

In [None]:
include("optimization_library.jl")
import ForwardDiff

In [None]:
"""
    svm_v2(x, l; k=LA.dot)

Solve the dual of the hard-margin SVM with `ConstraintElimination_ipm` from the
included optimization library and return the values of the dual variables.

# Arguments
- `x`: input data, one data point per row.
- `l`: labels in -1 and +1 format, one label per row of `x`.

# Keywords
- `k`: kernel function taking two data points. The default is the ordinary dot
  product, referenced as `LA.dot` because this file only imports `LinearAlgebra`
  under the alias `LA` and never brings a bare `dot` into scope.
"""
function svm_v2(x, l; k=LA.dot)
    n = length(l)

    # build matrix K from kernels and labels
    K = zeros(n, n)
    for i in 1:n, j in 1:n
        K[i, j] = l[i] * l[j] * k(x[i, :], x[j, :])
    end

    # ensure positive semi-definiteness of K that might be violated by eigenvalues
    # close to zero due to numerical imprecision
    for i in 1:n
        K[i, i] += 0.001
    end

    # objective function (negated dual, so that it is minimised) and its derivatives
    f(μ) = -sum(μ) + 0.5 * μ'*K*μ
    df(μ) = ForwardDiff.gradient(f, μ)
    Hf(μ) = ForwardDiff.hessian(f, μ)

    # equality constraint l'μ = 0
    # since A has to be a matrix in ConstraintElimination_ipm and not a vector, the
    # constraint is simply duplicated (once scaled by 2); the result is the same
    A = [2 .* l'; l']
    b = [0, 0]

    # one inequality constraint function -μ[i] per dual variable
    # (presumably read by the solver as g(μ) <= 0, i.e. μ[i] >= 0 - confirm in the library).
    # The count comes from the labels: there is no local μ at this point, so the
    # previous `length(μ)` silently read a global variable from another cell.
    n_constr = n
    g_vec = Vector(undef, n_constr)
    ∇g_vec = Vector(undef, n_constr)
    Hg_vec = Vector(undef, n_constr)

    for i in 1:n_constr
        g_vec[i] = μ -> -μ[i]
        ∇g_vec[i] = μ -> ForwardDiff.gradient(g_vec[i], μ)
        Hg_vec[i] = μ -> ForwardDiff.hessian(g_vec[i], μ)
    end

    # the second return value (the trace) is not needed here
    μval, _ = ConstraintElimination_ipm(
        f, df, Hf, A, b;
        g=g_vec, dg=∇g_vec, Hg=Hg_vec, eps=0.0001, barrier_increase=2.0,
        maxiters=100, inner_maxiters=100,
    )

    # return the values of the dual variable
    return μval
end;

In [None]:
# Kernel used for the self-made solver
kernel_function = exp_kernel_closure

# Train the SVM with svm_v2 on the selected data
μ2 = svm_v2(data, binary_labels; k=kernel_function);

In [None]:
# Evaluate the SVM trained by svm_v2 (dual variables μ2) at the point (y1, y2);
# taking two scalar arguments simplifies plotting
svm_classify_closure(y1, y2) =
    svm_classify([y1, y2], data, binary_labels, μ2; k=kernel_function)

In [None]:
# Evaluation grid with a spacing of 0.04 along both axes
ys = range(2.0; stop=8.0, step=0.04)
xs = range(0.0; stop=3.0, step=0.04)

# Filled contour plot of the decision function with the training data on top
Plots.contour(xs, ys, svm_classify_closure; fill=true, c=:bluesreds)
scatter!(
    iris.PetalWidth[versicolor_indices],
    iris.PetalLength[versicolor_indices];
    label="versicolor",
)
scatter!(
    iris.PetalWidth[virginica_indices],
    iris.PetalLength[virginica_indices];
    label="virginica",
)

ylims!((2, 8))
xlims!((0, 3))
plot!(; size=(500, 300), xlabel="petal width/cm", ylabel="petal length/cm")

In [None]:
# Dual variables side by side: svm_v2 in the left column, svm in the right column
hcat(μ2, μ)

In [None]:
# Deviation between svm and svm_v2: number of dual variables that differ by more
# than 0.1 in either direction (the absolute difference also catches μ2 < μ)
count(>(0.1), abs.(μ2 .- μ))