# Trabalho de Implementação

## INF2912 - Otimização Combinatória
### Prof. Marcus Vinicius Soledade Poggi de Aragão
### 2015-2

### Ciro Cavani
#### BigData / Globo.com

Algoritmos de clusterização.

## Conteúdo

TBD

## Dataset

In [4]:
# Evaluate the project's dataset source file and import every name exported by
# the Inf2912Clustering module (presumably defined in that file — confirm).
include("../src/dataset.jl")
importall Inf2912Clustering
# Short alias for the module, used by the cells that follow.
Clustering = Inf2912Clustering
# Load the dataset registered under the name "small" and print its summary.
dataset = Clustering.load_dataset("small")
Clustering.summary(dataset)

Number of Groups: 3
Number of Features: 200
Number of Features (group): 40
Probability of Activation: 0.8
Number of Objects (total): 100
Number of Objects per Group (min): 7
Number of Objects per Group (max): 66
Number of Objects in 1: 20
Number of Objects in 2: 38
Number of Objects in 3: 42


### K-Means

Consiste em executar o algoritmo *K-means* para determinar os pontos *centrais* de cada grupo e classificar cada objeto como sendo do grupo com ponto central *mais próximo*.

In [5]:
# Generation parameters: requested number of objects (n), number of groups (k),
# number of features (c) and the `slot` value handed to the generator (c_y).
n, k, c, c_y = 40, 3, 16, 3

# Build a synthetic training dataset with the parameters above.
train_data = Clustering.Dataset(size=n, groups=k, features=c, slot=c_y)

# Keep only the first component of every record, converted to floating point.
inputs = map(train_data.data) do record
    float(record[1])
end

41-element Array{Array{Float64,1},1}:
 [1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0]
 [1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0]
 [0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0]
 [1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0]
 [1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0]
 [0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0]
 [0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0]
 [1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0]
 [0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0]
 [0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0]
 [1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0]
 [1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0]
 [0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0]
 ⋮                                                                
 [0.0,0.0,0.0,0.0,0.0,0.

#### Mixed Integer Programming with Nonlinear Objective

$$
\begin{align}
\text{minimize} \qquad & \sum_{j=1}^{k}\sum_{i=1}^{n} x_{ij} \| s_i - c_j \|^2 \\
 \text{subject to} \quad \quad & \sum_{j=1}^{k} x_{ij} = 1 & i = 1, \ldots, n \\
 \qquad \qquad & \sum_{i=1}^{n} x_{ij} \ge 1 & j = 1, \ldots, k \\
 \qquad \qquad & x_{ij} \in \{0, 1\} & i = 1, \ldots, n; j = 1, \ldots, k \\
\end{align}
$$

$$
\begin{align}
c_j = \frac{\sum_{l=1}^{n} x_{lj} s_l}{\sum_{l=1}^{n} x_{lj}}
\end{align}
$$

In [7]:
using JuMP

# Objects to cluster: one feature vector per object.
S = inputs

# Size the model from the data itself rather than from the requested `n`.
# The generated dataset is not guaranteed to hold exactly `n` objects; indexing
# with `1:n` would silently drop the extra ones (or fail if there were fewer).
nobj = length(S)

m = Model()

# x[i,j] = 1 iff object i is assigned to cluster j.
@defVar(m, x[1:nobj,1:k], Bin)
# d2[i,j]: squared Euclidean distance from object i to the centroid of cluster j.
@defVar(m, d2[1:nobj,1:k] ≥ 0)
# cs[j,t]: sum of feature t over the objects assigned to cluster j.
@defVar(m, cs[1:k,1:c] ≥ 0)
# cx[j]: number of objects assigned to cluster j. It is a divisor in the
# nonlinear constraint below, so it is bounded away from zero; the bound is
# implied by the "no empty cluster" constraint and does not cut any solution.
@defVar(m, cx[1:k] ≥ 1)

# Cluster sizes.
for j=1:k
    @addConstraint(m, cx[j] == sum{x[l,j], l=1:nobj})
end

# Per-feature sums of each cluster; the centroid of cluster j is cs[j,:] / cx[j].
for j=1:k, t=1:c
    @addConstraint(m, cs[j,t] == sum{x[l,j] * S[l][t], l=1:nobj})
end

# d2[i,j] = || s_i - c_j ||^2 with c_j = cs[j,:] / cx[j].
for i=1:nobj, j=1:k
    @addNLConstraint(m, d2[i,j] == sum{(S[i][t] - cs[j,t] / cx[j])^2, t=1:c})
end

# Minimise the total squared distance of every object to its own centroid.
@setObjective(m, Min, sum{x[i,j] * d2[i, j], i=1:nobj, j=1:k})

# Every object belongs to exactly one cluster.
for i=1:nobj
    @addConstraint(m, sum{x[i,j], j=1:k} == 1)
end

# No cluster may be empty.
for j=1:k
    @addConstraint(m, sum{x[i,j], i=1:nobj} ≥ 1)
end

# Trailing semicolon suppresses the notebook's display of the model.
m;

In [None]:
# Solve the model currently bound to `m`; the value returned by `solve` is
# what the notebook displays for this cell.
solve(m)

In [None]:
# Display the solved values of the assignment variables `x`
# (one row per object, one column per cluster).
getValue(x)

#### Approximate Semidefinite programming (SDP) relaxation

$$
\begin{align}
\text{minimize} \qquad & \mathrm{Tr}(W(I-Z)) \\
 \text{subject to} \quad \quad & Ze = e \\
 \qquad \qquad & \mathrm{Tr}(Z) = k \\
 \qquad \qquad & Z \ge 0 \\
 \qquad \qquad & Z \succeq 0\\
\end{align}
$$

$$
\begin{align}
W_{ij} = \phi(s_i,s_j) = \exp\left(-\frac{\|s_i-s_j\|^2}{\sigma}\right), \quad \sigma > 0
\end{align}
$$

In [None]:
using JuMP

# Number of objects to cluster.
p = length(inputs)

# Bandwidth of the Gaussian kernel (must be > 0).
sigma = 1.0

# Similarity matrix W[i,j] = exp(-||s_i - s_j||^2 / sigma).
# The norm is squared so that the code matches the kernel stated in the
# formulation. The diagonal is left at zero: since Tr(Z) = k is fixed, diagonal
# entries of W only shift the objective by a constant.
W = zeros(p,p)
for i=1:p, j=i+1:p
    similarity = exp(-norm(inputs[i] - inputs[j])^2 / sigma)
    W[i,j] = similarity
    W[j,i] = similarity
end

m = Model()

# Z is symmetric positive semidefinite and elementwise non-negative.
@defVar(m, Z[1:p,1:p], SDP)
@addConstraint(m, Z .≥ 0)

@setObjective(m, Min, trace(W * (eye(p,p) - Z)))

# Every row of Z sums to one.
@addConstraint(m, Z * ones(p) .== ones(p))

# The trace fixes the number of clusters.
@addConstraint(m, trace(Z) == k)

solve(m)

Z_val = getValue(Z)[:,:]

println("Raw solution")
println(round(Z_val,4))

# A simple rounding scheme: scan the objects in order; each still-unlabelled
# object with a non-negligible diagonal entry opens a new cluster, and every
# later object j whose Z[i,j] is within 0.1 of Z[i,i] is labelled with it.
which_cluster = zeros(Int,p)
num_clusters = 0
for i = 1:p
    Z_val[i,i] <= 1e-6 && continue

    if which_cluster[i] == 0
        num_clusters += 1
        which_cluster[i] = num_clusters
        for j = i+1:p
            # Z entries are scalars, so the distance is a plain absolute value.
            gap = abs(Z_val[i,j] - Z_val[i,i])
            println(num_clusters, " ", round(gap,2))
            if gap <= 1e-1
                which_cluster[j] = num_clusters
            end
        end
    end
end

println("Clusters: ", which_cluster)
# Print results. The rounding may open more than k clusters, so report every
# cluster that was actually created (and still at least the k requested ones)
# instead of silently omitting the objects labelled above k.
for cluster = 1:max(k, num_clusters)
    println("Cluster $cluster")
    for i = 1:p
        if which_cluster[i] == cluster
            println(i, "->", inputs[i])
        end
    end
end

# Short pause at the end of the cell (presumably to let the notebook flush the
# printed output — confirm).
sleep(0.2)