In [1]:
from visualize import distance, visualize_img
%matplotlib inline

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import euclidean_distances

## Testing whether the MDS implementation is correct

* generate some random samples
* calculate Euclidean distances between them
* reconstruct the positions

In [2]:
# Sanity check for distance.MDS: draw random 2-D points, compute their pairwise
# Euclidean distances, and ask MDS to recover a configuration from the
# distances alone.
seed = np.random.RandomState(seed=3)  # fixed seed so the sample is reproducible
n_samples = 10
# `np.float` was deprecated in NumPy 1.20 and removed in 1.24; np.float64 is
# the dtype it used to alias.
X_true = seed.randint(0, 20, 2 * n_samples).astype(np.float64)
X_true = X_true.reshape((n_samples, 2))
D = euclidean_distances(X_true)

X = distance.MDS(D)

print(X)

# The embedding is only defined up to rotation/translation/reflection, so
# compare pairwise distances rather than coordinates.
# NOTE(review): assumes distance.MDS returns an (n_samples, n_dims) array, as
# the printed output suggests.
print("max |D - D_reconstructed| =", np.abs(euclidean_distances(X) - D).max())

# plot = visualize_img.scatter_plot(X, 'testing')
plot = visualize_img.scatter_img(X, distance.readImgList(), 'out.png')

[[ 1.53740067  2.82240294]
 [ 4.5582354   4.79081964]
 [-9.49349382 -0.0617803 ]
 [-1.74734732 -2.29721264]
 [ 0.35451664  0.06544713]
 [ 7.17877795 -9.39123197]
 [-0.04278552 -8.46927555]
 [-4.50116532  6.50148964]
 [ 9.67787505  1.50620112]
 [-7.52201373  4.53313998]]


  .format(dtypeobj_in, dtypeobj_out))
  warn('%s is a low contrast image' % fname)
  .format(dtypeobj_in, dtypeobj_out))
  "value {} fits in {}".format(a.dtype, dtype, a.max(), dtype))


## Testing on CURET-VisiProg data
* Create pairwise matrix
* Remove rows whose entries are all zero
* Convert the similarity matrix to a distance matrix:
  * Identical:         0
  * Highest labeling:  1
  * etc
  * Not in same group: 100 (a large value)
  
  
An example of the transformation is shown below:

In [3]:
# Toy example: two overlapping groups ({0, 1} and {1, 2}) over 4 items.
S = distance.count_matrix([[0, 1], [1, 2]], 4)

print("Original pairwise matrix")
print(S)

# Show the conversion under both choices of the fill value used for pairs
# that never share a group.
for method_label, fill_value in (("Method A", 100), ("Method B", 0)):
    print(method_label)
    D, nonZeroIndex = distance.similarity_to_distance(S, missing_value=fill_value)
    print(D)

Original pairwise matrix
[[ 1.  1.  0.  0.]
 [ 1.  2.  1.  0.]
 [ 0.  1.  1.  0.]
 [ 0.  0.  0.  0.]]
Method A
[[   0.    1.  100.  100.]
 [   1.    0.    1.  100.]
 [ 100.    1.    0.  100.]
 [ 100.  100.  100.    0.]]
Method B
[[ 0.  1.  0.  0.]
 [ 1.  0.  1.  0.]
 [ 0.  1.  0.  0.]
 [ 0.  0.  0.  0.]]


In [4]:
# NOTE(review): this cell contains only commented-out code and executes
# nothing. It looks like an earlier draft of the pipeline (unqualified helper
# names, missing_value=100); the next cell runs the `distance.`-qualified
# version. Consider deleting it once that is confirmed.
# groups, N = readData()
# S = count_matrix(groups, N)
# D, nonZeroIndex = similarity_to_distance(S, missing_value=100)

# plot = get_MDS_plot(D)

In [9]:
# CURET-VisiProg pipeline: read the label groups, build the pairwise count
# matrix, convert it to distances, and run distance.SMACOF on the result.
groups, N = distance.readVSPLabel()
S = distance.count_matrix(groups, N)
# NOTE(review): missing_value=0 corresponds to "Method B" in the example cell,
# whereas the notebook's closing discussion talks about entries equal to 100
# ("Method A") — confirm which setting the conclusions are based on.
# `nonZeroIndex` is not used by any cell visible in this notebook.
D, nonZeroIndex = distance.similarity_to_distance(S, missing_value=0)

# NOTE(review): despite its name, `plot` is later printed with shape
# (5245, 2), so it is presumably an array of 2-D coordinates, not a figure.
plot = distance.SMACOF(D)

it: 0, stress 231731.736133
it: 0, stress 247919.746203
it: 0, stress 218123.515245
it: 0, stress 245413.215301
it: 0, stress 228414.212479
it: 0, stress 232307.375507
it: 1, stress 753.027912877
it: 0, stress 224317.957784
it: 1, stress 705.091034465
it: 1, stress 760.331716386
it: 0, stress 232433.958535
it: 1, stress 712.650441113
it: 1, stress 707.360231304
it: 2, stress 752.643712523
it: 1, stress 742.712053235
it: 1, stress 705.213945494
it: 2, stress 760.011717475
it: 2, stress 704.715818138
it: 1, stress 704.774248853


[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:   30.6s remaining:  1.5min


it: 2, stress 712.243542242
it: 2, stress 706.992736546
it: 2, stress 742.346885292
it: 2, stress 704.840755814
it: 2, stress 704.398020208


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:   33.2s finished


In [13]:
# Render the SMACOF embedding as an image scatter plot in 'smacof.png'.
print(plot.shape)
# Keep `plot` itself untouched: the original `plot = plot[:-1]` dropped one
# more row every time this cell was re-run, so the result depended on how
# often the cell had been executed.
# NOTE(review): the recorded output shows plot.shape[0] == len(listImg) before
# trimming — confirm whether the last row really needs to be dropped.
plot_trimmed = plot[:-1]
listImg = distance.readImgList()
print(len(listImg))

# Reuse the list read above instead of calling readImgList() a second time.
# NOTE(review): assumes readImgList() returns the same list on each call.
visualize_img.scatter_img(plot_trimmed, listImg, 'smacof.png')

(5245, 2)
5245


  .format(dtypeobj_in, dtypeobj_out))
  warn('%s is a low contrast image' % fname)
  .format(dtypeobj_in, dtypeobj_out))
  "value {} fits in {}".format(a.dtype, dtype, a.max(), dtype))


## Why does MDS not work?

MDS is highly sensitive to all entries of the S matrix. It requires that all of the entries make sense, and follow the metric axioms.

In the VSP case, many entries of the matrix have the same value (100). This forces MDS to learn a circular representation.

In other words, the simple model of assigning a large distance to every pair that does not share a group does not work.