-
Notifications
You must be signed in to change notification settings - Fork 2
/
mds.py
220 lines (160 loc) · 5.63 KB
/
mds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# -*- coding: utf-8 -*-
"""
Utility functions for multidimensional scaling.
The :func:`classical` function additionally requires R and the Python package ``rpy2`` to be installed, e.g.::
$ apt-get install r-base
$ pip install rpy2
See also the examples at:
- http://nbviewer.ipython.org/github/alimanfoo/anhima/blob/master/examples/mds.ipynb
""" # noqa
from __future__ import division, print_function, absolute_import
# third party dependencies
import numpy as np
import matplotlib.pyplot as plt
import sklearn.manifold
# Lazy-initialisation state for the optional R bridge. The three names below
# are placeholders that `_init_r` rebinds to the real rpy2 objects on first
# use, so that importing this module does not itself require rpy2.
_r_initialised = False
rpy2 = None
ro = None
r = None


def _init_r():
    """Import rpy2 and switch on its numpy conversion, at most once.

    On the first call the module-level names ``rpy2``, ``ro`` and ``r`` are
    rebound to the ``rpy2`` package, ``rpy2.robjects`` and
    ``rpy2.robjects.r`` respectively, ``numpy2ri.activate()`` is called, and
    the ``_r_initialised`` flag is set. Later calls return immediately.

    """
    global _r_initialised, rpy2, ro, r

    # already set up, nothing to do
    if _r_initialised:
        return

    # these imports bind the module-level names declared global above
    import rpy2  # noqa
    import rpy2.robjects as ro  # noqa
    from rpy2.robjects import r  # noqa
    import rpy2.robjects.numpy2ri as numpy2ri

    numpy2ri.activate()

    # only flag success once every step above has completed
    _r_initialised = True
def smacof(dist_square, **kwargs):
    """
    Multidimensional scaling using the SMACOF (Scaling by Majorizing a
    Complicated Function) algorithm.

    Parameters
    ----------

    dist_square : array_like, shape (n_samples, n_samples)
        A distance matrix in square form.
    kwargs : additional keyword arguments
        Additional keyword arguments are passed through to
        :class:`sklearn.manifold.MDS`.

    Returns
    -------

    coords : ndarray, shape (n_samples, n_components)
        An array whose rows give the coordinates of the points chosen to
        represent the dissimilarities.

    Raises
    ------

    ValueError
        If `dist_square` is not a two-dimensional square array.

    See Also
    --------

    anhima.dist.pairwise_distance, anhima.mds.classical, sklearn.manifold.MDS,
    anhima.pca.pca

    """

    # normalise inputs; checked explicitly rather than with `assert` so the
    # validation is still performed when Python runs with -O
    dist_square = np.asarray(dist_square)
    if dist_square.ndim != 2 or dist_square.shape[0] != dist_square.shape[1]:
        raise ValueError(
            'dist_square must be a square 2-dimensional array, found shape %r'
            % (dist_square.shape,)
        )

    # setup model; the input is already a distance matrix, so tell the model
    # not to compute dissimilarities itself
    model = sklearn.manifold.MDS(dissimilarity='precomputed', **kwargs)

    # fit model and get transformed coordinates
    coords = model.fit(dist_square).embedding_
    return coords
def classical(dist_square, k=2):
    """
    Classical multidimensional scaling via the R ``cmdscale`` function.

    Parameters
    ----------

    dist_square : array_like, shape (n_samples, n_samples)
        A distance matrix in square form.
    k : integer, optional
        The maximum dimension of the space which the data are to be represented
        in; must be in {1, 2, ..., n-1}.

    Returns
    -------

    coords : ndarray, shape (n_samples, k)
        An array whose rows give the coordinates of the points chosen to
        represent the dissimilarities.

    Raises
    ------

    ValueError
        If `dist_square` is not a two-dimensional square array.

    See Also
    --------

    anhima.dist.pairwise_distance, anhima.mds.smacof, anhima.pca.pca

    """

    # normalise inputs first, so that bad input is reported before R is
    # started; checked explicitly rather than with `assert` so the validation
    # is still performed when Python runs with -O
    dist_square = np.asarray(dist_square)
    if dist_square.ndim != 2 or dist_square.shape[0] != dist_square.shape[1]:
        raise ValueError(
            'dist_square must be a square 2-dimensional array, found shape %r'
            % (dist_square.shape,)
        )

    # setup R (imports rpy2 on first use)
    _init_r()

    # convert distance matrix to R
    m = ro.vectors.Matrix(dist_square)

    # apply MDS
    coords = r['cmdscale'](m, k=k)

    return np.asarray(coords)
def plot_coords(coords, dimx=1, dimy=2, ax=None, colors='b', sizes=20,
                labels=None, scatter_kwargs=None, annotate_kwargs=None):
    """Scatter plot of transformed coordinates from multidimensional scaling.

    Parameters
    ----------

    coords : ndarray, shape (`n_samples`, `n_components`)
        The transformed coordinates.
    dimx : int, optional
        The dimension to plot on the X axis. N.B., this is
        one-based, so `1` is the first dimension, `2` is the second
        dimension, etc.
    dimy : int, optional
        The dimension to plot on the Y axis. N.B., this is
        one-based, so `1` is the first dimension, `2` is the second
        dimension, etc.
    ax : axes, optional
        The axes on which to draw. If not provided, a new figure will be
        created.
    colors : color or sequence of color, optional
        Can be a single color format string, or a sequence of color
        specifications of length `n_samples`.
    sizes : scalar or array_like, shape (`n_samples`), optional
        Size in points^2.
    labels : sequence of strings
        If provided, will be used to label points in the plot. Entries that
        are `None` are skipped.
    scatter_kwargs : dict-like
        Additional keyword arguments passed through to `plt.scatter`.
    annotate_kwargs : dict-like
        Additional keyword arguments passed through to `plt.annotate` when
        labelling points. Defaults are filled in for `xycoords`, `xytext`
        and `textcoords`; the object passed in is not modified.

    Returns
    -------

    ax : axes
        The axes on which the plot was drawn.

    Raises
    ------

    ValueError
        If `coords` is not two-dimensional, or if `dimx` or `dimy` is less
        than 1.

    See Also
    --------

    anhima.mds.smacof, anhima.mds.classical

    """

    # normalise inputs; checked explicitly rather than with `assert` so the
    # validation is still performed when Python runs with -O
    coords = np.asarray(coords)
    if coords.ndim != 2:
        raise ValueError(
            'coords must be a 2-dimensional array, found %s dimension(s)'
            % coords.ndim
        )

    # a dimension below 1 would otherwise turn into a negative index and
    # silently select a trailing column
    for name, dim in (('dimx', dimx), ('dimy', dimy)):
        if dim < 1:
            raise ValueError(
                '%s is one-based and must be >= 1, found %r' % (name, dim)
            )

    # set up axes
    if ax is None:
        # make a square figure
        side = plt.rcParams['figure.figsize'][0]
        _, ax = plt.subplots(figsize=(side, side))

    # obtain X and Y data, N.B., `dimx` and `dimy` are 1-based
    x = coords[:, dimx - 1]
    y = coords[:, dimy - 1]

    # plot points
    if scatter_kwargs is None:
        scatter_kwargs = dict()
    ax.scatter(x, y, c=colors, s=sizes, **scatter_kwargs)

    # label points
    if labels is not None:
        # work on a copy so the defaults below never leak into the caller's
        # dict
        if annotate_kwargs is None:
            annotate_kwargs = dict()
        else:
            annotate_kwargs = dict(annotate_kwargs)
        annotate_kwargs.setdefault('xycoords', 'data')
        annotate_kwargs.setdefault('xytext', (3, 3))
        annotate_kwargs.setdefault('textcoords', 'offset points')
        for label, lx, ly in zip(labels, x, y):
            if label is not None:
                ax.annotate(str(label), xy=(lx, ly), **annotate_kwargs)

    # tidy up
    ax.set_xlabel('dimension %s' % dimx)
    ax.set_ylabel('dimension %s' % dimy)

    return ax