-
Notifications
You must be signed in to change notification settings - Fork 5
/
unicodeInterpEmbedding.py
189 lines (160 loc) · 7.12 KB
/
unicodeInterpEmbedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# -*- coding: utf-8 -*-
# Author: Kyle Cranmer <kyle.cranmer@nyu.edu>
# Licence: BSD
"""
Interpolating an MDS embedding of a grid of Gaussians
=====================================================

Build a regular grid of (mean, standard deviation) pairs, compute the pairwise
distances between them with ``getDistance``, embed the points in 3D and in 2D
with scikit-learn's ``manifold.MDS`` on the precomputed distance matrix, and
interpolate every embedding coordinate onto a finer grid with
``scipy.interpolate.griddata``.  The results are drawn in a 2 x 4 panel figure
that is saved as ``interpEmbedding.pdf`` / ``interpEmbedding.png`` and shown.
"""
# Echo the module description when the script runs.
print(__doc__)
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
from mpl_toolkits.mplot3d import Axes3D
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA
from sklearn import svm
from scipy.interpolate import griddata
# Next line to silence pyflakes.
Axes3D
# Make the sample points in 2D: a regular n_samples x n_samples grid.
n_samples = 20
# Shared random state handed to the MDS estimators further down.
seed = np.random.RandomState(seed=3)

# Create a set of Gaussians on a grid of mean in [-1.5, 1.5] and standard
# deviation in [0.2, 5].  Each row is [mean, std. dev.]; the mean is the slow
# (outer) index and the standard deviation the fast (inner) one, so rows
# 0 .. n_samples-1 all share the first mean value.
#
# Alternatives noted by the author:
#   * a 1D curve that "looks cool":
#       np.array([np.linspace(0,1,n_samples),np.linspace(.1,3,n_samples)]).T
#   * np.meshgrid(np.linspace(-1.5,1.5,n_samples),np.linspace(0.2,5,n_samples)),
#     which would still need reshaping into rows.
gridMuSigma = np.array([[mu, sigma]
                        for mu in np.linspace(-1.5, 1.5, n_samples)
                        for sigma in np.linspace(.2, 5, n_samples)])
# Give every grid point its own color: sample the jet colormap at as many
# evenly spaced positions in [0, 1] as there are points.
colormap_positions = np.linspace(0, 1, len(gridMuSigma))
colors = plt.cm.jet(colormap_positions)
# Use the 2-d Gaussian information metric for distances;
# see equation 7 from http://arxiv.org/abs/0802.2050 ("FINE" paper).
def getDistance(x, y):
    """Return the distance between two Gaussians given as (mean, std. dev.).

    With ``far``  = sqrt((x[0]-y[0])**2 + (x[1]+y[1])**2) and
         ``near`` = sqrt((x[0]-y[0])**2 + (x[1]-y[1])**2),
    the result is ``log((far + near) / (far - near))``.

    Parameters
    ----------
    x, y : indexable
        ``[0]`` is read as the mean and ``[1]`` as the standard deviation.

    Returns
    -------
    The distance; 0 when ``x`` and ``y`` coincide.

    NOTE(review): the denominator vanishes when ``x[1] * y[1] == 0``, so both
    standard deviations are expected to be non-zero.
    NOTE(review): mean and standard deviation enter unscaled here; confirm
    against the cited equation whether a sqrt(2) scaling was intended.
    """
    mean_diff = x[0] - y[0]
    sigma_sum = x[1] + y[1]
    sigma_diff = x[1] - y[1]
    # Each norm appears in both numerator and denominator: evaluate it once.
    far = np.sqrt(mean_diff**2 + sigma_sum**2)
    near = np.sqrt(mean_diff**2 + sigma_diff**2)
    return np.log((far + near) / (far - near))
# Create the square array of "similarities" (distances) between points:
# entry [r, c] is getDistance(gridMuSigma[r], gridMuSigma[c]).
distances = np.array([[getDistance(point_a, point_b) for point_b in gridMuSigma]
                      for point_a in gridMuSigma])
# Make the 3D embedding: metric MDS fitted on the precomputed distance matrix.
mds = manifold.MDS(n_components=3, metric=True, max_iter=3000, eps=1e-9, random_state=seed,
                   dissimilarity="precomputed", n_jobs=1)
embed3d = mds.fit(distances).embedding_
# Report the number of embedded points and the array shapes.  The text is
# formatted into one string so that the statement is valid, and prints the
# same output, on both Python 2 and Python 3 (the bare ``print a, b`` statement
# form is a SyntaxError on Python 3).
print("%s %s %s" % (len(embed3d), np.shape(embed3d), np.shape(embed3d[:, 0])))

# Make the 2D embedding the same way (``metric`` is left at its default here).
# NOTE(review): both estimators receive the same RandomState object ``seed``,
# so presumably the 2D result depends on the 3D fit having run first; keep
# this order if reproducibility matters.
mds2 = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=seed,
                    dissimilarity="precomputed", n_jobs=1)
embed2d = mds2.fit(distances).embedding_
# Try to learn the mapping: build a finer n_samples_pred x n_samples_pred grid
# over the same ranges (mean in [-1.5, 1.5], standard deviation in [0.2, 5]) on
# which the embedding will be predicted.  Rows are [mean, std. dev.] with the
# mean as the slow (outer) index, matching the layout of the training grid.
n_samples_pred = 50
gridMuSigma_pred = np.array([[mu, sigma]
                             for mu in np.linspace(-1.5, 1.5, n_samples_pred)
                             for sigma in np.linspace(.2, 5, n_samples_pred)])
# Learn the 3D embedding as a function of (mean, std. dev.): interpolate each
# of the three embedding coordinates from the training grid onto the finer
# prediction grid with griddata's 'cubic' method.
# (A disabled earlier variant instead fitted an svm.NuSVR(C=1.0, nu=0.1)
# regressor to each coordinate and called predict() on the prediction grid.)
query_points_3d = (gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1])
embed3d_predx, embed3d_predy, embed3d_predz = [
    griddata(gridMuSigma, embed3d[:, coord], query_points_3d, method='cubic')
    for coord in range(3)
]
# Learn the 2D embedding as a function of (mean, std. dev.): interpolate both
# embedding coordinates from the training grid onto the finer prediction grid
# with griddata's 'cubic' method.
# (A disabled earlier variant trained the regressor ``regr`` on each
# coordinate and called predict() on the prediction grid instead.)
query_points_2d = (gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1])
embed2d_predx, embed2d_predy = [
    griddata(gridMuSigma, embed2d[:, coord], query_points_2d, method='cubic')
    for coord in range(2)
]
# Learn the inverse of the 2D embedding: interpolate the mean and the standard
# deviation as functions of the 2D embedding coordinates.  The interpolant is
# evaluated at the embedded training points themselves, not on a new grid.
# (A disabled earlier variant trained the regressor ``regr`` on embed2d and
# called predict(embed2d) instead.)
embedded_points = (embed2d[:, 0], embed2d[:, 1])
embed2d_inv_predx, embed2d_inv_predy = [
    griddata(embed2d, gridMuSigma[:, column], embedded_points, method='cubic')
    for column in range(2)
]
# Set up the figure: panels are laid out on a 2-row x 4-column grid.
fig = plt.figure(figsize=(5*4, 4.5*2))

# (A disabled panel used to scatter the original grid in mean / standard
# deviation, titled 'Original grid in mean and std. dev.'.)

# Plot the 3D embedding.  Since it is a surface of constant negative curvature
# (hyperbolic geometry), expect it to look like the pseudo-sphere:
# http://mathworld.wolfram.com/Pseudosphere.html
# Colored points are the MDS embedding of the training grid; red points are
# the interpolated embedding on the finer prediction grid.
subpl = fig.add_subplot(241, projection='3d')
subpl.scatter(embed3d[:, 0], embed3d[:, 1], embed3d[:, 2], s=20, c=colors)
subpl.scatter(embed3d_predx, embed3d_predy, embed3d_predz, s=10, c='r')
# Viewing angle that looks good when n_jobs=1.  The author's alternative for
# n_jobs=-1 was view_init(42, 101); it used to be called first and was then
# immediately overridden by this call, so it is kept only as a note.
subpl.view_init(-130, -33)
subpl.set_xlabel('x(μ,σ)')
subpl.set_ylabel('y(μ,σ)')
subpl.set_zlabel('z(μ,σ)')
# NOTE(review): suptitle sets the title of the whole figure, not of this
# panel, and 'Scailing' looks like a typo for 'Scaling'; both left as-is.
plt.suptitle('3D Multidim. Scailing Embedding')
plt.axis('tight')
# Remaining panels of the top row: each coordinate of the 3D embedding plotted
# over the (mean, std. dev.) plane.  Colored points are the MDS embedding of
# the training grid; red points are the interpolated values on the finer grid.
# Each panel labels its OWN axes (previously the labels for the y and z panels
# were applied to subpl2, overwriting its z label and leaving subpl3 / subpl4
# unlabeled).

# x coordinate of the embedding
subpl2 = fig.add_subplot(242, projection='3d')
subpl2.scatter(gridMuSigma[:, 0], gridMuSigma[:, 1], embed3d[:, 0], s=20, c=colors)
subpl2.scatter(gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1], embed3d_predx, s=10, c='r')
subpl2.set_xlabel('mean μ')
subpl2.set_ylabel('std. dev. σ')
subpl2.set_zlabel('x(μ,σ)')
plt.axis('tight')

# y coordinate of the embedding
subpl3 = fig.add_subplot(243, projection='3d')
subpl3.scatter(gridMuSigma[:, 0], gridMuSigma[:, 1], embed3d[:, 1], s=20, c=colors)
subpl3.scatter(gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1], embed3d_predy, s=10, c='r')
subpl3.set_xlabel('mean μ')
subpl3.set_ylabel('std. dev. σ')
subpl3.set_zlabel('y(μ,σ)')
plt.axis('tight')

# z coordinate of the embedding
subpl4 = fig.add_subplot(244, projection='3d')
subpl4.scatter(gridMuSigma[:, 0], gridMuSigma[:, 1], embed3d[:, 2], s=20, c=colors)
subpl4.scatter(gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1], embed3d_predz, s=10, c='r')
subpl4.set_xlabel('mean μ')
subpl4.set_ylabel('std. dev. σ')
subpl4.set_zlabel('z(μ,σ)')
plt.axis('tight')
# Bottom row, first panel: the 2D embedding itself.  The interpolated (red)
# points are drawn first so the colored training points sit on top of them.
# (A disabled call, set_autoscaley_on(False), used to follow add_subplot.)
subpl2 = fig.add_subplot(2, 4, 5)
subpl2.scatter(embed2d_predx, embed2d_predy, s=10, c='r')
subpl2.scatter(embed2d[:, 0], embed2d[:, 1], s=20, c=colors)
subpl2.set_title('2D Multidim. Scailing Embedding')
subpl2.axis('tight')

# Next two panels: each coordinate of the 2D embedding plotted over the
# (mean, std. dev.) plane, training points in color and interpolation in red.
train_mu, train_sigma = gridMuSigma[:, 0], gridMuSigma[:, 1]
pred_mu, pred_sigma = gridMuSigma_pred[:, 0], gridMuSigma_pred[:, 1]

subpl2 = fig.add_subplot(2, 4, 6, projection='3d')
subpl2.scatter(train_mu, train_sigma, embed2d[:, 0], s=20, c=colors)
subpl2.scatter(pred_mu, pred_sigma, embed2d_predx, s=10, c='r')
subpl2.axis('tight')

subpl3 = fig.add_subplot(2, 4, 7, projection='3d')
subpl3.scatter(train_mu, train_sigma, embed2d[:, 1], s=20, c=colors)
subpl3.scatter(pred_mu, pred_sigma, embed2d_predy, s=10, c='r')
subpl3.axis('tight')
# Last panel: the inverse of the 2D embedding, i.e. the (mean, std. dev.)
# values recovered by interpolation, drawn in red.
# (Disabled alternatives: set_autoscaley_on(False), and scattering the
# original grid in color on the same axes.)
subpl2 = fig.add_subplot(2, 4, 8)
subpl2.scatter(embed2d_inv_predx, embed2d_inv_predy, s=10, c='r')

# Write the figure to disk in both formats, then display it.
for output_path in ('interpEmbedding.pdf', 'interpEmbedding.png'):
    plt.savefig(output_path)
plt.show()