In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist
from sklearn.metrics.pairwise import pairwise_distances

def gen_data(no_data, center_point, sd=0.1, dim=2):
    cov = np.diag([sd]*dim)
    return np.random.multivariate_normal(center_point, cov, no_data)

centers = [(-0.5,-0.2),(0,0.6),(0.5,0)]
training_data = []
for i in range(3):
    new_data = gen_data(30, centers[i] )
    training_data.append(new_data)
training_data = np.concatenate(training_data)



In [2]:
training_data.shape

(90, 2)

In [3]:
plt.scatter(training_data[:,0], training_data[:,1])

<matplotlib.collections.PathCollection at 0x7f5dfab2c518>

In [4]:
sd = 0.1
distance = pairwise_distances(training_data)
def computeRBF(data):
    return np.exp(-data/(2*sd**2))

training_rbf = computeRBF(distance) # now data span on 90 dimensions

print(training_rbf.shape)



(90, 90)


In [5]:
def centerize( df ):
    mean = np.mean( df, axis = 0 )
    centered = df - mean 
    return (centered, mean)

class PCA:
    def __init__(self, X):
        # X's shape (items,dimensions)
        (centered, self.means) = centerize(X)
        cov = np.cov( centered.T )

        (eig_values, eig_vectors) = np.linalg.eig(cov)

        eig_idx = np.argsort(-eig_values)
        (self.eig_values, self.eig_vectors) = (eig_values[eig_idx], eig_vectors[:,eig_idx])
        
    def project(self, X):
        # X's shape (items,dimensions)
        X = X - self.means
        return np.dot(X, self.eig_vectors)
    
    def reconstruct(self, X, eig_vectors_idx):
        res = np.dot( X, self.eig_vectors[:, eig_vectors_idx].T )
        res = res.T + self.means.reshape(len(self.means),1)
        return res.T

In [6]:
myPCA = PCA(training_rbf)
myPCA.eig_values[:5]



array([ 0.01722086,  0.01634792,  0.0147582 ,  0.01346044,  0.01334   ])

In [7]:
myPCA.eig_vectors[0].shape

(90,)

In [8]:


grid_space = 1.5
delta = 0.005

x = np.arange(-grid_space, grid_space + delta, delta)
y = np.arange(-grid_space, grid_space + delta, delta)
X, Y = np.meshgrid(x, y)

grid_points = len(X)
print(X.shape)
print(np.max(Y))

test_data = np.hstack([X.reshape((grid_points**2,1)),Y.reshape((grid_points**2,1))])
print(test_data.shape)

(601, 601)
1.5
(361201, 2)


In [11]:
print(X)
print(Y)

[[-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]
 [-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]
 [-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]
 ..., 
 [-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]
 [-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]
 [-1.5   -1.495 -1.49  ...,  1.49   1.495  1.5  ]]


In [9]:
test_data_rbf = list(map(lambda x: spanOnRBFDims(x, training_data), test_data ))
test_data_rbf = np.array(test_data_rbf).reshape( (len(test_data),90) )

NameError: name 'spanOnRBFDims' is not defined