In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import brainiak.reconstruct.iem

In [None]:
# Build a tiny synthetic data set (12 trials x 9 dimensions) whose three
# feature classes are linearly separable, yet whose data matrix is ill
# conditioned: there are very few trials, the mixing matrix repeats columns,
# and the trials for feature value 120 are stacked twice.
n, dim = 300, 9
n_ = n // 3
np.random.seed(0)

# Mixing ("covariance") matrix: 5 random columns in [-.25, .25), followed by
# a copy of the first 4 of them.
C_0 = .5 * np.random.rand(dim, 5) - .25
C = np.hstack([C_0, C_0[:, :4]])

# Class centres: a ramp, the ramp rolled by 5 positions, the ramp reversed.
centers_0 = np.linspace(-1, 1, dim)
centers_60 = np.roll(centers_0, 5)
centers_120 = centers_0[::-1]

# Three noisy trials per class. The 120 class is drawn first and then
# appended twice, which produces the duplicated rows.
X_120 = np.random.randn(3, dim).dot(C) + centers_120
X = np.vstack(
    [np.random.randn(3, dim).dot(C) + mu for mu in (centers_0, centers_60)]
    + [X_120, X_120]
)

# Feature label of every row of X.
y = np.repeat([0., 60., 120.], [3, 3, 6])

print("Data, X, dimensions: " + str(list(X.shape)))

In [None]:
# Plot some perspectives of the synthetic data:
# a 3-D scatter plot, one 2-D projection, and the feature labels.

# Rows 0-2, 3-5 and 6-8 of X hold the trials of the three feature classes,
# each drawn in its own colour. Rows past 8 are not drawn.
class_rows = ((slice(0, 3), 'C0'), (slice(3, 6), 'C1'), (slice(6, 9), 'C2'))

# 3-D view of the first three dimensions.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for rows, colour in class_rows:
    # The marker must be passed by keyword: the fourth positional parameter
    # of Axes3D.scatter is `zdir`, not a marker/format string.
    ax.scatter(X[rows, 0], X[rows, 1], X[rows, 2], marker='.', color=colour)
ax.set_xlabel('dim 1')
ax.set_ylabel('dim 2')
ax.set_zlabel('dim 3')
ax.set_title('synthetic data: 1st 3 dimensions')
plt.show()

# 2-D projection onto the first and last dimensions.
plt.figure()
for rows, colour in class_rows:
    plt.plot(X[rows, 0], X[rows, 8], '.', color=colour)
plt.xlabel('dim 1')
plt.ylabel('dim 9')
plt.title('synthetic data: 2 of 9 dims.')
plt.show()

# Feature label of every trial.
plt.figure()
plt.plot(y)
plt.xlabel('Trials')
plt.ylabel('Feature')
plt.title('feature values')
plt.show()

The output should include a 3-D view of three of the nine dimensions of the data. Note that the clusters separate (i.e., you could, roughly, draw a line between them). The second plot shows just one 2-D projection of the data, which makes the separability clearer. The final plot just shows the feature labels.

In [None]:
# Build the IEM, fit it to the synthetic training data, then draw a fresh
# synthetic test set and predict the feature (direction) for each class.

s = brainiak.reconstruct.iem.InvertedEncoding(6, -30, 210)
s.fit(X, y)

# New trials, n_ per class, drawn in the order 0, 60, 120.
X2_0, X2_60, X2_120 = (
    np.random.randn(n_, dim).dot(C) + mu
    for mu in (centers_0, centers_60, centers_120)
)

# True feature value of every new trial.
y2_0, y2_60, y2_120 = (np.full(n_, value) for value in (0., 60., 120.))

# Output of predict() for each class.
r_hat_0, r_hat_60, r_hat_120 = (
    s.predict(trials) for trials in (X2_0, X2_60, X2_120)
)

# Output of the model's _predict_directions() for each class.
y_hat_0, y_hat_60, y_hat_120 = (
    s._predict_directions(trials) for trials in (X2_0, X2_60, X2_120)
)

The above cell should have produced an error ("Data matrix is nearly singular."). This is because the synthetic data for this example was designed to be ill-conditioned.

In [None]:
# Show the condition number of the training data matrix X.
# A very large value means X is ill conditioned.
np.linalg.cond(X)

Note: A very large condition number means the matrix is ill conditioned, i.e. it is rank deficient or numerically very close to being rank deficient.

In [None]:
# --- Training set: 100 trials per feature value, random 9 x 9 mixing matrix ---
n, dim = 300, 9
n_ = n // 3
np.random.seed(0)
C = .5 * np.random.rand(dim, dim) - .25  # mixing ("covariance") matrix
centers_0 = np.linspace(-1, 1, dim)
centers_60 = np.roll(centers_0, 5)
centers_120 = centers_0[::-1]
X = np.vstack([
    np.random.randn(n_, dim).dot(C) + mu
    for mu in (centers_0, centers_60, centers_120)
])

# Feature label of every training trial.
y = np.repeat([0., 60., 120.], n_)

# Create the IEM object and fit it on the training set.
Invt_model = brainiak.reconstruct.iem.InvertedEncoding(6, -30, 210)
Invt_model.fit(X, y)

# --- Test set: the offending lines, data deliberately ill conditioned ---
n_ = 3
# The mixing matrix repeats 4 of its 5 random columns, so it is non-invertible.
C_0 = .5 * np.random.rand(dim, 5) - .25
C = np.hstack([C_0, C_0[:, :4]])
# The 120 class is drawn first and then stacked twice (duplicated rows).
X_120 = np.random.randn(n_, dim).dot(C) + centers_120
X_t = np.vstack(
    [np.random.randn(n_, dim).dot(C) + mu for mu in (centers_0, centers_60)]
    + [X_120, X_120]
)
r_hat = Invt_model.predict(X_t)
m_reconstruct = np.mean(r_hat)

The above cell should have produced an error ('Data matrix is nearly singular.'). This is because the testing data in this example was not full rank, even though the training data was. This was by design, to demonstrate how the algorithm can break.

Possible solutions to the errors in both this example and the one above are to collect more data or, if that is impossible, to use fewer dimensions (e.g. a smaller volume of voxels).

In [None]:
# Show the condition number of the test data matrix X_t.
# A very large value means X_t is ill conditioned.
np.linalg.cond(X_t)

Note: A matrix with a very large condition number is not full rank and is ill conditioned.