In [32]:
import torch
import torch.nn as nn
from torchvision import datasets
import numpy as np
from linear_cca import linear_cca
from torch.utils.data import BatchSampler, SequentialSampler, Dataset
from DeepCCAModels import DeepCCA
from main import Solver
from utils import load_data, svm_classify
import matplotlib.pyplot as plt
try:
    import cPickle as thepickle
except ImportError:
    import _pickle as thepickle

import gzip
import numpy as np
torch.set_default_tensor_type(torch.DoubleTensor)

from multiview.embed.gcca import GCCA


In [4]:
%matplotlib inline
plt.style.use("ggplot")
%config InlineBackend.figure_format = 'svg'
np.set_printoptions(suppress=True) # don't use scientific [e.g. 5e10] notation

In [14]:
# Parameters Section
device = torch.device('cpu')
print("Using", torch.cuda.device_count(), "GPUs")

# the path to save the final learned features
save_to = './new_features.gz'

# the size of the new space learned by the model (number of the new features)
outdim_size = 50

# size of the input for view 1 and view 2
input_shape1 = 392
input_shape2 = 392

# number of layers with nodes in each one
# layer_sizes1 = [1024, 1024, 1024, outdim_size]
# layer_sizes2 = [1024, 1024, 1024, outdim_size]
layer_sizes1 = [2038, outdim_size]
layer_sizes2 = [1608, outdim_size]

# the parameters for training the network
learning_rate = 1e-3
epoch_num = 10
batch_size = 800

# the regularization parameter of the network
# seems necessary to avoid the gradient exploding especially when non-saturating activations are used
reg_par = 1e-5

# specifies if all the singular values should get used to calculate the correlation or just the top outdim_size ones
# if one option does not work for a network or dataset, try the other one
use_all_singular_values = False

# if a linear CCA should get applied on the learned features extracted from the networks
# it does not affect the performance on noisy MNIST significantly
apply_linear_cca = True
# end of parameters section
############

### Don't need this right now since not using noisy mnist
# Each view is stored in a gzip file separately. They will get downloaded the first time the code gets executed.
# Datasets get stored under the datasets folder of user's Keras folder
# normally under [Home Folder]/.keras/datasets/
# data1 = load_data('./noisymnist_view1.gz')
# data2 = load_data('./noisymnist_view2.gz')

Using 0 GPUs


In [15]:
def mnist_left_right():
    mnist_train = Dataset.mnistDataset = datasets.MNIST("./mnist", train=True, download=True)
    mnist_test = datasets.MNIST("./mnist/", train=False, download=True)

    data1 = []
    data2 = []

    #### get train data
    temp_array1 = np.zeros((50000, 14*28))
    temp_array2 = np.zeros((50000, 14*28))
    labels = []
    for i in range(len(mnist_train)-10000): # 
        img, label = mnist_train[i]
        labels.append(label)
        img = np.array(img) / 255
        image1, image2 = img[:,:14], img[:,14:]
        temp_array1[i,:] = np.reshape(image1, (1,14*28))
        temp_array2[i,:] = np.reshape(image2, (1,14*28))

    labels = np.array(labels)
    temp_array1 = torch.tensor(temp_array1)
    temp_array2 = torch.tensor(temp_array2)
    data1.append((temp_array1, labels))
    data2.append((temp_array2, labels))
    #### get eval data
    temp_array1 = np.zeros((10000, 14*28))
    temp_array2 = np.zeros((10000, 14*28))
    labels = []
    for i in range(10000): # 
        img, label = mnist_train[i+50000]
        labels.append(label)
        img = np.array(img) / 255
        image1, image2 = img[:,:14], img[:,14:]
        temp_array1[i,:] = np.reshape(image1, (1,14*28))
        temp_array2[i,:] = np.reshape(image2, (1,14*28))
    labels = np.array(labels)
    temp_array1 = torch.tensor(temp_array1)
    temp_array2 = torch.tensor(temp_array2)
    data1.append((temp_array1, labels))
    data2.append((temp_array2, labels))
    #### get test data
    temp_array1 = np.zeros((10000, 14*28))
    temp_array2 = np.zeros((10000, 14*28))
    labels = []
    for i in range(10000): # 
        img, label = mnist_test[i]
        labels.append(label)
        img = np.array(img) / 255
        image1, image2 = img[:,:14], img[:,14:]
        temp_array1[i,:] = np.reshape(image1, (1,14*28))
        temp_array2[i,:] = np.reshape(image2, (1,14*28))
    labels = np.array(labels)
    temp_array1 = torch.tensor(temp_array1)
    temp_array2 = torch.tensor(temp_array2)
    data1.append((temp_array1, labels))
    data2.append((temp_array2, labels))
    return data1, data2

In [22]:
class SplitMNIST(Dataset):
  """
  Returns 2 views of the MNIST dataset. View 1 is the left half of each image,
  and View 2 is the right half.
  """
  def __init__(self, train=True):
    super().__init__()
    self.mnistDataset = datasets.MNIST("./mnist", train=train, download=True)

  def __len__(self):
    return len(self.mnistDataset)

  def __getitem__(self, idx):
    randomIndex = lambda: np.random.randint(len(self.mnistDataset))
    image1, label1 = self.mnistDataset[idx]

    image1 = np.array(image1) / 255
    image1, image2 = image1[:,:14], image1[:,14:]


    image1 = torch.FloatTensor(image1).unsqueeze(0)
    image2 = torch.FloatTensor(image2).unsqueeze(0)

    return (image1, image2, label1)


In [23]:
data1, data2 = mnist_left_right()

In [24]:
# Building, training, and producing the new features by DCCA
model = DeepCCA(layer_sizes1, layer_sizes2, input_shape1,
                input_shape2, outdim_size, use_all_singular_values, device=device).double()
l_cca = None
if apply_linear_cca:
    l_cca = linear_cca()
solver = Solver(model, l_cca, outdim_size, epoch_num, batch_size,
                learning_rate, reg_par, device=device)
train1, train2 = data1[0][0], data2[0][0]
val1, val2 = data1[1][0], data2[1][0]
test1, test2 = data1[2][0], data2[2][0]
# val1=None
# test1=None
solver.fit(train1, train2, val1, val2, test1, test2)
# TODO: Save linear_cca model if needed

set_size = [0, train1.size(0), train1.size(
    0) + val1.size(0), train1.size(0) + val1.size(0) + test1.size(0)]
loss, outputs = solver.test(torch.cat([train1, val1, test1], dim=0), torch.cat(
    [train2, val2, test2], dim=0), apply_linear_cca)

[ INFO : 2020-02-11 10:36:39,382 ] - DataParallel(
  (module): DeepCCA(
    (model1): MlpNet(
      (layers): ModuleList(
        (0): Sequential(
          (0): Linear(in_features=392, out_features=2038, bias=True)
          (1): Sigmoid()
        )
        (1): Linear(in_features=2038, out_features=50, bias=True)
      )
    )
    (model2): MlpNet(
      (layers): ModuleList(
        (0): Sequential(
          (0): Linear(in_features=392, out_features=1608, bias=True)
          (1): Sigmoid()
        )
        (1): Linear(in_features=1608, out_features=50, bias=True)
      )
    )
  )
)
[ INFO : 2020-02-11 10:36:39,383 ] - RMSprop (
Parameter Group 0
    alpha: 0.99
    centered: False
    eps: 1e-08
    lr: 0.001
    momentum: 0
    weight_decay: 1e-05
)
[ INFO : 2020-02-11 10:36:45,579 ] - Epoch 1: val_loss improved from 0.0000 to -25.7335, saving model to checkpoint.model
[ INFO : 2020-02-11 10:36:45,586 ] - Epoch 1/10 - time: 6.20 - training_loss: -23.0980 - val_loss: -25.7335
[ 

Linear CCA started!


In [25]:
### Compute correlations between 50 features
new_data = []
# print(outputs)
for idx in range(3):
    new_data.append([outputs[0][set_size[idx]:set_size[idx + 1], :],
                     outputs[1][set_size[idx]:set_size[idx + 1], :], data1[idx][1]])

X1_train, X2_train, train_label = new_data[0]
X1_val, X2_val, valid_label = new_data[1]
X1_test, X2_test, test_label = new_data[2]

sum_corr_train = 0
for idx, (row1, row2) in enumerate(zip(X1_train, X2_train)):
    sum_corr_train += np.correlate(row1, row2)
    
corr_train = sum_corr_train/idx
    
sum_corr_test = 0
for idx, (row1, row2) in enumerate(zip(X1_test, X2_test)):
    sum_corr_test += np.correlate(row1, row2)
    
corr_test = sum_corr_test/idx
    

### Compute correlations of original images
X1_train, _ = data1[0]
X2_train, _ = data2[0]
X1_val, _ = data1[1]
X2_val, _ = data2[1]
X1_test, _ = data1[2]
X2_test, _ = data2[2]

X1_train = X1_train.numpy()
X2_train = X2_train.numpy()
X1_val = X1_val.numpy()
X2_val = X2_val.numpy()
X1_test = X1_test.numpy()
X2_test = X2_test.numpy()

sum_corr_train_orig = 0
for idx, (row1, row2) in enumerate(zip(X1_train, X2_train)):
    sum_corr_train_orig += np.correlate(row1, row2)
corr_train_orig = sum_corr_train_orig/idx    

sum_corr_test_orig = 0
for idx, (row1, row2) in enumerate(zip(X1_test, X2_test)):
    sum_corr_test_orig += np.correlate(row1, row2)
corr_test_orig = sum_corr_test_orig/idx

print("Cross-View Correlations before and after transformation")
print("Train: {} -> {}".format(corr_train_orig, corr_train))
print("Test: {} -> {}".format(corr_test_orig, corr_test))

Cross-View Correlations before and after transformation
Train: [3.18240939] -> [42.15251606]
Test: [3.17578552] -> [41.28774998]


In [27]:
# remove the examples that are all 0's so that GCCA can work
    
bad_rows = []
for idx, (row1, row2) in enumerate(zip(X1_train, X2_train)):
    if (not row1.any()) or (not row2.any()):  # one of the rows has only 0's
        bad_rows.append(idx)
        
X1_train_filtered = np.delete(X1_train, bad_rows, axis=0)
X2_train_filtered = np.delete(X2_train, bad_rows, axis=0)
print(X2_train_filtered.shape)

bad_rows = []
for idx, (row1, row2) in enumerate(zip(X1_test, X2_test)):
    if (not row1.any()) or (not row2.any()):  # one of the rows has only 0's
        bad_rows.append(idx)
        
X1_test_filtered = np.delete(X1_test, bad_rows, axis=0)
X2_test_filtered = np.delete(X2_test, bad_rows, axis=0)
print(X2_test_filtered.shape)

(49998, 392)
(10000, 392)


In [35]:
# Perform GCCA
gcca = GCCA(n_components=50)
Xs_train = [X1_train_filtered, X2_train_filtered]
Xs_test = [X1_test_filtered, X2_test_filtered]
latent_train1, latent_train2 = gcca.fit_transform(Xs_train)
latent_test1, latent_test2 = gcca.transform(Xs_test)

In [36]:
# Compute GCCA correlations
sum_corr_train = 0
for idx, (row1, row2) in enumerate(zip(latent_train1, latent_train2)):
    sum_corr_train += np.correlate(row1, row2)
    corr_train = sum_corr_train/idx    

sum_corr_test = 0
for idx, (row1, row2) in enumerate(zip(latent_test1, latent_test2)):
    sum_corr_test += np.correlate(row1, row2)
    corr_test = sum_corr_test/idx


print("Cross-View Correlations From GCCA")
print("Train: {}".format(corr_train))
print("Test: {}".format(corr_test))

  """


Cross-View Correlations From GCCA
Train: [16.03364892]
Test: [15.61656102]


  # Remove the CWD from sys.path while we load stuff.
