# CCA_methods: Examples
In this notebook I demonstrate the general pipeline I use in the CCA_methods package.

### Imports

In [15]:
import numpy as np
import CCA_methods
import itertools
import os
import matplotlib.pyplot as plt

### Load MNIST Data


In [2]:
os.chdir('..')
train_set_1, val_set_1, test_set_1 = CCA_methods.mnist_utils.load_data('Data/noisymnist_view1.gz')
train_set_2, val_set_2, test_set_2 = CCA_methods.mnist_utils.load_data('Data/noisymnist_view2.gz')

train_set_1 = train_set_1[0][:1000]
train_set_2 = train_set_2[0][:1000]
val_set_1 = val_set_1[0][:1000]
val_set_2 = val_set_2[0][:1000]
test_set_1 = test_set_1[0][:1000]
test_set_2 = test_set_2[0][:1000]

print(train_set_1.shape)
print(train_set_2.shape)

loading data ...
loading data ...
(1000, 784)
(1000, 784)


### Settings

In [3]:
#The number of latent dimensions across models
outdim_size = 1
#The number of folds used for cross-validation/hyperparameter tuning
cv_folds = 5
#The number of iterations used for alternating least squares/iterative methods
max_als_iter = 5
#The number of epochs used for deep learning based models
epoch_num = 50

### Linear CCA
We can do this via a few different methods
- alternating least squares
- generalized cca (equivalent to SVD/Eigendecomposition)
- multiset cca (equivalent to SVD/Eigendecomposition)
- scikit learn (NIPALS)

(Note that although the MNIST data here is not full rank,
both alternating least squares and NIPALS find least squares solutions
and therefore this problem is avoided)

In [13]:
linear_cca = CCA_methods.linear.Wrapper(outdim_size=outdim_size)

linear_cca.fit(train_set_1, train_set_2)

linear_cca_results = np.stack((linear_cca.train_correlations[0,1], linear_cca.predict_corr(test_set_1, test_set_2)[0,1]))


scikit_cca = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='scikit')

scikit_cca.fit(train_set_1, train_set_2)

scikit_cca_results = np.stack((scikit_cca.train_correlations[0,1], scikit_cca.predict_corr(test_set_1, test_set_2)[0,1]))


gcca = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='gcca')

#small ammount of regularisation added since data is not full rank
params={'c':[1,1]}

gcca.fit(train_set_1, train_set_2, params=params)

gcca_results = np.stack((scikit_cca.train_correlations[0,1], scikit_cca.predict_corr(test_set_1, test_set_2)[0,1]))


### Regularized CCA with hyperparameter tuning
- penalized matrix decomposition ('pmd')
- sparse cca/alternating lasso regression ('scca')
- ridge cca/alternating ridge regression ('l2')
- parkhomenko sparse cca ('parkhomenko')
- elastic ('elastic')

parameter candidates for cross validation are given as a list of lists as shown in the examples

In [7]:
# PMD
c1 = [1, 3, 7, 9]
c2 = [1, 3, 7, 9]
param_candidates = {'c': list(itertools.product(c1, c2))}

pmd = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='pmd',
                                 max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     param_candidates=param_candidates,
                                                                     folds=cv_folds,verbose=True)

pmd_results = np.stack((pmd.train_correlations[0, 1, :], pmd.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# Elastic
c1 = [0.01, 0.1, 1]
c2 = [0.01, 0.1, 1]
l1_1 = [0.01, 0.01, 0.1]
l1_2 = [0.01, 0.01, 0.1]
param_candidates = {'c': list(itertools.product(c1, c2)), 'ratio': list(itertools.product(l1_1, l1_2))}

elastic = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='elastic',
                                    max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     param_candidates=param_candidates,
                                                                     folds=cv_folds,verbose=True)

elastic_results = np.stack((elastic.train_correlations[0, 1, :], elastic.predict_corr(test_set_1, test_set_2)[0, 1, :]))

cross validation with  pmd
number of folds:  5
{'c': (1, 1)}
0.6386345136967805
{'c': (1, 3)}
0.6386345136967805
{'c': (1, 7)}
0.6386345136967805
{'c': (1, 9)}
0.6386345136967805
{'c': (3, 1)}
0.6353841463328237
{'c': (3, 3)}
0.6353841463328237
{'c': (3, 7)}
0.6353841463328237
{'c': (3, 9)}
0.6352769269680133
{'c': (7, 1)}
0.6332411830464156
{'c': (7, 3)}
0.6332411830464156
{'c': (7, 7)}
0.6332411830464156
{'c': (7, 9)}
0.6332411830464156
{'c': (9, 1)}
0.6336151578639766
{'c': (9, 3)}
0.6336151578639766
{'c': (9, 7)}
0.6336151578639766
{'c': (9, 9)}
0.6336151578639766
Best score :  0.6386345136967805
{'c': (1, 1)}
cross validation with  elastic
number of folds:  5
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.06999213104306767
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.13485930890695902
{'c': (0.01, 0.01), 'ratio': (0.01, 0.1)}
0.009790397843851285
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.05448896601751946
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.04925159357960475
{'c': (0.01,

### Kernel CCA

Similarly, we can use kernel CCA methods:
- regularized kernel CCA ('linear')
- sparse cca/alternating lasso regression ('poly')
- ridge cca/alternating ridge regression ('gaussian')


In [9]:
# r-kernel cca
param_candidates = {'kernel': ['linear'], 'reg': [1e+4, 1e+5, 1e+6]}
kernel_reg = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='kernel',
                                        max_iter=max_als_iter).cv_fit(train_set_1,train_set_2,
                                                                      folds=cv_folds,
                                                                      param_candidates=param_candidates,
                                                                      verbose=True)
kernel_reg_results = np.stack((
    kernel_reg.train_correlations[0, 1, :],
    kernel_reg.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# kernel cca (poly)
param_candidates = {'kernel': ['poly'], 'degree': [2, 3, 4], 'reg': [1e+6, 1e+7, 1e+8]}

kernel_poly = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='kernel',
                                         max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     folds=cv_folds,
                                                                     param_candidates=param_candidates,
                                                                     verbose=True)

kernel_poly_results = np.stack((
    kernel_poly.train_correlations[0, 1, :],
    kernel_poly.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# kernel cca (gaussian)
param_candidates = {'kernel': ['gaussian'], 'sigma': [1e+2, 1e+3], 'reg': [1e+6, 1e+7, 1e+8]}

kernel_gaussian = CCA_methods.linear.Wrapper(outdim_size=outdim_size, method='kernel',
                                             max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     folds=cv_folds,
                                                                     param_candidates=param_candidates,
                                                                     verbose=True)

kernel_gaussian_results = np.stack((
        kernel_gaussian.train_correlations[0, 1, :],
        kernel_gaussian.predict_corr(test_set_1, test_set_2)[0, 1, :]))

cross validation with  kernel
number of folds:  5
{'kernel': 'linear', 'reg': 10000.0}
0.6532040204570014
{'kernel': 'linear', 'reg': 100000.0}
0.6765012094098057
{'kernel': 'linear', 'reg': 1000000.0}
0.6471253020144976
Best score :  0.6765012094098057
{'kernel': 'linear', 'reg': 100000.0}
cross validation with  kernel
number of folds:  5
{'kernel': 'poly', 'degree': 2, 'reg': 1000000.0}
0.6048838628740791
{'kernel': 'poly', 'degree': 2, 'reg': 10000000.0}
0.6048831125616787
{'kernel': 'poly', 'degree': 2, 'reg': 100000000.0}
0.6048830375279454
{'kernel': 'poly', 'degree': 3, 'reg': 1000000.0}
0.6063111158678961
{'kernel': 'poly', 'degree': 3, 'reg': 10000000.0}
0.6063094198182477
{'kernel': 'poly', 'degree': 3, 'reg': 100000000.0}
0.6063092502004535
{'kernel': 'poly', 'degree': 4, 'reg': 1000000.0}
0.6077528598342603
{'kernel': 'poly', 'degree': 4, 'reg': 10000000.0}
0.6077498302560856
{'kernel': 'poly', 'degree': 4, 'reg': 100000000.0}
0.6077495272570215
Best score :  0.607752859834

### Deep Learning

We also have deep CCA methods (and autoencoder variants)
- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE)
- Deep Generalized CCA (DGCCA)

Both of the CCA loss and the GCCA loss can be used for DCCA/DCCAE since they are equivalent for two views.

To implement DCCA use DCCAE class with lam=0 (default). This multiplies the reconstruction loss term by 0.

In [4]:
dcca = CCA_methods.deep.Wrapper(outdim_size=outdim_size, epoch_num=epoch_num, method='DCCAE',
                                         loss_type='cca')

dcca.fit(train_set_1, train_set_2)

dcca_results = np.stack((dcca.train_correlations, dcca.predict_corr(test_set_1, test_set_2)))


dgcca = CCA_methods.deep.Wrapper(outdim_size=outdim_size, epoch_num=epoch_num, method='DCCAE',
                                         loss_type='gcca')

dgcca.fit(train_set_1, train_set_2)

dgcca_results = np.stack((dcca.train_correlations, dcca.predict_corr(test_set_1, test_set_2)))

Number of model parameters 406050
====> Epoch: 0 Average train loss: -0.2428
====> Epoch: 0 Average val loss: -0.2366
Min loss -3.08
====> Epoch: 1 Average train loss: -0.7939
====> Epoch: 1 Average val loss: -0.2378
Min loss -3.09
====> Epoch: 2 Average train loss: -0.9289
====> Epoch: 2 Average val loss: -0.2001
====> Epoch: 3 Average train loss: -0.9653
====> Epoch: 3 Average val loss: -0.2340
====> Epoch: 4 Average train loss: -0.9685
====> Epoch: 4 Average val loss: -0.2705
Min loss -3.52
====> Epoch: 5 Average train loss: -0.9570
====> Epoch: 5 Average val loss: -0.2553
====> Epoch: 6 Average train loss: -0.9340
====> Epoch: 6 Average val loss: -0.2722
Min loss -3.54
====> Epoch: 7 Average train loss: -0.9089
====> Epoch: 7 Average val loss: -0.1893
====> Epoch: 8 Average train loss: -0.9088
====> Epoch: 8 Average val loss: -0.3018
Min loss -3.92
====> Epoch: 9 Average train loss: -0.9369
====> Epoch: 9 Average val loss: -0.2553
====> Epoch: 10 Average train loss: -0.9645
====> E

### Deep Variational Learning
Finally we have Deep Variational CCA methods.
- Deep Variational CCA (DVCCA)
- Deep Variational CCA - private (DVVCA_p)

These are both implemented by the DVCCA class with private=True/False and both_encoders=True/False. If both_encoders,
the encoder to the shared information Q(z_shared|x) is modelled for both x_1 and x_2 whereas if both_encoders is false
it is modelled for x_1 as in the paper

In [18]:
dvcca = CCA_methods.deep.Wrapper(outdim_size=outdim_size, epoch_num=epoch_num, method='DVCCA',private=False)

dvcca.fit(train_set_1, train_set_2)

dvcca_results = np.stack((dvcca.train_correlations, dvcca.predict_corr(test_set_1, test_set_2)))


dvcca_p = CCA_methods.deep.Wrapper(outdim_size=outdim_size, epoch_num=epoch_num, method='DCCAE',private=True)

dvcca_p.fit(train_set_1, train_set_2)

dvcca_p_results = np.stack((dvcca_p.train_correlations, dvcca_p.predict_corr(test_set_1, test_set_2)))


TypeError: can only concatenate list (not "NoneType") to list

### Make results plot to compare methods

In [16]:
all_labels = ['ALS', 'L2 - ALS', 'Witten', 'Parkhomenko', 'Waaijenborg - Elastic ALS',
              'scikit', 'DCCA']


all_results = np.stack(
    [linear_cca_results,pmd_results,elastic_results,kernel_reg_results,kernel_poly_results,
     kernel_gaussian_results,dcca_results,dgcca_results],
    axis=0)
all_labels = ['linear','pmd','elastic','linear kernel','polynomial kernel',
              'gaussian kernel', 'deep CCA', 'deep generalized CCA']

CCA_methods.plot_utils.plot_results(all_results, all_labels)
plt.show()

  del sys.path[0]
