# cca_zoo: Examples
In this notebook I demonstrate the general pipeline I use in the cca_zoo package.

### Imports

In [2]:
import numpy as np
import cca_zoo
import itertools
import os
import matplotlib
matplotlib.use('TKAgg', force=True)
import matplotlib.pyplot as plt

### Load MNIST Data

In [3]:
os.chdir('..')
train_set_1, val_set_1, test_set_1 = cca_zoo.mnist_utils.load_data('noisymnist_view1.gz')
train_set_2, val_set_2, test_set_2 = cca_zoo.mnist_utils.load_data('noisymnist_view2.gz')

train_set_1 = train_set_1[0][:1000]
train_set_2 = train_set_2[0][:1000]
val_set_1 = val_set_1[0][:1000]
val_set_2 = val_set_2[0][:1000]
test_set_1 = test_set_1[0][:1000]
test_set_2 = test_set_2[0][:1000]

print(train_set_1.shape)
print(train_set_2.shape)

loading data ...
loading data ...
(1000, 784)
(1000, 784)


### Settings

In [6]:
#The number of latent dimensions across models
latent_dims = 1
#The number of folds used for cross-validation/hyperparameter tuning
cv_folds = 5
#The number of iterations used for alternating least squares/iterative methods
max_als_iter = 200
#The number of epochs used for deep learning based models
epoch_num = 50

### Linear CCA
We can do this via a few different methods
- alternating least squares
- generalized cca (equivalent to SVD/Eigendecomposition)
- multiset cca (equivalent to SVD/Eigendecomposition)
- scikit learn (NIPALS)

(Note that although the MNIST data here is not full rank,
both alternating least squares and NIPALS find least squares solutions
and therefore this problem is avoided)

In [7]:
linear_cca = cca_zoo.linear.Wrapper(latent_dims=latent_dims)

linear_cca.fit(train_set_1, train_set_2)

linear_cca_results = np.stack((linear_cca.train_correlations[0,1], linear_cca.predict_corr(test_set_1, test_set_2)[0,1]))


scikit_cca = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='scikit')

scikit_cca.fit(train_set_1, train_set_2)

scikit_cca_results = np.stack((scikit_cca.train_correlations[0,1], scikit_cca.predict_corr(test_set_1, test_set_2)[0,1]))


gcca = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='gcca')

#small ammount of regularisation added since data is not full rank
params={'c':[1,1]}

gcca.fit(train_set_1, train_set_2, params=params)

gcca_results = np.stack((scikit_cca.train_correlations[0,1], scikit_cca.predict_corr(test_set_1, test_set_2)[0,1]))


### Regularized CCA with hyperparameter tuning
- penalized matrix decomposition ('pmd')
- sparse cca/alternating lasso regression ('scca')
- ridge cca/alternating ridge regression ('l2')
- parkhomenko sparse cca ('parkhomenko')
- elastic ('elastic')

parameter candidates for cross validation are given as a list of lists as shown in the examples

In [5]:
# PMD
c1 = [1, 3, 7, 9]
c2 = [1, 3, 7, 9]
param_candidates = {'c': list(itertools.product(c1, c2))}

pmd = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='pmd',
                                 max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     param_candidates=param_candidates,
                                                                     folds=cv_folds,verbose=True)

pmd_results = np.stack((pmd.train_correlations[0, 1, :], pmd.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# Elastic
c1 = [0.01, 0.1, 1]
c2 = [0.01, 0.1, 1]
l1_1 = [0.01, 0.01, 0.1]
l1_2 = [0.01, 0.01, 0.1]
param_candidates = {'c': list(itertools.product(c1, c2)), 'ratio': list(itertools.product(l1_1, l1_2))}

elastic = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='elastic',
                                    max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     param_candidates=param_candidates,
                                                                     folds=cv_folds,verbose=True)

elastic_results = np.stack((elastic.train_correlations[0, 1, :], elastic.predict_corr(test_set_1, test_set_2)[0, 1, :]))

cross validation with  pmd
number of folds:  5
{'c': (1, 1)}
0.6425641715357652
{'c': (1, 3)}
0.6425641715357652
{'c': (1, 7)}
0.6425641715357652
{'c': (1, 9)}
0.6425641715357652
{'c': (3, 1)}
0.6371394001653481
{'c': (3, 3)}
0.6371394001653481
{'c': (3, 7)}
0.6371394001653481
{'c': (3, 9)}
0.6376679112465146
{'c': (7, 1)}
0.6470317773753032
{'c': (7, 3)}
0.6470317773753032
{'c': (7, 7)}
0.6470317773753032
{'c': (7, 9)}
0.6470317773753032
{'c': (9, 1)}
0.6441538131918592
{'c': (9, 3)}
0.6441538131918592
{'c': (9, 7)}
0.6441538131918592
{'c': (9, 9)}
0.6441538131918592
Best score :  0.6470317773753032
{'c': (7, 1)}
cross validation with  elastic
number of folds:  5
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.07366394435365718
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.07060993526266765
{'c': (0.01, 0.01), 'ratio': (0.01, 0.1)}
0.03716986524138696
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.0002004684369551958
{'c': (0.01, 0.01), 'ratio': (0.01, 0.01)}
0.04952230409927184
{'c': (0.01

### Kernel CCA

Similarly, we can use kernel CCA methods:
- regularized kernel CCA ('linear')
- sparse cca/alternating lasso regression ('poly')
- ridge cca/alternating ridge regression ('gaussian')


In [6]:
# r-kernel cca
param_candidates = {'kernel': ['linear'], 'reg': [1e+4, 1e+5, 1e+6]}
kernel_reg = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='kernel',
                                        max_iter=max_als_iter).cv_fit(train_set_1,train_set_2,
                                                                      folds=cv_folds,
                                                                      param_candidates=param_candidates,
                                                                      verbose=True)
kernel_reg_results = np.stack((
    kernel_reg.train_correlations[0, 1, :],
    kernel_reg.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# kernel cca (poly)
param_candidates = {'kernel': ['poly'], 'degree': [2, 3, 4], 'reg': [1e+6, 1e+7, 1e+8]}

kernel_poly = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='kernel',
                                         max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     folds=cv_folds,
                                                                     param_candidates=param_candidates,
                                                                     verbose=True)

kernel_poly_results = np.stack((
    kernel_poly.train_correlations[0, 1, :],
    kernel_poly.predict_corr(test_set_1, test_set_2)[0, 1, :]))

# kernel cca (gaussian)
param_candidates = {'kernel': ['gaussian'], 'sigma': [1e+2, 1e+3], 'reg': [1e+6, 1e+7, 1e+8]}

kernel_gaussian = cca_zoo.linear.Wrapper(latent_dims=latent_dims, method='kernel',
                                             max_iter=max_als_iter).cv_fit(train_set_1, train_set_2,
                                                                     folds=cv_folds,
                                                                     param_candidates=param_candidates,
                                                                     verbose=True)

kernel_gaussian_results = np.stack((
        kernel_gaussian.train_correlations[0, 1, :],
        kernel_gaussian.predict_corr(test_set_1, test_set_2)[0, 1, :]))

cross validation with  kernel
number of folds:  5
{'kernel': 'linear', 'reg': 10000.0}
0.6632553642351521
{'kernel': 'linear', 'reg': 100000.0}
0.6832730512309577
{'kernel': 'linear', 'reg': 1000000.0}
0.6503526705119874
Best score :  0.6832730512309577
{'kernel': 'linear', 'reg': 100000.0}
cross validation with  kernel
number of folds:  5
{'kernel': 'poly', 'degree': 2, 'reg': 1000000.0}
0.6055479869308276
{'kernel': 'poly', 'degree': 2, 'reg': 10000000.0}
0.6055470978669287
{'kernel': 'poly', 'degree': 2, 'reg': 100000000.0}
0.6055470089572388
{'kernel': 'poly', 'degree': 3, 'reg': 1000000.0}
0.6068519584932203
{'kernel': 'poly', 'degree': 3, 'reg': 10000000.0}
0.6068499473122219
{'kernel': 'poly', 'degree': 3, 'reg': 100000000.0}
0.606849746177138
{'kernel': 'poly', 'degree': 4, 'reg': 1000000.0}
0.6081621219744734
{'kernel': 'poly', 'degree': 4, 'reg': 10000000.0}
0.608158526582199
{'kernel': 'poly', 'degree': 4, 'reg': 100000000.0}
0.608158166988369
Best score :  0.608162121974473

### Deep Learning

We also have deep CCA methods (and autoencoder variants)
- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE)
- Deep Generalized CCA (DGCCA)

Both of the CCA loss and the GCCA loss can be used for DCCA/DCCAE since they are equivalent for two views.

To implement DCCA use DCCAE class with lam=0 (default). This multiplies the reconstruction loss term by 0.

In [7]:
dcca = cca_zoo.deep.Wrapper(latent_dims=latent_dims, epoch_num=epoch_num, method='DCCAE',
                                         loss_type='cca')

dcca.fit(train_set_1, train_set_2)

dcca_results = np.stack((dcca.train_correlations, dcca.predict_corr(test_set_1, test_set_2)))


dgcca = cca_zoo.deep.Wrapper(latent_dims=latent_dims, epoch_num=epoch_num, method='DCCAE',
                                         loss_type='gcca')

dgcca.fit(train_set_1, train_set_2)

dgcca_results = np.stack((dcca.train_correlations, dcca.predict_corr(test_set_1, test_set_2)))

Number of model parameters 406050
====> Epoch: 0 Average train loss: -0.2710
====> Epoch: 0 Average val loss: -0.2761
Min loss -3.59
====> Epoch: 1 Average train loss: -0.8183
====> Epoch: 1 Average val loss: -0.2610
====> Epoch: 2 Average train loss: -0.9335
====> Epoch: 2 Average val loss: -0.2901
Min loss -3.77
====> Epoch: 3 Average train loss: -0.9668
====> Epoch: 3 Average val loss: -0.3015
Min loss -3.92
====> Epoch: 4 Average train loss: -0.9739
====> Epoch: 4 Average val loss: -0.2962
====> Epoch: 5 Average train loss: -0.9692
====> Epoch: 5 Average val loss: -0.3169
Min loss -4.12
====> Epoch: 6 Average train loss: -0.9478
====> Epoch: 6 Average val loss: -0.3434
Min loss -4.46
====> Epoch: 7 Average train loss: -0.9345
====> Epoch: 7 Average val loss: -0.3229
====> Epoch: 8 Average train loss: -0.9213
====> Epoch: 8 Average val loss: -0.3219
====> Epoch: 9 Average train loss: -0.9388
====> Epoch: 9 Average val loss: -0.3294
====> Epoch: 10 Average train loss: -0.9560
====> E

### Deep Variational Learning
Finally we have Deep Variational CCA methods.
- Deep Variational CCA (DVCCA)
- Deep Variational CCA - private (DVVCA_p)

These are both implemented by the DVCCA class with private=True/False and both_encoders=True/False. If both_encoders,
the encoder to the shared information Q(z_shared|x) is modelled for both x_1 and x_2 whereas if both_encoders is false
it is modelled for x_1 as in the paper

In [8]:
dvcca = cca_zoo.deep.Wrapper(latent_dims=latent_dims,epoch_num=epoch_num, method='DVCCA',private=False)

dvcca.fit(train_set_1, train_set_2)

dvcca_results = np.stack((dvcca.train_correlations, dvcca.predict_corr(test_set_1, test_set_2)))


dvcca_p = cca_zoo.deep.Wrapper(latent_dims=latent_dims, epoch_num=epoch_num, method='DVCCA',private=True)

dvcca_p.fit(train_set_1, train_set_2)

dvcca_p_results = np.stack((dvcca_p.train_correlations, dvcca_p.predict_corr(test_set_1, test_set_2)))


Number of model parameters 608808
====> Epoch: 0 Average train loss: 3061.8955
====> Epoch: 0 Average val loss: 2076.2931
====> Epoch: 1 Average train loss: 2102.6470
====> Epoch: 1 Average val loss: 2952.6103
====> Epoch: 2 Average train loss: 2037.6648
====> Epoch: 2 Average val loss: 1909.9441
====> Epoch: 3 Average train loss: 2037.5012
====> Epoch: 3 Average val loss: 1925.7237
====> Epoch: 4 Average train loss: 2026.7704
====> Epoch: 4 Average val loss: 2184.4479
====> Epoch: 5 Average train loss: 2011.8619
====> Epoch: 5 Average val loss: 1892.8650
====> Epoch: 6 Average train loss: 1994.3065
====> Epoch: 6 Average val loss: 1891.6295
====> Epoch: 7 Average train loss: 1985.5509
====> Epoch: 7 Average val loss: 1883.3684
====> Epoch: 8 Average train loss: 1981.5797
====> Epoch: 8 Average val loss: 1880.6190
====> Epoch: 9 Average train loss: 1981.6169
====> Epoch: 9 Average val loss: 1873.9551
Early stopping!
Number of model parameters 609320
====> Epoch: 0 Average train loss: 3

### Make results plot to compare methods

In [9]:
all_results = np.stack(
    [linear_cca_results,pmd_results,elastic_results,kernel_reg_results,kernel_poly_results,
     kernel_gaussian_results,dcca_results,dgcca_results],
    axis=0)
all_labels = ['linear','pmd','elastic','linear kernel','polynomial kernel',
              'gaussian kernel', 'deep CCA', 'deep generalized CCA']

cca_zoo.plot_utils.plot_results(all_results, all_labels)
plt.show()