<a href="https://colab.research.google.com/github/jameschapman19/cca_zoo/blob/master/cca_zoo_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install cca-zoo and scipy into this runtime, upgrading them if already present.
!pip install cca-zoo --upgrade
!pip install scipy --upgrade

Collecting cca-zoo
[?25l  Downloading https://files.pythonhosted.org/packages/ea/a1/371f6838d30b72f38bba242ae3d7d9d488c2659a63b6be9bb7ad3a052b7d/cca_zoo-1.1.22-py3-none-any.whl (43kB)
[K     |███████▌                        | 10kB 12.1MB/s eta 0:00:01[K     |███████████████                 | 20kB 13.5MB/s eta 0:00:01[K     |██████████████████████▍         | 30kB 9.2MB/s eta 0:00:01[K     |█████████████████████████████▉  | 40kB 7.6MB/s eta 0:00:01[K     |████████████████████████████████| 51kB 2.6MB/s 
Collecting scipy>=1.5
[?25l  Downloading https://files.pythonhosted.org/packages/b6/3a/9e0649ab2d5ade703baa70ef980aa08739226e5d6a642f084bb201a92fc2/scipy-1.6.1-cp37-cp37m-manylinux1_x86_64.whl (27.4MB)
[K     |████████████████████████████████| 27.4MB 156kB/s 
Collecting mvlearn
[?25l  Downloading https://files.pythonhosted.org/packages/f8/a1/77ee37d526442c7680734d9a30e671f6dcaca7b38ce49e50f76796301e07/mvlearn-0.4.1-py3-none-any.whl (2.1MB)
[K     |███████████████████████████

In [2]:
# Imports
import numpy as np
from cca_zoo import wrappers
from cca_zoo import data
import itertools
import os
import matplotlib.pyplot as plt
from torch.utils.data import Subset
from torch import optim

# Load the noisy FashionMNIST data
# Seed NumPy's global RNG so that the np.random.shuffle calls below select the same
# train/validation/test subsets every time the notebook is run from a fresh kernel.
random_seed = 0
np.random.seed(random_seed)
# Move the working directory up one level before the datasets are created.
# NOTE(review): this is not idempotent - re-running this cell moves up another level.
os.chdir('..')
# Target number of samples in each of the train, validation and test subsets
N = 500
dataset = data.Noisy_MNIST_Dataset(mnist_type='FashionMNIST', train=True)
# Shuffle the first 2 * N training indices and split them evenly into train and validation ids
ids = np.arange(min(2 * N, len(dataset)))
np.random.shuffle(ids)
train_ids, val_ids = np.array_split(ids, 2)
val_dataset = Subset(dataset, val_ids)
train_dataset = Subset(dataset, train_ids)
# The test subset is drawn from the separate train=False dataset
test_dataset = data.Noisy_MNIST_Dataset(mnist_type='FashionMNIST', train=False)
test_ids = np.arange(min(N, len(test_dataset)))
np.random.shuffle(test_ids)
test_dataset = Subset(test_dataset, test_ids)
# Convert each subset to numpy via the underlying dataset's to_numpy; the views are what
# the wrappers.* models below are fitted on, while the Subset objects go to the deep models.
train_view_1, train_view_2, train_rotations, train_OH_labels, train_labels = train_dataset.dataset.to_numpy(
    train_dataset.indices)
val_view_1, val_view_2, val_rotations, val_OH_labels, val_labels = val_dataset.dataset.to_numpy(val_dataset.indices)
test_view_1, test_view_2, test_rotations, test_OH_labels, test_labels = test_dataset.dataset.to_numpy(
    test_dataset.indices)

# Settings shared by the models in this notebook

# --- model size ---
# Latent dimensionality requested from every model
latent_dims = 2

# --- hyperparameter tuning ---
# How many cross-validation folds each grid search uses
cv_folds = 5
# Parallel jobs for the grid searches (0 disables parallelism)
jobs = 2

# --- training length ---
# Iteration cap for the iterative (alternating) algorithms
max_iter = 2
# Training epochs for the deep models
epochs = 50

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../../data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../../data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../../data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ../../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../../data/FashionMNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Canonical Correlation Analysis

In [4]:
"""
### Linear CCA by eigendecomposition
"""
# Build the model and fit it on the two training views
linear_cca = wrappers.CCA(latent_dims=latent_dims)
linear_cca.fit(train_view_1, train_view_2)

# Stack the [0, 1] entry of the training correlations on top of the same entry
# computed by predict_corr on the test views
linear_cca_train_corr = linear_cca.train_correlations[0, 1]
linear_cca_test_corr = linear_cca.predict_corr(test_view_1, test_view_2)[0, 1]
linear_cca_results = np.stack([linear_cca_train_corr, linear_cca_test_corr])

"""
### Linear CCA by alternating least squares (can pass more than 2 views)
"""

# Same two-view fit as above, this time with the CCA_ALS wrapper
linear_cca_als = wrappers.CCA_ALS(latent_dims=latent_dims)
linear_cca_als.fit(train_view_1, train_view_2)

# Train ([0, 1] entry of train_correlations) and test ([0, 1] entry of predict_corr) rows
linear_cca_als_train_corr = linear_cca_als.train_correlations[0, 1]
linear_cca_als_test_corr = linear_cca_als.predict_corr(test_view_1, test_view_2)[0, 1]
linear_cca_als_results = np.stack([linear_cca_als_train_corr, linear_cca_als_test_corr])

# Partial Least Squares


In [5]:
"""
### PLS with scikit-learn (only permits 2 views)
"""
# Fit PLS on the two training views
pls = wrappers.PLS(latent_dims=latent_dims)
pls.fit(train_view_1, train_view_2)

# Train row followed by test row, both taken from the [0, 1] entry
pls_train_corr = pls.train_correlations[0, 1]
pls_test_corr = pls.predict_corr(test_view_1, test_view_2)[0, 1]
pls_results = np.stack([pls_train_corr, pls_test_corr])

# Extension to multiple views



In [7]:
"""
### (Regularized) Generalized CCA(can pass more than 2 views)
"""
# A small amount of regularisation is added since the data is not full rank
# (one value per view passed to fit)
c = [0.5, 0.5, 0.5]

gcca = wrappers.GCCA(c=c, latent_dims=latent_dims)

# Three views are passed; the third is train_view_1 again
gcca.fit(train_view_1, train_view_2, train_view_1)

# Only the first two views are used for the reported train/test correlations
gcca_train_corr = gcca.train_correlations[0, 1]
gcca_test_corr = gcca.predict_corr(test_view_1, test_view_2)[0, 1]
gcca_results = np.stack([gcca_train_corr, gcca_test_corr])

"""
### (Regularized) Multiset CCA(can pass more than 2 views)
"""

# Multiset CCA with the regularisation list `c` defined for GCCA above
mcca = wrappers.MCCA(c=c, latent_dims=latent_dims)

# Three views are passed; the third is train_view_1 again
mcca.fit(train_view_1, train_view_2, train_view_1)

# Train row and test row from the [0, 1] entry
mcca_train_corr = mcca.train_correlations[0, 1]
mcca_test_corr = mcca.predict_corr(test_view_1, test_view_2)[0, 1]
mcca_results = np.stack([mcca_train_corr, mcca_test_corr])

"""
### Multiset CCA by alternating least squares
"""
# CCA_ALS fitted on three views, capped at max_iter iterations
mcca_als = wrappers.CCA_ALS(max_iter=max_iter, latent_dims=latent_dims)
mcca_als.fit(train_view_1, train_view_2, train_view_1)

# Train row and test row from the [0, 1] entry
mcca_als_train_corr = mcca_als.train_correlations[0, 1]
mcca_als_test_corr = mcca_als.predict_corr(test_view_1, test_view_2)[0, 1]
mcca_als_results = np.stack([mcca_als_train_corr, mcca_als_test_corr])

"""
### Multiset PLS by alternating least squares
"""
# PLS fitted on three views (the third is train_view_1 again), capped at max_iter iterations
mcca_pls = wrappers.PLS(latent_dims=latent_dims, max_iter=max_iter)

mcca_pls.fit(train_view_1, train_view_2, train_view_1)

# Both rows must come from mcca_pls: the training row previously read
# mcca_als.train_correlations, which mixed another model's numbers into these results.
mcca_pls_results = np.stack(
    (mcca_pls.train_correlations[0, 1], mcca_pls.predict_corr(test_view_1, test_view_2)[0, 1]))

# Weighted GCCA/Missing Observation GCCA

In [16]:
# Observation matrix with one row per view and N columns.
# Entries are set to 0 for view 0 from column 200 onwards and for view 1 in the first 100 columns
# (presumably 0 marks a sample as unobserved in that view - TODO confirm against the GCCA docs).
K = np.ones((3, N))
K[0, 200:] = 0
K[1, :100] = 0

# One weight per view
view_weights = [1, 2, 1.2]

# Regularisation, one value per view
c = [0.5, 0.5, 0.5]

# Use names distinct from the unweighted GCCA model fitted earlier: rebinding `gcca` and
# `gcca_results` here would overwrite those results, so the 'gcca' entry of the final
# comparison plot would silently show this weighted model instead.
weighted_gcca = wrappers.GCCA(latent_dims=latent_dims, c=c, K=K, view_weights=view_weights)

# Three views are passed; the third is train_view_1 again
weighted_gcca.fit(train_view_1, train_view_2, train_view_1)

weighted_gcca_results = np.stack(
    (weighted_gcca.train_correlations[0, 1], weighted_gcca.predict_corr(test_view_1, test_view_2)[0, 1]))

# Regularised CCA solutions based on alternating minimisation/alternating least squares

We implement Witten's penalized matrix decomposition form of sparse CCA using 'pmd'

We implement Waaijenborg's penalized CCA using elastic net using 'elastic'

We implement Mai's sparse CCA using 'scca'

Furthermore, any of these methods can be extended to multiple views. Witten describes this method explicitly.

In [19]:
"""
### Ridge CCA (can pass more than 2 views)
"""
# Candidate regularisation values for view 1 (c1) and view 2 (c2)
c1 = [0.1, 0.3, 0.7, 0.9]
c2 = [0.1, 0.3, 0.7, 0.9]
# Every (c1, c2) pairing is tried by the grid search
param_candidates = {'c': [(first, second) for first in c1 for second in c2]}

# gridsearch_fit returns the model that is then used for the results below
ridge_estimator = wrappers.rCCA(latent_dims=latent_dims)
ridge = ridge_estimator.gridsearch_fit(train_view_1, train_view_2,
                                       param_candidates=param_candidates,
                                       folds=cv_folds,
                                       jobs=jobs,
                                       verbose=True,
                                       plot=True)

# Train row and test row from the [0, 1, :] slice
ridge_train_corr = ridge.train_correlations[0, 1, :]
ridge_test_corr = ridge.predict_corr(test_view_1, test_view_2)[0, 1, :]
ridge_results = np.stack([ridge_train_corr, ridge_test_corr])

"""
### Sparse CCA (Penalized Matrix Decomposition) (can pass more than 2 views)
"""

# PMD: candidate penalty values for view 1 (c1) and view 2 (c2)
c1 = [1, 3, 7, 9]
c2 = [1, 3, 7, 9]
# Every (c1, c2) pairing is tried by the grid search
param_candidates = {'c': [(first, second) for first in c1 for second in c2]}

# gridsearch_fit returns the model that is then used for the results below
pmd_estimator = wrappers.PMD(latent_dims=latent_dims, max_iter=max_iter)
pmd = pmd_estimator.gridsearch_fit(train_view_1, train_view_2,
                                   param_candidates=param_candidates,
                                   folds=cv_folds,
                                   jobs=jobs,
                                   verbose=True,
                                   plot=True)

# Train row and test row from the [0, 1, :] slice
pmd_train_corr = pmd.train_correlations[0, 1, :]
pmd_test_corr = pmd.predict_corr(test_view_1, test_view_2)[0, 1, :]
pmd_results = np.stack([pmd_train_corr, pmd_test_corr])

"""
### Sparse CCA (can pass more than 2 views)
"""

# Sparse CCA: candidate penalty values for view 1 (c1) and view 2 (c2)
c1 = [0.00001, 0.0001]
c2 = [0.00001, 0.0001]
# Every (c1, c2) pairing is tried by the grid search
scca_grid = list(itertools.product(c1, c2))
param_candidates = {'c': scca_grid}

# gridsearch_fit returns the model that is then used for the results below
scca_estimator = wrappers.SCCA(latent_dims=latent_dims, max_iter=max_iter)
scca = scca_estimator.gridsearch_fit(train_view_1, train_view_2,
                                     param_candidates=param_candidates,
                                     folds=cv_folds,
                                     jobs=jobs,
                                     verbose=True,
                                     plot=True)

# Train row and test row from the [0, 1, :] slice
scca_train_corr = scca.train_correlations[0, 1, :]
scca_test_corr = scca.predict_corr(test_view_1, test_view_2)[0, 1, :]
scca_results = np.stack([scca_train_corr, scca_test_corr])


"""
### Elastic CCA (can pass more than 2 views)
"""

# Elastic CCA: candidate values for view 1 (c1, l1_1) and view 2 (c2, l1_2)
c1 = [0.001, 0.0001]
c2 = [0.001, 0.0001]
l1_1 = [0.01, 0.1]
l1_2 = [0.01, 0.1]
# Two grids are searched: every (c1, c2) pairing and every (l1_1, l1_2) pairing
elastic_c_grid = list(itertools.product(c1, c2))
elastic_l1_grid = list(itertools.product(l1_1, l1_2))
param_candidates = {'c': elastic_c_grid, 'l1_ratio': elastic_l1_grid}

# gridsearch_fit returns the model that is then used for the results below
elastic_estimator = wrappers.ElasticCCA(latent_dims=latent_dims, max_iter=max_iter)
elastic = elastic_estimator.gridsearch_fit(train_view_1, train_view_2,
                                           param_candidates=param_candidates,
                                           folds=cv_folds,
                                           jobs=jobs,
                                           verbose=True,
                                           plot=True)

# Train row and test row from the [0, 1, :] slice
elastic_train_corr = elastic.train_correlations[0, 1, :]
elastic_test_corr = elastic.predict_corr(test_view_1, test_view_2)[0, 1, :]
elastic_results = np.stack([elastic_train_corr, elastic_test_corr])

cross validation
number of folds:  5
Best score :  1.3529892529635505
Standard deviation :  0.059119304363148245
{'c': (0.9, 0.9)}
cross validation
number of folds:  5
Best score :  1.2857203069492038
Standard deviation :  0.07696203372638133
{'c': (9, 9)}
cross validation
number of folds:  5
Best score :  1.0367573399679475
Standard deviation :  0.05797312103256841
{'c': (0.0001, 1e-05)}
cross validation
number of folds:  5
Best score :  1.1020018271387282
Standard deviation :  0.11042487305609101
{'c': (0.001, 0.001), 'l1_ratio': (0.1, 0.01)}


# Kernel CCA

In [9]:
"""
### Kernel CCA

Similarly, we can use kernel CCA methods with [method='kernel']

We can use different kernels and their associated parameters in a similar manner to before
- regularized linear kernel CCA: parameters :  'kernel'='linear', 0<'c'<1
- polynomial kernel CCA: parameters : 'kernel'='poly', 'degree', 0<'c'<1
- gaussian rbf kernel CCA: parameters : 'kernel'='gaussian', 'sigma', 0<'c'<1
"""
# %%
# Regularised linear-kernel CCA
# c1 / c2 are the candidate regularisation values for the two views; the polynomial
# and gaussian grids further down in this cell reuse them.
c1 = [0.9, 0.99]
c2 = [0.9, 0.99]

param_candidates = {'kernel': ['linear'], 'c': [(first, second) for first in c1 for second in c2]}

# gridsearch_fit returns the model that is then used for the results below
kernel_reg_estimator = wrappers.KCCA(latent_dims=latent_dims)
kernel_reg = kernel_reg_estimator.gridsearch_fit(train_view_1, train_view_2,
                                                 param_candidates=param_candidates,
                                                 folds=cv_folds,
                                                 jobs=jobs,
                                                 verbose=True,
                                                 plot=True)

# Train row and test row from the [0, 1, :] slice
kernel_reg_train_corr = kernel_reg.train_correlations[0, 1, :]
kernel_reg_test_corr = kernel_reg.predict_corr(test_view_1, test_view_2)[0, 1, :]
kernel_reg_results = np.stack([kernel_reg_train_corr, kernel_reg_test_corr])

# Polynomial-kernel CCA: search over the degree and the (c1, c2) pairings defined above
kernel_poly_c_grid = list(itertools.product(c1, c2))
param_candidates = {'kernel': ['poly'], 'degree': [2, 3], 'c': kernel_poly_c_grid}

# gridsearch_fit returns the model that is then used for the results below
kernel_poly_estimator = wrappers.KCCA(latent_dims=latent_dims)
kernel_poly = kernel_poly_estimator.gridsearch_fit(train_view_1, train_view_2,
                                                   param_candidates=param_candidates,
                                                   folds=cv_folds,
                                                   jobs=jobs,
                                                   verbose=True,
                                                   plot=True)

# Train row and test row from the [0, 1, :] slice
kernel_poly_train_corr = kernel_poly.train_correlations[0, 1, :]
kernel_poly_test_corr = kernel_poly.predict_corr(test_view_1, test_view_2)[0, 1, :]
kernel_poly_results = np.stack([kernel_poly_train_corr, kernel_poly_test_corr])

# Gaussian-kernel CCA: the kernel is requested with the name 'rbf' in the grid below;
# search over sigma and the (c1, c2) pairings defined above
kernel_gaussian_c_grid = list(itertools.product(c1, c2))
param_candidates = {'kernel': ['rbf'], 'sigma': [1e+1, 1e+2, 1e+3], 'c': kernel_gaussian_c_grid}

# gridsearch_fit returns the model that is then used for the results below
kernel_gaussian_estimator = wrappers.KCCA(latent_dims=latent_dims)
kernel_gaussian = kernel_gaussian_estimator.gridsearch_fit(train_view_1, train_view_2,
                                                           param_candidates=param_candidates,
                                                           folds=cv_folds,
                                                           jobs=jobs,
                                                           verbose=True,
                                                           plot=True)

# Train row and test row from the [0, 1, :] slice
kernel_gaussian_train_corr = kernel_gaussian.train_correlations[0, 1, :]
kernel_gaussian_test_corr = kernel_gaussian.predict_corr(test_view_1, test_view_2)[0, 1, :]
kernel_gaussian_results = np.stack([kernel_gaussian_train_corr, kernel_gaussian_test_corr])

cross validation
number of folds:  5
Best score :  1.541090705230807
Standard deviation :  0.03833964743929596
{'kernel': 'linear', 'c': (0.99, 0.99)}
cross validation
number of folds:  5
Best score :  1.0910477287449436
Standard deviation :  0.03801607770360919
{'kernel': 'poly', 'degree': 3, 'c': (0.9, 0.9)}
cross validation
number of folds:  5
Best score :  1.1812386613423436
Standard deviation :  0.04826994218032073
{'kernel': 'rbf', 'sigma': 100.0, 'c': (0.9, 0.9)}


# Deep CCA

DCCA can be optimized using Andrew's original tracenorm objective or, by passing the argument als=True, using Wang's DCCA by nonlinear orthogonal iterations.

In [10]:
"""
### Deep Learning

We also have deep CCA methods (and autoencoder variants)
- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE)

We introduce a Config class from configuration.py. This contains a number of default settings for running DCCA.

"""
from cca_zoo import deepwrapper, objectives, dcca, deep_models

# %%
# DCCA
print('DCCA')
# One encoder per view; each takes the 784 input features down to latent_dims outputs
encoder_1, encoder_2 = (deep_models.Encoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))

# Wrap the DCCA model in DeepWrapper, which provides the fit / predict_corr calls used below
dcca_model = deepwrapper.DeepWrapper(dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]))
dcca_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dcca_train_corr = dcca_model.train_correlations[0, 1]
dcca_test_corr = dcca_model.predict_corr(test_view_1, test_view_2)[0, 1]
dcca_results = np.stack([dcca_train_corr, dcca_test_corr])


# DCCA_NOI
# The only difference from the DCCA model above is als=True
print('DCCA by non-linear orthogonal iterations')
# Fresh encoders, one per view
encoder_1, encoder_2 = (deep_models.Encoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))

dcca_noi_model = deepwrapper.DeepWrapper(
    dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], als=True))
dcca_noi_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dcca_noi_train_corr = dcca_noi_model.train_correlations[0, 1]
dcca_noi_test_corr = dcca_noi_model.predict_corr(test_view_1, test_view_2)[0, 1]
dcca_noi_results = np.stack([dcca_noi_train_corr, dcca_noi_test_corr])

DCCA
total parameters:  201476
====> Epoch: 1 Average train loss: -0.1992
====> Epoch: 1 Average val loss: -0.4696
Min loss -0.47
====> Epoch: 2 Average train loss: -0.4284
====> Epoch: 2 Average val loss: -1.0864
Min loss -1.09
====> Epoch: 3 Average train loss: -0.9979
====> Epoch: 3 Average val loss: -1.1795
Min loss -1.18
====> Epoch: 4 Average train loss: -1.2693
====> Epoch: 4 Average val loss: -1.2537
Min loss -1.25
====> Epoch: 5 Average train loss: -1.2971
====> Epoch: 5 Average val loss: -1.3359
Min loss -1.34
====> Epoch: 6 Average train loss: -1.3230
====> Epoch: 6 Average val loss: -1.3458
Min loss -1.35
====> Epoch: 7 Average train loss: -1.3473
====> Epoch: 7 Average val loss: -1.3718
Min loss -1.37
====> Epoch: 8 Average train loss: -1.3747
====> Epoch: 8 Average val loss: -1.4354
Min loss -1.44
====> Epoch: 9 Average train loss: -1.3934
====> Epoch: 9 Average val loss: -1.4116
====> Epoch: 10 Average train loss: -1.3941
====> Epoch: 10 Average val loss: -1.4448
Min los

# DCCA with custom optimizers and schedulers

In [11]:
# DCCA with user-supplied optimizers and learning-rate schedulers
# Build fresh encoders for this model. Without this, the optimizers below would be attached to
# whatever encoder_1 / encoder_2 objects the previously executed cell left behind, i.e. encoders
# that were already trained and still belong to another model.
encoder_1 = deep_models.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = deep_models.Encoder(latent_dims=latent_dims, feature_size=784)
# One Adam optimizer per encoder, each paired with its own scheduler
optimizers = [optim.Adam(encoder_1.parameters(), lr=1e-4), optim.Adam(encoder_2.parameters(), lr=1e-4)]
schedulers = [optim.lr_scheduler.CosineAnnealingLR(optimizers[0], 1),
              optim.lr_scheduler.ReduceLROnPlateau(optimizers[1])]
dcca_model = dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2],
                                objective=objectives.CCA, optimizers=optimizers, schedulers=schedulers)
# Wrap the model in DeepWrapper, which provides the fit call used below
dcca_model = deepwrapper.DeepWrapper(dcca_model)
dcca_model.fit(train_dataset, val_dataset,epochs=20)

total parameters:  201476
====> Epoch: 1 Average train loss: -0.1003
====> Epoch: 1 Average val loss: -0.1052
Min loss -0.11
====> Epoch: 2 Average train loss: -0.1133
====> Epoch: 2 Average val loss: -0.1389
Min loss -0.14
====> Epoch: 3 Average train loss: -0.1222
====> Epoch: 3 Average val loss: -0.1342
====> Epoch: 4 Average train loss: -0.1494
====> Epoch: 4 Average val loss: -0.1637
Min loss -0.16
====> Epoch: 5 Average train loss: -0.1330
====> Epoch: 5 Average val loss: -0.1813
Min loss -0.18
====> Epoch: 6 Average train loss: -0.2063
====> Epoch: 6 Average val loss: -0.2123
Min loss -0.21
====> Epoch: 7 Average train loss: -0.1922
====> Epoch: 7 Average val loss: -0.2314
Min loss -0.23
====> Epoch: 8 Average train loss: -0.2298
====> Epoch: 8 Average val loss: -0.2817
Min loss -0.28
====> Epoch: 9 Average train loss: -0.2450
====> Epoch: 9 Average val loss: -0.3244
Min loss -0.32
====> Epoch: 10 Average train loss: -0.2932
====> Epoch: 10 Average val loss: -0.2934
====> Epoch:



DeepWrapper(device='cpu',
            model=DCCA(
  (encoders): ModuleList(
    (0): Encoder(
      (layers): Sequential(
        (0): Sequential(
          (0): Linear(in_features=784, out_features=128, bias=True)
          (1): ReLU()
        )
      )
      (fc): Linear(in_features=128, out_features=2, bias=True)
    )
    (1): Encoder(
      (layers): Sequential(
        (0): Sequential(
          (0): Linear(in_features=784, out_features=128, bias=True)
          (1): ReLU()
        )
      )
      (fc): Linear(in_features=128, out_features=2, bias=True)
    )
  )
),
            tensorboard=False, tensorboard_tag=None)

# DGCCA and DMCCA for more than 2 views

To perform DGCCA or DMCCA, the only change we need to make is to the objective argument.

In [12]:
# DGCCA
print('DGCCA')
# Fresh encoders, one per view
encoder_1, encoder_2 = (deep_models.Encoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))

# Same DCCA class as before; only the objective is switched to objectives.GCCA
dgcca_model = deepwrapper.DeepWrapper(
    dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.GCCA))
dgcca_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dgcca_train_corr = dgcca_model.train_correlations[0, 1]
dgcca_test_corr = dgcca_model.predict_corr(test_view_1, test_view_2)[0, 1]
dgcca_results = np.stack([dgcca_train_corr, dgcca_test_corr])

# DMCCA
print('DMCCA')
# Fresh encoders, one per view
encoder_1, encoder_2 = (deep_models.Encoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))

# Same DCCA class as before; only the objective is switched to objectives.MCCA
dmcca_model = deepwrapper.DeepWrapper(
    dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.MCCA))
dmcca_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dmcca_train_corr = dmcca_model.train_correlations[0, 1]
dmcca_test_corr = dmcca_model.predict_corr(test_view_1, test_view_2)[0, 1]
dmcca_results = np.stack([dmcca_train_corr, dmcca_test_corr])

DGCCA
total parameters:  201476
====> Epoch: 1 Average train loss: -0.1636
====> Epoch: 1 Average val loss: -0.6327
Min loss -0.63
====> Epoch: 2 Average train loss: -0.7176
====> Epoch: 2 Average val loss: -1.1977
Min loss -1.20
====> Epoch: 3 Average train loss: -1.2958
====> Epoch: 3 Average val loss: -1.3110
Min loss -1.31
====> Epoch: 4 Average train loss: -1.2916
====> Epoch: 4 Average val loss: -1.3415
Min loss -1.34
====> Epoch: 5 Average train loss: -1.3092
====> Epoch: 5 Average val loss: -1.3802
Min loss -1.38
====> Epoch: 6 Average train loss: -1.3512
====> Epoch: 6 Average val loss: -1.3728
====> Epoch: 7 Average train loss: -1.4042
====> Epoch: 7 Average val loss: -1.4120
Min loss -1.41
====> Epoch: 8 Average train loss: -1.4188
====> Epoch: 8 Average val loss: -1.4291
Min loss -1.43
====> Epoch: 9 Average train loss: -1.3925
====> Epoch: 9 Average val loss: -1.4063
====> Epoch: 10 Average train loss: -1.4053
====> Epoch: 10 Average val loss: -1.4349
Min loss -1.43
====> 

# Deep Canonically Correlated Autoencoders
We need to add decoders in order to model deep canonically correlated autoencoders. We also use the DCCAE class, which inherits from DCCA.

In [13]:
from cca_zoo import dccae

# DCCAE
print('DCCAE')
# One encoder and one decoder per view, created in the order encoders then decoders
encoder_1, encoder_2 = (deep_models.Encoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))
decoder_1, decoder_2 = (deep_models.Decoder(latent_dims=latent_dims, feature_size=784) for _ in range(2))

dccae_model = deepwrapper.DeepWrapper(
    dccae.DCCAE(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], decoders=[decoder_1, decoder_2]))

# NOTE(review): unlike the other deep models in this notebook, fit is given the two numpy
# training views here rather than train_dataset / val_dataset - confirm this is intended.
dccae_model.fit(train_view_1, train_view_2, epochs=epochs)

# Train row and test row from the [0, 1] entry
dccae_train_corr = dccae_model.train_correlations[0, 1]
dccae_test_corr = dccae_model.predict_corr(test_view_1, test_view_2)[0, 1]
dccae_results = np.stack([dccae_train_corr, dccae_test_corr])

DCCAE
total parameters:  404516
====> Epoch: 1 Average train loss: 32907.5391
====> Epoch: 1 Average val loss: 27802.5078
Min loss 27802.51
====> Epoch: 2 Average train loss: 27708.0488
====> Epoch: 2 Average val loss: 24035.3594
Min loss 24035.36
====> Epoch: 3 Average train loss: 23930.0234
====> Epoch: 3 Average val loss: 21451.0508
Min loss 21451.05
====> Epoch: 4 Average train loss: 21329.8359
====> Epoch: 4 Average val loss: 19741.1660
Min loss 19741.17
====> Epoch: 5 Average train loss: 19601.3438
====> Epoch: 5 Average val loss: 18645.6992
Min loss 18645.70
====> Epoch: 6 Average train loss: 18487.0000
====> Epoch: 6 Average val loss: 17987.4961
Min loss 17987.50
====> Epoch: 7 Average train loss: 17812.5781
====> Epoch: 7 Average val loss: 17634.9453
Min loss 17634.95
====> Epoch: 8 Average train loss: 17448.5371
====> Epoch: 8 Average val loss: 17476.7012
Min loss 17476.70
====> Epoch: 9 Average train loss: 17284.5312
====> Epoch: 9 Average val loss: 17419.5684
Min loss 17419

# Deep Variational CCA

In [18]:
"""
### Deep Variational Learning
Finally we have Deep Variational CCA methods.
- Deep Variational CCA (DVCCA)
- Deep Variational CCA - private (DVVCA_p)

These are both implemented by the DVCCA class with private=True/False and both_encoders=True/False. If both_encoders,
the encoder to the shared information Q(z_shared|x) is modelled for both x_1 and x_2 whereas if both_encoders is false
it is modelled for x_1 as in the paper
"""
from cca_zoo import dvcca

# %%
# DVCCA (technically bi-DVCCA)
print('DVCCA')
# Variational encoders and normalised-output decoders, one of each per view
encoder_1, encoder_2 = (
    deep_models.Encoder(latent_dims=latent_dims, feature_size=784, variational=True) for _ in range(2))
decoder_1, decoder_2 = (
    deep_models.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True) for _ in range(2))

# private=False: no private encoders are supplied for this variant
dvcca_model = deepwrapper.DeepWrapper(
    dvcca.DVCCA(latent_dims=latent_dims,
                encoders=[encoder_1, encoder_2],
                decoders=[decoder_1, decoder_2],
                private=False))
dvcca_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dvcca_train_corr = dvcca_model.train_correlations[0, 1]
dvcca_test_corr = dvcca_model.predict_corr(test_view_1, test_view_2)[0, 1]
dvcca_model_results = np.stack([dvcca_train_corr, dvcca_test_corr])

# DVCCA_private (technically bi-DVCCA_private)
print('DVCCA_private')
# Shared and private variational encoders, one of each per view (shared ones are created first)
encoder_1, encoder_2 = (
    deep_models.Encoder(latent_dims=latent_dims, feature_size=784, variational=True) for _ in range(2))
private_encoder_1, private_encoder_2 = (
    deep_models.Encoder(latent_dims=latent_dims, feature_size=784, variational=True) for _ in range(2))
# The decoders are built with twice the latent size used by the encoders
decoder_1, decoder_2 = (
    deep_models.Decoder(latent_dims=latent_dims * 2, feature_size=784, norm_output=True) for _ in range(2))

dvccap_model = deepwrapper.DeepWrapper(
    dvcca.DVCCA(latent_dims=latent_dims,
                encoders=[encoder_1, encoder_2],
                decoders=[decoder_1, decoder_2],
                private_encoders=[private_encoder_1, private_encoder_2],
                private=True))
dvccap_model.fit(train_dataset, val_dataset, epochs=epochs)

# Train row and test row from the [0, 1] entry
dvccap_train_corr = dvccap_model.train_correlations[0, 1]
dvccap_test_corr = dvccap_model.predict_corr(test_view_1, test_view_2)[0, 1]
dvccap_model_results = np.stack([dvccap_train_corr, dvccap_test_corr])

DVCCA
total parameters:  405032
====> Epoch: 1 Average train loss: 1110.1912
====> Epoch: 1 Average val loss: 1088.3569
Min loss 1088.36
====> Epoch: 2 Average train loss: 1088.2148
====> Epoch: 2 Average val loss: 1067.7648
Min loss 1067.76
====> Epoch: 3 Average train loss: 1067.6774
====> Epoch: 3 Average val loss: 1049.3245
Min loss 1049.32
====> Epoch: 4 Average train loss: 1049.3337
====> Epoch: 4 Average val loss: 1031.4686
Min loss 1031.47
====> Epoch: 5 Average train loss: 1031.2385
====> Epoch: 5 Average val loss: 1016.6136
Min loss 1016.61
====> Epoch: 6 Average train loss: 1014.9791
====> Epoch: 6 Average val loss: 1001.1571
Min loss 1001.16
====> Epoch: 7 Average train loss: 1001.0985
====> Epoch: 7 Average val loss: 988.1716
Min loss 988.17
====> Epoch: 8 Average train loss: 988.9889
====> Epoch: 8 Average val loss: 976.9553
Min loss 976.96
====> Epoch: 9 Average train loss: 974.0115
====> Epoch: 9 Average val loss: 965.7035
Min loss 965.70
====> Epoch: 10 Average train l

# Convolutional Deep CCA (and using other architectures)
We provide a standard CNN encoder and decoder, but users can build their own encoders and decoders by inheriting from BaseEncoder and BaseDecoder for seamless integration with the pipeline.

In [20]:
print('Convolutional DCCA')
# One CNN encoder per view
encoder_1, encoder_2 = (deep_models.CNNEncoder(latent_dims=latent_dims, channels=[3, 3]) for _ in range(2))

dcca_conv_model = deepwrapper.DeepWrapper(dcca.DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2]))

# Other architectures are used by passing different encoder objects in `encoders`;
# cca_zoo.deep_models provides the CNN encoder used here.

# The views are reshaped to (-1, 1, 28, 28) before being handed to the CNN encoders
conv_input_shape = (-1, 1, 28, 28)
# NOTE(review): fit is given the two numpy training views here rather than
# train_dataset / val_dataset as for most other deep models - confirm this is intended.
dcca_conv_model.fit(train_view_1.reshape(conv_input_shape), train_view_2.reshape(conv_input_shape), epochs=epochs)

# Train row and test row from the [0, 1] entry
dcca_conv_train_corr = dcca_conv_model.train_correlations[0, 1]
dcca_conv_test_corr = dcca_conv_model.predict_corr(test_view_1.reshape(conv_input_shape),
                                                   test_view_2.reshape(conv_input_shape))[0, 1]
dcca_conv_results = np.stack([dcca_conv_train_corr, dcca_conv_test_corr])

Convolutional DCCA
total parameters:  9568
====> Epoch: 1 Average train loss: -0.1122
====> Epoch: 1 Average val loss: -0.6000
Min loss -0.60
====> Epoch: 2 Average train loss: -0.6667
====> Epoch: 2 Average val loss: -0.8313
Min loss -0.83
====> Epoch: 3 Average train loss: -1.0002
====> Epoch: 3 Average val loss: -0.9811
Min loss -0.98
====> Epoch: 4 Average train loss: -1.2072
====> Epoch: 4 Average val loss: -1.0786
Min loss -1.08
====> Epoch: 5 Average train loss: -1.3414
====> Epoch: 5 Average val loss: -1.1450
Min loss -1.14
====> Epoch: 6 Average train loss: -1.4371
====> Epoch: 6 Average val loss: -1.1919
Min loss -1.19
====> Epoch: 7 Average train loss: -1.5117
====> Epoch: 7 Average val loss: -1.2256
Min loss -1.23
====> Epoch: 8 Average train loss: -1.5730
====> Epoch: 8 Average val loss: -1.2490
Min loss -1.25
====> Epoch: 9 Average train loss: -1.6245
====> Epoch: 9 Average val loss: -1.2644
Min loss -1.26
====> Epoch: 10 Average train loss: -1.6675
====> Epoch: 10 Averag

# Generate Some Plots

In [21]:
"""
### Make results plot to compare methods
"""
# %%
from cca_zoo import plot_utils

# Each label sits next to the results array it describes, so the two sequences
# handed to plot_results cannot drift out of step.
labelled_results = [
    ('linear', linear_cca_results),
    ('gcca', gcca_results),
    ('mcca', mcca_results),
    ('pls', pls_results),
    ('pmd', pmd_results),
    ('elastic', elastic_results),
    ('scca', scca_results),
    ('linear kernel', kernel_reg_results),
    ('polynomial kernel', kernel_poly_results),
    ('gaussian kernel', kernel_gaussian_results),
    ('deep CCA', dcca_results),
    ('deep generalized CCA', dgcca_results),
    ('deep multiset CCA', dmcca_results),
    ('deep VCCA', dvcca_model_results),
    ('deep convolutional cca', dcca_conv_results),
]

# Stack the per-model result arrays along a new leading axis, in the listed order
all_results = np.stack([model_results for _, model_results in labelled_results], axis=0)
all_labels = [model_label for model_label, _ in labelled_results]

plot_utils.plot_results(all_results, all_labels)
plt.show()