<a href="https://colab.research.google.com/github/jameschapman19/cca_zoo/blob/main/tutorial_notebooks/cca_zoo_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/jameschapman19/cca_zoo/blob/master/tutorial_notebooks/cca_zoo_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A tutorial comparing the train and test correlations of different models on MNIST data

In [None]:
!pip install --upgrade cca-zoo[deep,probabilistic]



In [None]:
# Imports
import numpy as np
from cca_zoo.data import Noisy_MNIST_Dataset, CCA_Dataset
import itertools
import matplotlib.pyplot as plt
from torch.utils.data import Subset, DataLoader
from torch import optim
from cca_zoo.deepmodels import objectives, architectures, CCALightning, DCCA,DCCA_NOI,DVCCA,DCCAE,DTCCA, get_dataloaders
from sklearn.utils.fixes import loguniform
import pytorch_lightning as pl
# Load MNIST Data
# Seed the Python, NumPy and PyTorch RNGs before anything random happens so the
# shuffled subsets below (and later cells that draw random numbers) are
# reproducible under Restart & Run All.
SEED = 42
pl.seed_everything(SEED)

# Number of samples kept for each of the train / validation / test subsets.
N = 500

# Take (up to) the first 2 * N training samples, shuffle their indices and
# split them in half into train and validation subsets.
dataset = Noisy_MNIST_Dataset(mnist_type='FashionMNIST', train=True)
ids = np.arange(min(2 * N, len(dataset)))
np.random.shuffle(ids)
train_ids, val_ids = np.array_split(ids, 2)
val_dataset = Subset(dataset, val_ids)
train_dataset = Subset(dataset, train_ids)

# Test subset: (up to) the first N samples of the held-out split, shuffled.
test_dataset = Noisy_MNIST_Dataset(mnist_type='FashionMNIST', train=False)
test_ids = np.arange(min(N, len(test_dataset)))
np.random.shuffle(test_ids)
test_dataset = Subset(test_dataset, test_ids)

# NumPy copies of the two views of every subset, used by the non-deep models.
# The second element returned by to_numpy is discarded here.
(train_view_1, train_view_2), _ = train_dataset.dataset.to_numpy(
    train_dataset.indices)
(val_view_1, val_view_2), _ = val_dataset.dataset.to_numpy(val_dataset.indices)
(test_view_1, test_view_2), _ = test_dataset.dataset.to_numpy(
    test_dataset.indices)

# DataLoaders used by the deep models; the test loader serves the whole test
# subset as a single batch.
train_loader, val_loader = get_dataloaders(train_dataset, val_dataset)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))
# Settings (shared by the cells below)

# Training length for the deep models, in epochs.
epochs = 50
# Iteration budget handed to the iterative (alternating least squares) solvers.
max_iter = 2

# Folds used by the cross-validated hyperparameter searches.
cv = 3
# Intended worker count for running hyperparameter tuning in parallel
# (original note: 0 if not).
jobs = 4

# Number of latent dimensions learned by every model in this notebook.
latent_dims = 2

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


# Canonical Correlation Analysis

In [None]:
from cca_zoo.models import CCA, CCA_ALS
"""
### Linear CCA by eigendecomposition
"""
linear_cca = CCA(latent_dims=latent_dims)

linear_cca.fit((train_view_1, train_view_2))

linear_cca_results = np.stack(
    (linear_cca.score((train_view_1, train_view_2)), linear_cca.score((test_view_1, test_view_2))))

"""
### Linear CCA by alternating least squares (can pass more than 2 views)
"""

linear_cca_als = CCA_ALS(latent_dims=latent_dims)

linear_cca_als.fit((train_view_1, train_view_2))

linear_cca_als_results = np.stack(
    (linear_cca_als.score((train_view_1, train_view_2)), linear_cca_als.score((test_view_1, test_view_2))))

# Partial Least Squares


In [None]:
from cca_zoo.models import PLS, PLS_ALS
"""
### PLS (2 views)
"""
pls = PLS(latent_dims=latent_dims)

pls.fit((train_view_1, train_view_2))

pls_results = np.stack(
    (pls.score((train_view_1, train_view_2)), pls.score((test_view_1, test_view_2))))

pls_als = PLS_ALS(latent_dims=latent_dims)

pls_als.fit((train_view_1, train_view_2))

pls_als_results = np.stack(
    (pls_als.score((train_view_1, train_view_2)), pls_als.score((test_view_1, test_view_2))))

# Extension to multiple views



In [None]:
from cca_zoo.models import GCCA, MCCA, PLS_ALS
"""
### (Regularized) Generalized CCA(can pass more than 2 views)
"""
train_view_3=train_view_1+np.random.rand(*train_view_1.shape)
test_view_3=test_view_1+np.random.rand(*test_view_1.shape)

# small ammount of regularisation added since data is not full rank
c=[0.5,0.5,0.5]

gcca = GCCA(latent_dims=latent_dims,c=c)

gcca.fit((train_view_1, train_view_2,train_view_3))

gcca_results = np.stack((gcca.score((train_view_1, train_view_2, train_view_3)), gcca.score((test_view_1, test_view_2, test_view_3))))

"""
### (Regularized) Multiset CCA(can pass more than 2 views)
"""

mcca = MCCA(latent_dims=latent_dims, c=c)

mcca.fit((train_view_1, train_view_2,train_view_1))

mcca_results = np.stack((mcca.score((train_view_1, train_view_2, train_view_3)), mcca.score((test_view_1, test_view_2, test_view_3))))

"""
### Multiset CCA by alternating least squares
"""
mcca_als = CCA_ALS(latent_dims=latent_dims, max_iter=max_iter)

mcca_als.fit((train_view_1, train_view_2,train_view_3))

mcca_als_results = np.stack(
    (mcca_als.score((train_view_1, train_view_2, train_view_3)), mcca_als.score((test_view_1, test_view_2, test_view_3))))

"""
### Multiset PLS by alternating least squares
"""
mcca_pls = PLS_ALS(latent_dims=latent_dims)

mcca_pls.fit((train_view_1, train_view_2,train_view_1))

mcca_pls_results = np.stack(
    (mcca_als.score((train_view_1, train_view_2, train_view_3)), mcca_pls.score((test_view_1, test_view_2, test_view_3))))



# Tensor CCA

In [None]:
from cca_zoo.models import TCCA
"""
### (Regularized) Tensor CCA(can pass more than 2 views)
"""

tcca = TCCA(latent_dims=latent_dims, c=c)

#memory requirement for tensor is massive so take first 100 features
tcca.fit((train_view_1[:,:100], train_view_2[:,:100],train_view_3[:,:100]))

tcca_results = np.stack((tcca.score((train_view_1[:,:100], train_view_2[:,:100], train_view_3[:,:100])), tcca.score((test_view_1[:,:100], test_view_2[:,:100], test_view_3[:,:100]))))

reconstruction error=0.9661771276746018
iteration 1, reconstruction error: 0.9518385892645673, decrease = 0.014338538410034518, unnormalized = 24.974625897501497
iteration 2, reconstruction error: 0.9507160541458431, decrease = 0.0011225351187241772, unnormalized = 24.945172484955357
iteration 3, reconstruction error: 0.9507116250799326, decrease = 4.429065910582786e-06, unnormalized = 24.945056273797878
iteration 4, reconstruction error: 0.950711606421758, decrease = 1.8658174560926e-08, unnormalized = 24.945055784239106
iteration 5, reconstruction error: 0.950711606295279, decrease = 1.26479049455952e-10, unnormalized = 24.945055780920512
PARAFAC converged after 5 iterations


# Weighted GCCA/Missing Observation GCCA

In [None]:
# Observation matrix: one row per view, one column per training sample.
# 1 marks an observed sample, 0 a missing one. It is sized from the training
# data itself so it always matches the number of rows passed to fit().
K = np.ones((3, train_view_1.shape[0]))
K[0, 200:] = 0  # view 1 is treated as missing from sample 200 onwards
K[1, :100] = 0  # view 2 is treated as missing for the first 100 samples

# Relative weight of each of the three views.
view_weights = [1, 2, 1.2]

# One regularisation value per view.
c = [0.5, 0.5, 0.5]

gcca = GCCA(latent_dims=latent_dims, c=c, view_weights=view_weights)

# Fit on three distinct views (the third used to be a second copy of
# train_view_1) together with the observation matrix.
gcca.fit((train_view_1, train_view_2, train_view_3), K=K)

# Score with the same three views the model was fitted on
# (row 0: train, row 1: test).
gcca_results = np.stack((
    gcca.score((train_view_1, train_view_2, train_view_3)),
    gcca.score((test_view_1, test_view_2, test_view_3)),
))

# Regularised CCA solutions based on alternating minimisation/alternating least squares

Witten's penalized matrix decomposition form of sparse CCA is implemented by the `PMD` class.

Waaijenborg's penalized CCA, which uses elastic net regularisation, is implemented by the `ElasticCCA` class.

Mai's sparse CCA is implemented by the `SCCA` class.

Furthermore, any of these methods can be extended to multiple views. Witten describes this method explicitly.

In [None]:
from cca_zoo.model_selection import GridSearchCV, RandomizedSearchCV
from cca_zoo.models import rCCA, PMD,SCCA,ElasticCCA

def scorer(estimator, X):
    """Scoring callable for the hyperparameter searches.

    Calls ``estimator.score(X)`` and collapses the result to a single number
    by taking its mean.

    Args:
        estimator: fitted model exposing ``score(X)``.
        X: the views to score, forwarded unchanged to ``estimator.score``.

    Returns:
        The mean of the value returned by ``estimator.score(X)``.
    """
    return estimator.score(X).mean()

"""
### Ridge CCA (can pass more than 2 views)
"""
c1 = [0.1, 0.3, 0.7, 0.9]
c2 = [0.1, 0.3, 0.7, 0.9]
param_grid = {'c': [c1,c2]}

ridge = GridSearchCV(rCCA(latent_dims=latent_dims),param_grid=param_grid,
    cv=cv,
    verbose=True,scoring=scorer).fit([train_view_1,train_view_2]).best_estimator_

ridge_results = np.stack((ridge.score((train_view_1,train_view_2)), ridge.score((test_view_1, test_view_2))))

"""
### Sparse CCA (Penalized Matrix Decomposition) (can pass more than 2 views)
"""

# PMD
c1 = [1, 3, 7, 9]
c2 = [1, 3, 7, 9]
param_grid = {'c': [c1,c2]}

pmd = GridSearchCV(PMD(latent_dims=latent_dims),param_grid=param_grid,
    cv=cv,
    verbose=True,scoring=scorer).fit([train_view_1,train_view_2]).best_estimator_

pmd_results = np.stack((pmd.score((train_view_1,train_view_2)), pmd.score((test_view_1, test_view_2))))

"""
### Sparse CCA (can pass more than 2 views)
"""

# Sparse CCA
c1 = [0.00001, 0.0001]
c2 = [0.00001, 0.0001]
param_grid = {'c': [c1,c2]}

scca = GridSearchCV(SCCA(latent_dims=latent_dims),param_grid=param_grid,
    cv=cv,
    verbose=True,scoring=scorer).fit([train_view_1,train_view_2]).best_estimator_

scca_results = np.stack(
    (scca.score((train_view_1,train_view_2)), scca.score((test_view_1, test_view_2))))


"""
### Elastic CCA (can pass more than 2 views)
"""

# Elastic CCA
c1 = loguniform(1e-4, 1e0)
c2 = loguniform(1e-4, 1e0)
l1_1 = loguniform(1e-4, 1e0)
l1_2 = loguniform(1e-4, 1e0)
param_grid = {'c': [c1,c2], 'l1_ratio': [l1_1,l1_2]}

elastic = RandomizedSearchCV(ElasticCCA(latent_dims=latent_dims),param_distributions=param_grid,
    cv=cv,
    verbose=True,n_iter=5,scoring=scorer).fit([train_view_1,train_view_2]).best_estimator_

elastic_results = np.stack(
    (elastic.score((train_view_1,train_view_2)), elastic.score((test_view_1, test_view_2))))

Fitting 3 folds for each of 16 candidates, totalling 48 fits
Fitting 3 folds for each of 16 candidates, totalling 48 fits
Fitting 3 folds for each of 4 candidates, totalling 12 fits
Fitting 3 folds for each of 5 candidates, totalling 15 fits


# Kernel CCA

In [None]:
from cca_zoo.models import KCCA
"""
### Kernel CCA

Similarly, we can use kernel CCA methods with [method='kernel']

We can use different kernels and their associated parameters in a similar manner to before
- regularized linear kernel CCA: parameters :  'kernel'='linear', 0<'c'<1
- polynomial kernel CCA: parameters : 'kernel'='poly', 'degree', 0<'c'<1
- gaussian rbf kernel CCA: parameters : 'kernel'='gaussian', 'sigma', 0<'c'<1
"""
# %%
# r-kernel cca
c1 = [0.9, 0.99]
c2 = [0.9, 0.99]

param_grid = {'kernel': ['linear'], 'c': [c1,c2]}

kernel_reg = GridSearchCV(KCCA(latent_dims=latent_dims),param_grid=param_grid,
    cv=cv,
    verbose=True,scoring=scorer).fit([train_view_1,train_view_2]).best_estimator_
kernel_reg_results = np.stack((
    kernel_reg.score((train_view_1,train_view_2)),
    kernel_reg.score((test_view_1, test_view_2))))

# Polynomial-kernel CCA: search over the polynomial degree of each view and
# over the regularisation lists c1 / c2 defined for the linear kernel.
degree1 = [2, 3]
degree2 = list(degree1)

param_grid = dict(kernel=['poly'], degree=[degree1, degree2], c=[c1, c2])

# Cross-validated grid search; keep only the best fitted estimator.
kernel_poly = GridSearchCV(
    KCCA(latent_dims=latent_dims),
    param_grid=param_grid,
    cv=cv,
    verbose=True,
    scoring=scorer,
).fit([train_view_1, train_view_2]).best_estimator_

# Row 0: train score, row 1: test score.
kernel_poly_results = np.stack([
    kernel_poly.score(view_pair)
    for view_pair in ((train_view_1, train_view_2), (test_view_1, test_view_2))
])

# Gaussian (rbf) kernel CCA: search over the kernel's gamma for each view and
# over the regularisation lists c1 / c2 defined for the linear kernel.
gamma1 = [1e+1, 1e+2, 1e+3]
gamma2 = list(gamma1)

param_grid = dict(kernel=['rbf'], gamma=[gamma1, gamma2], c=[c1, c2])

# Cross-validated grid search; keep only the best fitted estimator.
kernel_gaussian = GridSearchCV(
    KCCA(latent_dims=latent_dims),
    param_grid=param_grid,
    cv=cv,
    verbose=True,
    scoring=scorer,
).fit([train_view_1, train_view_2]).best_estimator_

# Row 0: train score, row 1: test score.
kernel_gaussian_results = np.stack([
    kernel_gaussian.score(view_pair)
    for view_pair in ((train_view_1, train_view_2), (test_view_1, test_view_2))
])

Fitting 3 folds for each of 4 candidates, totalling 12 fits
Fitting 3 folds for each of 16 candidates, totalling 48 fits
Fitting 3 folds for each of 36 candidates, totalling 108 fits


  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stddev[:, None]
  c /= stdd

# Deep CCA

DCCA can be optimized using Andrew's original tracenorm objective (the `DCCA` class) or using Wang's DCCA by nonlinear orthogonal iterations (the `DCCA_NOI` class).

In [None]:
"""
### Deep Learning

We also have deep CCA methods (and autoencoder variants)
- Deep CCA (DCCA)
- Deep Canonically Correlated Autoencoders (DCCAE)

"""

# %%
# DCCA
print('DCCA')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])

dcca_model = CCALightning(dcca_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dcca_model, train_loader, val_loader)

dcca_results = np.stack((dcca_model.score(train_dataset), dcca_model.score(test_loader)))

# DCCA_NOI
print('DCCA by non-linear orthogonal iterations')
# One encoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
# N is given the number of training samples.
dcca_noi_model = DCCA_NOI(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], N=len(train_dataset))

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dcca_noi_model = CCALightning(dcca_noi_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dcca_noi_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dcca_noi_results = np.stack(
    (dcca_noi_model.score(train_loader), dcca_noi_model.score(test_loader)))

DCCA
total parameters:  201476
====> Epoch: 1 Average train loss: -0.2531
====> Epoch: 1 Average val loss: -0.1096
Min loss -0.11
====> Epoch: 2 Average train loss: -0.1009
====> Epoch: 2 Average val loss: -0.1308
Min loss -0.13
====> Epoch: 3 Average train loss: -0.2040
====> Epoch: 3 Average val loss: -0.1842
Min loss -0.18
====> Epoch: 4 Average train loss: -0.1796
====> Epoch: 4 Average val loss: -0.1576
====> Epoch: 5 Average train loss: -0.2058
====> Epoch: 5 Average val loss: -0.3233
Min loss -0.32
====> Epoch: 6 Average train loss: -0.3632
====> Epoch: 6 Average val loss: -0.4711
Min loss -0.47
====> Epoch: 7 Average train loss: -0.4117
====> Epoch: 7 Average val loss: -0.4762
Min loss -0.48
====> Epoch: 8 Average train loss: -0.4564
====> Epoch: 8 Average val loss: -0.4032
====> Epoch: 9 Average train loss: -0.5325
====> Epoch: 9 Average val loss: -0.4755
====> Epoch: 10 Average train loss: -0.5422
====> Epoch: 10 Average val loss: -0.5092
Min loss -0.51
====> Epoch: 11 Averag

# DCCA with custom optimizers and schedulers

In [None]:
# DCCA
print('DCCA')
# One encoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])

# Custom optimizer and learning-rate scheduler, built on the raw model's
# parameters before it is wrapped.
optimizer = optim.Adam(dcca_model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 1)

# Hand both to the Lightning wrapper. They used to be created and then
# dropped, so training silently ran with the wrapper's default optimizer.
dcca_model = CCALightning(dcca_model, optimizer=optimizer, lr_scheduler=scheduler)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dcca_model, train_loader, val_loader)

# Score through the DataLoaders (as the convolutional cell does) rather than
# the raw datasets. Row 0: train score, row 1: test score.
dcca_results = np.stack((dcca_model.score(train_loader), dcca_model.score(test_loader)))

DCCA
total parameters:  201476
====> Epoch: 1 Average train loss: -0.2345
====> Epoch: 1 Average val loss: -0.1404
Min loss -0.14
====> Epoch: 2 Average train loss: -0.1096
====> Epoch: 2 Average val loss: -0.1372
====> Epoch: 3 Average train loss: -0.0880
====> Epoch: 3 Average val loss: -0.0736
====> Epoch: 4 Average train loss: -0.0473
====> Epoch: 4 Average val loss: -0.1699
Min loss -0.17
====> Epoch: 5 Average train loss: -0.1097
====> Epoch: 5 Average val loss: -0.1187
====> Epoch: 6 Average train loss: -0.1023
====> Epoch: 6 Average val loss: -0.0718
====> Epoch: 7 Average train loss: -0.1285
====> Epoch: 7 Average val loss: -0.1539
====> Epoch: 8 Average train loss: -0.0861
====> Epoch: 8 Average val loss: -0.0859
====> Epoch: 9 Average train loss: -0.0531
====> Epoch: 9 Average val loss: -0.0679
====> Epoch: 10 Average train loss: -0.0867
====> Epoch: 10 Average val loss: -0.0726
====> Epoch: 11 Average train loss: -0.0745
====> Epoch: 11 Average val loss: -0.0786
====> Epoch

# DGCCA and DMCCA for more than 2 views

The only change we need to make is to the objective argument to perform DGCCA and DMCCA.

In [None]:
# DGCCA
print('DGCCA')
# One encoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
# Same DCCA class as before; only the objective is swapped for the GCCA one.
dgcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.GCCA)

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dgcca_model = CCALightning(dgcca_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dgcca_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dgcca_results = np.stack(
    (dgcca_model.score(train_loader), dgcca_model.score(test_loader)))

# DMCCA
print('DMCCA')
# One encoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
# Same DCCA class as before; only the objective is swapped for the MCCA one.
dmcca_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], objective=objectives.MCCA)

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dmcca_model = CCALightning(dmcca_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dmcca_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dmcca_results = np.stack(
    (dmcca_model.score(train_loader), dmcca_model.score(test_loader)))

DGCCA
total parameters:  201476
====> Epoch: 1 Average train loss: -0.3717
====> Epoch: 1 Average val loss: -0.2874
Min loss -0.29
====> Epoch: 2 Average train loss: -0.4010
====> Epoch: 2 Average val loss: -0.3402
Min loss -0.34
====> Epoch: 3 Average train loss: -0.5026
====> Epoch: 3 Average val loss: -0.4891
Min loss -0.49
====> Epoch: 4 Average train loss: -0.4654
====> Epoch: 4 Average val loss: -0.4745
====> Epoch: 5 Average train loss: -0.6690
====> Epoch: 5 Average val loss: -0.5786
Min loss -0.58
====> Epoch: 6 Average train loss: -0.5903
====> Epoch: 6 Average val loss: -0.5848
Min loss -0.58
====> Epoch: 7 Average train loss: -0.6430
====> Epoch: 7 Average val loss: -0.6894
Min loss -0.69
====> Epoch: 8 Average train loss: -0.7384
====> Epoch: 8 Average val loss: -0.6960
Min loss -0.70
====> Epoch: 9 Average train loss: -0.7716
====> Epoch: 9 Average val loss: -0.7567
Min loss -0.76
====> Epoch: 10 Average train loss: -0.8274
====> Epoch: 10 Average val loss: -0.7267
====> 

# Deep Canonically Correlated Autoencoders
To model deep canonically correlated autoencoders we additionally supply one decoder per view and use the DCCAE class, which inherits from DCCA.

In [None]:
# DCCAE
print('DCCAE')
# One encoder and one decoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784)
decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784)
dccae_model = DCCAE(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], decoders=[decoder_1, decoder_2])

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dccae_model = CCALightning(dccae_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dccae_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dccae_results = np.stack(
    (dccae_model.score(train_loader), dccae_model.score(test_loader)))

DCCAE
total parameters:  404516
====> Epoch: 1 Average train loss: -0.0019
====> Epoch: 2 Average train loss: -0.0677
====> Epoch: 3 Average train loss: -0.1219
====> Epoch: 4 Average train loss: -0.1684
====> Epoch: 5 Average train loss: -0.2075
====> Epoch: 6 Average train loss: -0.2417
====> Epoch: 7 Average train loss: -0.2722
====> Epoch: 8 Average train loss: -0.3001
====> Epoch: 9 Average train loss: -0.3262
====> Epoch: 10 Average train loss: -0.3502
====> Epoch: 11 Average train loss: -0.3730
====> Epoch: 12 Average train loss: -0.3943
====> Epoch: 13 Average train loss: -0.4143
====> Epoch: 14 Average train loss: -0.4332
====> Epoch: 15 Average train loss: -0.4514
====> Epoch: 16 Average train loss: -0.4691
====> Epoch: 17 Average train loss: -0.4865
====> Epoch: 18 Average train loss: -0.5030
====> Epoch: 19 Average train loss: -0.5188
====> Epoch: 20 Average train loss: -0.5341
====> Epoch: 21 Average train loss: -0.5490
====> Epoch: 22 Average train loss: -0.5638
====> Epo

# Deep Variational CCA

In [None]:
"""
### Deep Variational Learning
Finally we have Deep Variational CCA methods.
- Deep Variational CCA (DVCCA)
- Deep Variational CCA - private (DVVCA_p)

These are both implemented by the DVCCA class with private=True/False and both_encoders=True/False. If both_encoders,
the encoder to the shared information Q(z_shared|x) is modelled for both x_1 and x_2 whereas if both_encoders is false
it is modelled for x_1 as in the paper
"""

# %%
# DVCCA (technically bi-DVCCA)
print('DVCCA')
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
decoder_1 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True)
decoder_2 = architectures.Decoder(latent_dims=latent_dims, feature_size=784, norm_output=True)
dvcca_model = DVCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], decoders=[decoder_1, decoder_2])

dvcca_model = CCALightning(dvcca_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dvcca_model, train_loader, val_loader)

dvcca_model_results = np.stack(
    (dvcca_model.score(train_dataset), dvcca_model.score(test_dataset)))

# DVCCA_private (technically bi-DVCCA_private)
print('DVCCA_private')
# Shared and private variational encoders, one of each per view.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
private_encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
private_encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784, variational=True)
# The decoders take twice the latent size
# (presumably the shared and private codes concatenated -- confirm against cca_zoo).
decoder_1 = architectures.Decoder(latent_dims=latent_dims * 2, feature_size=784, norm_output=True)
decoder_2 = architectures.Decoder(latent_dims=latent_dims * 2, feature_size=784, norm_output=True)
dvccap_model = DVCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2], decoders=[decoder_1, decoder_2],
                           private_encoders=[private_encoder_1, private_encoder_2])

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dvccap_model = CCALightning(dvccap_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dvccap_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dvccap_model_results = np.stack(
    (dvccap_model.score(train_loader), dvccap_model.score(test_loader)))

DVCCA
total parameters:  405032
====> Epoch: 1 Average train loss: 1109.5615
====> Epoch: 1 Average val loss: 1106.4091
Min loss 1106.41
====> Epoch: 2 Average train loss: 1106.5356
====> Epoch: 2 Average val loss: 1103.5577
Min loss 1103.56
====> Epoch: 3 Average train loss: 1103.5905
====> Epoch: 3 Average val loss: 1100.6377
Min loss 1100.64
====> Epoch: 4 Average train loss: 1100.4827
====> Epoch: 4 Average val loss: 1097.5350
Min loss 1097.54
====> Epoch: 5 Average train loss: 1097.5166
====> Epoch: 5 Average val loss: 1094.5754
Min loss 1094.58
====> Epoch: 6 Average train loss: 1094.8094
====> Epoch: 6 Average val loss: 1091.9133
Min loss 1091.91
====> Epoch: 7 Average train loss: 1092.1809
====> Epoch: 7 Average val loss: 1089.2421
Min loss 1089.24
====> Epoch: 8 Average train loss: 1089.0867
====> Epoch: 8 Average val loss: 1086.4995
Min loss 1086.50
====> Epoch: 9 Average train loss: 1086.4094
====> Epoch: 9 Average val loss: 1083.7737
Min loss 1083.77
====> Epoch: 10 Average

# Convolutional Deep CCA (and using other architectures)
We provide a standard CNN encoder and decoder but users can build their own encoders and decoders by inheriting BaseEncoder and BaseDecoder for seamless integration with the pipeline

In [None]:
print('Convolutional DCCA')
# One CNN encoder per view.
encoder_1 = architectures.CNNEncoder(latent_dims=latent_dims, channels=[3, 3])
encoder_2 = architectures.CNNEncoder(latent_dims=latent_dims, channels=[3, 3])
dcca_conv_model = DCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])

# Reshape every view into image form: (samples, 1, 28, 28).
conv_train_view_1 = train_view_1.reshape((-1, 1, 28, 28))
conv_train_view_2 = train_view_2.reshape((-1, 1, 28, 28))
conv_test_view_1 = test_view_1.reshape((-1, 1, 28, 28))
conv_test_view_2 = test_view_2.reshape((-1, 1, 28, 28))

# Datasets and loaders built from the image-shaped views.
conv_dataset = CCA_Dataset((conv_train_view_1, conv_train_view_2))
test_conv_dataset = CCA_Dataset((conv_test_view_1, conv_test_view_2))
conv_train_loader = get_dataloaders(conv_dataset)
conv_test_loader = get_dataloaders(test_conv_dataset)

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dcca_conv_model = CCALightning(dcca_conv_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
# Train on the image-shaped loader. The cell used to pass `train_loader`,
# i.e. the data in its original (un-reshaped) form, while scoring below uses
# the reshaped loaders.
trainer.fit(dcca_conv_model, conv_train_loader)

# Row 0: train score, row 1: test score.
dcca_conv_results = np.stack((
    dcca_conv_model.score(conv_train_loader),
    dcca_conv_model.score(conv_test_loader)))

Convolutional DCCA
total parameters:  9568
====> Epoch: 1 Average train loss: -0.6154
====> Epoch: 2 Average train loss: -0.7844
====> Epoch: 3 Average train loss: -0.9141
====> Epoch: 4 Average train loss: -1.0193
====> Epoch: 5 Average train loss: -1.1371
====> Epoch: 6 Average train loss: -1.2470
====> Epoch: 7 Average train loss: -1.3430
====> Epoch: 8 Average train loss: -1.4286
====> Epoch: 9 Average train loss: -1.4995
====> Epoch: 10 Average train loss: -1.5591


# DTCCA

In [None]:
# %%
# DTCCA
print('DTCCA')
# One encoder per view; 784 is the flattened 28 x 28 image size.
encoder_1 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
encoder_2 = architectures.Encoder(latent_dims=latent_dims, feature_size=784)
dtcca_model = DTCCA(latent_dims=latent_dims, encoders=[encoder_1, encoder_2])

# Wrap the model so it can be trained by a PyTorch Lightning Trainer.
dtcca_model = CCALightning(dtcca_model)
trainer = pl.Trainer(max_epochs=epochs, progress_bar_refresh_rate=1, log_every_n_steps=1, logger=False)
trainer.fit(dtcca_model, train_loader, val_loader)

# Score through the DataLoaders (as the DCCA and convolutional cells do)
# rather than the raw datasets. Row 0: train score, row 1: test score.
dtcca_results = np.stack((dtcca_model.score(train_loader), dtcca_model.score(test_loader)))

DTCCA
total parameters:  201476




====> Epoch: 1 Average train loss: 0.0000
====> Epoch: 1 Average val loss: 0.0000
Min loss 0.00
====> Epoch: 2 Average train loss: 0.0000
====> Epoch: 2 Average val loss: 0.0000
Min loss 0.00
====> Epoch: 3 Average train loss: 0.0000
====> Epoch: 3 Average val loss: 0.0000
====> Epoch: 4 Average train loss: 0.0000
====> Epoch: 4 Average val loss: 0.0000
====> Epoch: 5 Average train loss: 0.0000
====> Epoch: 5 Average val loss: 0.0000
====> Epoch: 6 Average train loss: 0.0000
====> Epoch: 6 Average val loss: 0.0000
====> Epoch: 7 Average train loss: 0.0000
====> Epoch: 7 Average val loss: 0.0000
====> Epoch: 8 Average train loss: 0.0000
====> Epoch: 8 Average val loss: 0.0000
====> Epoch: 9 Average train loss: 0.0000
====> Epoch: 9 Average val loss: 0.0000
====> Epoch: 10 Average train loss: 0.0000
====> Epoch: 10 Average val loss: 0.0000
====> Epoch: 11 Average train loss: 0.0000
====> Epoch: 11 Average val loss: 0.0000
====> Epoch: 12 Average train loss: 0.0000
====> Epoch: 12 Average