# Testing of the Deep CCA method

### Importing necessary packages:

In [9]:
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
from linear_cca import linear_cca
from torch.utils.data import BatchSampler, SequentialSampler
from DeepCCAModels import DeepCCA
from main import Solver
from utils import load_data, svm_classify
try:
    import cPickle as thepickle
except ImportError:
    import _pickle as thepickle

import gzip

# Make float64 the default floating-point dtype for newly created tensors.
# torch.set_default_tensor_type() is deprecated (PyTorch >= 2.1);
# torch.set_default_dtype() is the supported way to get the same CPU
# double-precision default.
torch.set_default_dtype(torch.float64)

### Data loading:

In [16]:
"""
    1st view:
    
    This view consists of the imaging data, which have been reduced in
    dimensionality through OPNMF. Initially there were 145 Regions Of
    Interest (ROIs), then through Orthogonal Projective Non-Negative 
    Matrix Factorization, the 145 ROIs have been reduced to a set of 18
    components. That is, each of the 599 samples in the database can be
    described using a linear combination of those 18 components weighted
    by their unique coefficient set.
    
    We now load the 599x18 coefficient dataset, that represents the 145
    ROIs:
"""
# Load the pickled OPNMF coefficient table for the 1st view into a pandas
# object bound to `components`.
components = pd.read_pickle("ROI_OPNMF_Component_Coefficients.pkl")
# Preview the first rows; as the last expression of the cell, the result is
# what the notebook displays.
components.head()

Unnamed: 0,Component 1,Component 2,Component 3,Component 4,Component 5,Component 6,Component 7,Component 8,Component 9,Component 10,Component 11,Component 12,Component 13,Component 14,Component 15,Component 16,Component 17,Component 18
0,2.459898,0.423744,0.931475,0.0,0.840692,3.0396,0.935822,0.485301,0.288034,2.447542,0.0,1.193371,2.435244,2.079515,1.530588,1.941043,1.478721,0.364583
1,0.82803,2.786958,3.553455,2.721096,1.646939,6.717957,1.822474,1.822576,2.516197,0.295859,2.407094,0.95139,2.249399,0.0,2.23457,2.560864,1.073027,1.999624
2,6.796636,3.513108,2.574221,0.0,1.022791,2.396651,0.652889,0.588703,0.458944,0.419344,0.0,0.0,1.605214,0.0,1.56931,3.396855,1.636174,1.165219
3,5.059488,3.198297,1.573845,1.239967,0.574101,3.530107,2.466629,0.599458,1.328156,2.271253,0.204155,0.0,2.169149,2.497214,2.349669,2.538714,0.908674,0.34403
4,5.408747,3.722463,2.912291,1.121762,0.0,3.131542,1.542064,1.60095,0.958946,0.564767,0.215583,0.0,3.686751,2.30543,0.963403,1.741627,2.921104,0.0


In [None]:
"""
    2nd view:
    
    
"""
# NOTE(review): this cell reads the same pickle file as the 1st-view cell and
# rebinds `components`, so no distinct 2nd view is loaded here. It looks like
# an unfinished placeholder -- confirm the intended 2nd-view dataset and give
# it its own variable name.
components = pd.read_pickle("ROI_OPNMF_Component_Coefficients.pkl")
# Preview the first rows; as the last expression of the cell, the result is
# what the notebook displays.
components.head()

### Parameters:

In [None]:
# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------

# Device the computation is placed on. It is fixed to the CPU here; note that
# the PyTorch device string for a GPU is 'cuda' (there is no 'gpu' device).
device = torch.device('cpu')

# Output file for the final learned features.
save_to = './new_features.gz'

# Dimensionality of the new space learned by the model, i.e. the number of
# new features produced per view.
outdim_size = 10

# Input size of view 1 and view 2.
# NOTE(review): 784 does not match the 18-column coefficient table shown
# earlier in this file -- confirm the intended input sizes.
input_shape1 = input_shape2 = 784

# Number of nodes in each layer of the two networks; the last entry is the
# output size, so it follows outdim_size.
layer_sizes1 = [1024] * 3 + [outdim_size]
layer_sizes2 = [1024] * 3 + [outdim_size]

# Training settings.
learning_rate = 0.001
epoch_num = 10
batch_size = 800

# Regularization parameter of the network. According to the original author
# it seems necessary to keep the gradient from exploding, especially with
# non-saturating activations.
reg_par = 1e-5

# Whether all singular values are used to compute the correlation, or only
# the top outdim_size ones. If one setting does not work for a given network
# or dataset, try the other.
use_all_singular_values = False

# Whether a linear CCA is applied on top of the features extracted by the
# networks. Per the original author it does not change performance on noisy
# MNIST significantly.
apply_linear_cca = True

# ---------------------------------------------------------------------------
# End of parameters
# ---------------------------------------------------------------------------

# Load the two noisy-MNIST views; each view lives in its own gzip file.
# NOTE(review): the original comment said the files are downloaded on first
# run and stored under [Home Folder]/.keras/datasets/, but the paths passed
# here are relative to the working directory -- confirm against
# utils.load_data.
data1, data2 = (
    load_data('./noisymnist_view1.gz'),
    load_data('./noisymnist_view2.gz'),
)