In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import CCA
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt

In [2]:
def project(data_matrix, latent_matrix):
  """Project every column of `data_matrix` onto the direction of `latent_matrix`.

  The latent is first rescaled to unit norm, so only its direction matters.
  Each column of `data_matrix` is then replaced by its component along that
  direction.

  Parameters
  ----------
  data_matrix : array whose first axis has the same length as the latent
      (rows are presumably samples and columns features; confirm with callers).
  latent_matrix : array holding a single latent variable, one value per row
      of `data_matrix`.

  Returns
  -------
  ndarray built with `np.outer`: one row per latent entry, one column per
  projection coefficient.
  """
  # Unit-length direction; a zero-norm latent would divide by zero here.
  direction = latent_matrix / np.linalg.norm(latent_matrix)
  # Coefficient of each data column along the direction.
  coefficients = np.matmul(data_matrix.T, direction)
  # Rank-one reconstruction: direction scaled by each column's coefficient.
  return np.outer(direction, coefficients.T)

def evr_(data_matrix, latent_matrix):
  """Explained-variance ratio of `data_matrix` for a single latent variable.

  The data are projected onto the latent direction with `project`; the
  residual is what remains after subtracting that projection. The ratio is

      1 - (sum of per-column residual variances) / (sum of per-column data variances)

  Parameters
  ----------
  data_matrix : 2-D array; variances are taken down axis 0.
  latent_matrix : single latent variable, as accepted by `project`.

  Returns
  -------
  Scalar explained-variance ratio.
  """
  residual = data_matrix - project(data_matrix, latent_matrix)
  residual_variance = sum(np.var(residual, axis=0))
  total_variance = sum(np.var(data_matrix, axis=0))
  return 1 - residual_variance / total_variance

def evr_orth(dataset, latents):
  """Explained-variance ratio of `dataset` for each latent, plus their total.

  `evr_` is applied to every 1-D slice of `latents` taken along axis 0
  (i.e. to each column of a 2-D array).

  Parameters
  ----------
  dataset : data matrix passed unchanged to `evr_`.
  latents : array whose columns are the latent variables.

  Returns
  -------
  (per_latent, total) : the array of individual ratios and their sum.
      NOTE(review): the sum is only a meaningful total if the latents are
      mutually orthogonal, which is presumably what the `_orth` suffix
      refers to; nothing here checks it.
  """
  def ratio_for(latent):
    return evr_(dataset, latent)

  per_latent = np.apply_along_axis(ratio_for, 0, latents)
  total = sum(per_latent)
  return per_latent, total

In [3]:
# Load the two datasets and keep them as plain NumPy arrays; column labels are
# discarded by `.to_numpy()`.
# NOTE(review): the file names suggest the values are not yet z-scored; confirm.
heart_table = pd.read_csv('heart.nozscore.csv')
heart = heart_table.to_numpy()

brain_table = pd.read_csv('brain.nozscore.csv')
brain = brain_table.to_numpy()

## Run Cross-Validation to Assess the Number and Stability of Components

In [20]:
# K-fold cross-validation of the CCA: for every fold, fit on the training rows
# and record, on the held-out rows, the explained-variance ratio of each
# canonical variate in its own dataset and the heart/brain score correlation.
ncomp = 10   # number of canonical components fitted in each fold
nsplit = 10  # number of cross-validation folds

# scale=False: standardisation is done explicitly per fold below.
cca = CCA(n_components=ncomp, scale=False)

# One row per fold, one column per canonical component.
heart_evrs = np.zeros((nsplit, ncomp))
brain_evrs = np.zeros((nsplit, ncomp))
cors = np.zeros((nsplit, ncomp))

# Use `nsplit` (not a literal) so the fold count always matches the row count
# of the result arrays allocated above.
for fold, (train_index, test_index) in enumerate(KFold(n_splits=nsplit).split(heart)):
    heart_train, heart_test = heart[train_index], heart[test_index]
    brain_train, brain_test = brain[train_index], brain[test_index]

    # Scalers are fitted on the training rows only and then applied to both
    # splits, so the held-out rows do not influence the scaling.
    heart_scaler = StandardScaler().fit(heart_train)
    brain_scaler = StandardScaler().fit(brain_train)
    heart_train_transformed = heart_scaler.transform(heart_train)
    brain_train_transformed = brain_scaler.transform(brain_train)
    heart_test_transformed = heart_scaler.transform(heart_test)
    brain_test_transformed = brain_scaler.transform(brain_test)

    # Only the fitted model is needed; the training scores were never used.
    cca.fit(heart_train_transformed, brain_train_transformed)
    heart_test_scores, brain_test_scores = cca.transform(heart_test_transformed, brain_test_transformed)

    heart_evrs[fold, :] = evr_orth(heart_test_transformed, heart_test_scores)[0]
    brain_evrs[fold, :] = evr_orth(brain_test_transformed, brain_test_scores)[0]

    # corrcoef of the two score matrices (columns as variables) is a
    # (2*ncomp, 2*ncomp) matrix; its ncomp-th super-diagonal holds the
    # correlation between heart component k and brain component k.
    cors[fold, :] = np.diag(np.corrcoef(heart_test_scores, brain_test_scores, rowvar=False), ncomp)

In [25]:
# Write each cross-validation result matrix to its own CSV file
# (default integer row/column labels are written as well).
for result_matrix, csv_path in (
    (heart_evrs, 'heart.evrs.crossval.csv'),
    (brain_evrs, 'brain.evrs.crossval.csv'),
    (cors, 'heart.brain.cors.crossval.csv'),
):
    pd.DataFrame(result_matrix).to_csv(csv_path)

## Run CCA on All Data

In [None]:
# Fit a 10-component CCA on the complete datasets (scaling is left to CCA via
# scale=True) and compute the canonical scores for the same rows.
cca = CCA(n_components=10, scale=True)
cca.fit(heart, brain)
heart_cc, brain_cc = cca.transform(heart, brain)

In [None]:
# Save the fitted CCA matrices (weights, loadings, rotations) and the canonical
# scores to CSV.
#
# `heart` and `brain` are NumPy arrays (they were loaded with `.to_numpy()`),
# so they have no `.columns` attribute. The feature names used as row labels
# are therefore read back from the headers of the source CSV files; `nrows=0`
# parses the header only, without loading any data rows.
heart_features = pd.read_csv('heart.nozscore.csv', nrows=0).columns
brain_features = pd.read_csv('brain.nozscore.csv', nrows=0).columns

# Per-feature matrices: one row per input feature, one column per component.
pd.DataFrame(cca.x_weights_, index=heart_features).to_csv('heart_weights.csv')
pd.DataFrame(cca.y_weights_, index=brain_features).to_csv('brain_weights.csv')
pd.DataFrame(cca.x_loadings_, index=heart_features).to_csv('heart_loadings.csv')
pd.DataFrame(cca.y_loadings_, index=brain_features).to_csv('brain_loadings.csv')
pd.DataFrame(cca.x_rotations_, index=heart_features).to_csv('heart_rotations.csv')
pd.DataFrame(cca.y_rotations_, index=brain_features).to_csv('brain_rotations.csv')

# Canonical scores returned by the full-data fit.
pd.DataFrame(heart_cc).to_csv('heart_ccs.csv')
pd.DataFrame(brain_cc).to_csv('brain_ccs.csv')