# Implementation of primal dual CCA (aka partially linear CCA)
Hardoon & Shawe-Taylor, 2011

In [119]:
import numpy as np
import pandas as pd
import cvxpy as cp
from scipy.spatial.distance import pdist, squareform
from scipy.optimize import minimize, NonlinearConstraint
import scipy
from sklearn.metrics.pairwise import pairwise_kernels, rbf_kernel


In [230]:
ret = pd.read_csv('ret_subset.csv')


In [231]:
ret = np.array(ret.iloc[:100,1:20])

# primal dual CCA
Hardoon & Shawe-Taylor 2011, non-exact implementation

In [261]:
def gaussian_kernel(X,s=1):
    """
    function to obtain gaussian kernel
    inputs:
            X : numpy matrix of size assets x time
            s : scale factor
    """
    #pairwise_dists = squareform(pdist(X, 'euclidean'))
    #K = scipy.exp(-pairwise_dists ** 2 / s ** 2)
    
    # K(x, y) = exp(-gamma ||x-y||^2)
    K = rbf_kernel(X, gamma=s)
    return K
    
def primal_dual_cca(X, K, seed_index, sk):
    """
     Original description by David R. Hardoon: 
     Sparse Canonical Correlation Analysis - SCCA, is a primal-dual solver for
     the CCA problem. Given primal data of a view and a dual representation of
     the second view will provide a sparse primal weight vector (for the primal
     data) and sparse feature projection (for the dual [kernel] data)

     Input:  X             - Primal data of view one    [m x l] (rows is the number of assets)
             K             - dual data of view two      [l x l]
             seed_index    - Starting point for e       [1 x 1]
             sk            - scaling factor for mu and gamma

     Output: w             - sparse weight vector      [1 x m]
             e             - sparse projct vectors     [1 x l]
             cor           - correlation value         [1 x 1]
    """
    primal_dim = X.shape[0]
    N_samples = X.shape[1]
    tau = 0.5

    #This is how mu and gamma are set in David's SCCA2.m
    Ij = np.zeros((K.shape[1], K.shape[1]))
    np.fill_diagonal(Ij, 1)
    Ij[seed_index, seed_index] = 0
    c = X * K[:,seed_index]
    KK = np.transpose(K) * K
    d1 = 2*tau*(1-tau)*c
    mu = sk*np.mean(np.abs(d1))
    gamma = np.mean(np.abs(2*(1-tau)**2*Ij*KK[:,seed_index]))
    beta = 1
    
    # initial parameters
    w = np.zeros(primal_dim)
    e = np.zeros(N_samples)
    e[seed_index] = 1
    initial = np.concatenate([w,e])
    
    # bounds
    bnds  = [(-np.inf,np.inf) for i in range(primal_dim)]
    bnds2 = [(0, None) for i in range(primal_dim,N_samples+primal_dim)]
    bnds.extend(bnds2)

    # constraints
    const = NonlinearConstraint(kernel_weights_constraint,1.0,1.0)
    
    # minimization
    result = minimize(pl_minimize,x0=initial, args = (X,K,tau,beta,gamma,mu,primal_dim),bounds=bnds, constraints=const).x#, bounds = bnds)#, constraints =(const,))
    
    w = result[:primal_dim]
    e = result[primal_dim:]
    p1 = w @ X @ X.T @ w
    p2 = e @ K @ K @ e
    corr = w @ X @ K @ e / np.sqrt(p1*p2)
    return w,e,corr
    
def kernel_weights_constraint(x):
    """
    need to define a global variable for the dimension of our asset space, currently it is 19
    """
    return np.linalg.norm(x[19:],np.inf)

def pl_minimize(x, *args):
    X, K, tau, beta, gamma, mu, dimension = args
    w = x[:dimension]
    e = x[dimension:]
    res = np.linalg.norm(tau * X.T @ w - (1-tau)*K @ e) + mu*np.linalg.norm(w,1) + gamma*np.linalg.norm(e,1)
    return np.maximum(res, np.zeros(res.shape))**2

In [262]:
# test
k = gaussian_kernel(ret)
opt_w, opt_e, corr = primal_dual_cca(ret.T, k, 10, 0.1)
print(corr)

0.0012532154843144848
0.004532502975463262
119
(119,)
ok 1
ok
3.8391045559411845


# primal dual CCA
exact implementation

In [259]:
k @ k

array([[85.29288114, 86.51760884, 87.40002397, ..., 89.64431682,
        88.07878171, 88.08935249],
       [86.51760884, 87.94930529, 88.80864413, ..., 91.05851987,
        89.52161965, 89.40870527],
       [87.40002397, 88.80864413, 89.74986444, ..., 91.99242156,
        90.43709346, 90.36344359],
       ...,
       [89.64431682, 91.05851987, 91.99242156, ..., 94.32326099,
        92.71407608, 92.65034127],
       [88.07878171, 89.52161965, 90.43709346, ..., 92.71407608,
        91.15178173, 91.05247221],
       [88.08935249, 89.40870527, 90.36344359, ..., 92.65034127,
        91.05247221, 91.04547398]])