In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [36]:
import numpy as np
import pandas as pd
from scipy.linalg import eig, eigh, inv
from scipy.stats import f

# Example data
data = pd.DataFrame({
    "y1": [10, 0, 1, 1],
    "y2": [0, 10, 1, 1],
    "x1": [1, 1, 1, 0],
    "x2": [1, 1, 0, 1]
})

# Submatrices
Exx = np.array(data.iloc[:2, :2])  # First 2 rows and columns 1 and 2
Eyx = np.array(data.iloc[2:, :2])  # Rows 3 and 4, columns 1 and 2
Exy = np.array(data.iloc[:2, 2:])  # Rows 1 and 2, columns 3 and 4
Eyy = np.array(data.iloc[2:, 2:])  # Rows 3 and 4, columns 3 and 4

# Inverse matrices
invExx = inv(Exx)
invEyy = inv(Eyy)

# Canonical correlation
H = Eyx @ invExx @ Exy
E = Eyy - Eyx @ invExx @ Exy

# Generalized eigenvalue problem
H = Eyx @ invExx @ Exy  # Numerator
Cancorr_values, Cancorr_vectors = eig(H, Eyy)  # Solve generalized eigenproblem

# Sorting eigenvalues in descending order
Cancorr_values = np.sort(np.real(Cancorr_values))[::-1]  # Keep real parts only

# Variance proportions and cumulative proportions
varPC = Cancorr_values / Cancorr_values.sum()
cumu = np.cumsum(varPC)


invE = inv(E)
eigenvalues, eigenvectors = eigh(invE @ H)

# Sorting eigenvalues in descending order
eigenvalues = np.sort(eigenvalues)[::-1]

# Variance proportions and cumulative proportions
varPC = eigenvalues / eigenvalues.sum()
cumu = np.cumsum(varPC)

# Results DataFrame
# Results DataFrame
results = pd.DataFrame({
    "CanCor": np.sqrt(Cancorr_values),
    "Squared CanCor": Cancorr_values,
    "proportion": varPC,
    "cumulative": cumu
})

# Wilks test approximation
n = 10  # Sample size
p = Exx.shape[0]  # Number of X variables
q = Eyy.shape[0]  # Number of Y variables

# Wilks' Lambda
wilks_lambda = np.prod(1 - Cancorr_values)  # Lambda = product of (1 - CanCor^2)

# Degrees of freedom
s = n - (p + q + 3) / 2
t = np.sqrt((p**2 * q**2 - 4) / (p**2 + q**2 - 5))

# F-statistic
df1 = p * q  # Numerator degrees of freedom
df2 = s * t - (p * q - 2) / 2  # Denominator degrees of freedom
F_stat = ((1 - wilks_lambda**(1/t)) / (wilks_lambda**(1/t))) * (df2 / df1)

# P-value
p_value = f.sf(F_stat, df1, df2)  # Survival function for F-distribution

# Results
hyp_test = {
    "Wilks' Lambda": wilks_lambda,
    "F-statistic": F_stat,
    "p-value": p_value,
    "df1": df1,
    "df2": df2
}


print("Canonical Correlation Results:")
print(results)

print("\nHypothesis Test Results:")
print(hyp_test)

Canonical Correlation Results:
     CanCor  Squared CanCor  proportion  cumulative
0  0.632456             0.4         1.0         1.0
1  0.000000             0.0         0.0         1.0

Hypothesis Test Results:
{"Wilks' Lambda": 0.5999999999999999, 'F-statistic': 0.8729833462074172, 'p-value': 0.5081227166630375, 'df1': 4, 'df2': 12.0}


In [3]:
iris = pd.read_csv("Data/iris.csv").drop(columns=["Species", "Id"])

iris["SepalLenghtCm2"] = iris["SepalLengthCm"]

iris.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,SepalLenghtCm2
0,5.1,3.5,1.4,0.2,5.1
1,4.9,3.0,1.4,0.2,4.9
2,4.7,3.2,1.3,0.2,4.7
3,4.6,3.1,1.5,0.2,4.6
4,5.0,3.6,1.4,0.2,5.0


In [4]:
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import StandardScaler
import scipy.stats

X = iris[["SepalLengthCm", "SepalWidthCm"]]

Y = iris[["SepalLenghtCm2", "PetalLengthCm", "PetalWidthCm"]]


cca = CCA(n_components=2)
cca.fit(X, Y)

X_c, Y_c = cca.transform(X, Y)

# Canonical Correlation Calculation
canonical_corr = [np.corrcoef(X_c[:, i], Y_c[:, i])[0, 1] for i in range(2)]
print("Canonical Correlations:", canonical_corr)

cca.coef_   

Canonical Correlations: [1.0, 0.7195997573157874]


array([[ 8.28066128e-01, -1.48597934e-16],
       [ 1.21991401e+00, -2.90965540e+00],
       [ 5.11176098e-01, -1.03369094e+00]])

In [42]:
import pandas as pd
import numpy as np
# import standard scaler
from sklearn.preprocessing import StandardScaler

# Define the correlation matrix
data = {
    "_type_": ['corr'] * 18,
    "_Name_": ['pH', 'Water', 'Protein', 'EtherExt', 'Hydroxy', 'CollaSol', 'Lightn', 'Hue', 'DripLoss', 'CookLoss',
               'WBshear', 'Appear', 'EaseSink', 'Friabil', 'Residue', 'InJuice', 'SusJuice', 'OvAcc'],
    "pH": [1, 0.09, 0.28, -0.28, -0.33, -0.08, -0.02, -0.33, 0.01, -0.38, -0.26, 0.1, 0.17, 0.1, 0.08, 0.08, 0.01, 0.13],
    "Water": [0.09, 1, -0.4, -0.16, -0.08, -0.01, 0.03, -0.23, 0.18, 0.15, -0.01, -0.003, -0.16, -0.17, -0.19, -0.08, -0.09, -0.13],
    "Protein": [0.28, -0.4, 1, -0.56, -0.55, -0.03, 0.34, -0.47, -0.07, -0.64, -0.63, 0.25, 0.27, 0.2, 0.23, 0.03, -0.004, 0.21],
    "EtherExt": [-0.28, -0.16, -0.56, 1, 0.59, 0.05, -0.31, 0.4, 0.08, 0.44, 0.42, -0.42, -0.11, -0.09, -0.13, -0.004, -0.01, -0.09],
    "Hydroxy": [-0.33, -0.08, -0.55, 0.59, 1, 0.16, -0.48, 0.62, -0.12, 0.66, 0.72, -0.33, -0.26, -0.22, -0.24, -0.05, -0.02, -0.22],
    "CollaSol": [-0.08, -0.01, -0.03, 0.05, 0.16, 1, -0.02, -0.03, -0.1, -0.01, -0.03, -0.19, 0.01, 0.06, -0.02, 0.05, 0.03, 0.07],
    "Lightn": [-0.02, 0.03, 0.34, -0.31, -0.48, -0.02, 1, -0.21, 0.25, -0.45, -0.55, 0.35, 0.19, 0.19, 0.2, -0.06, -0.02, 0.22],
    "Hue": [-0.33, -0.23, -0.47, 0.4, 0.62, -0.03, -0.21, 1, -0.13, 0.65, 0.67, 0.07, -0.19, -0.1, -0.1, -0.03, 0.08, -0.14],
    "DripLoss": [0.01, 0.18, -0.07, 0.08, -0.12, -0.1, 0.25, -0.13, 1, 0.03, -0.11, 0.02, -0.36, -0.31, -0.32, -0.13, -0.07, -0.01],
    "CookLoss": [-0.38, 0.15, -0.64, 0.44, 0.66, -0.01, -0.45, 0.65, 0.03, 1, 0.73, -0.18, -0.38, -0.31, -0.32, -0.12, -0.07, -0.34],
    "WBshear": [-0.26, -0.01, -0.63, 0.42, 0.72, -0.03, -0.55, 0.67, -0.11, 0.73, 1, -0.28, -0.32, 0.27, 0.33, -0.1, -0.03, -0.37],
    "Appear": [0.1, -0.003, 0.25, -0.42, -0.33, -0.19, 0.35, 0.07, 0.02, -0.18, -0.28, 1, 0.24, 0.27, 0.33, 0.16, 0.24, 0.31],
    "EaseSink": [0.17, -0.16, 0.27, -0.11, -0.26, 0.01, 0.19, -0.19, -0.02, -0.36, -0.38, 0.24, 1, 0.93, 0.91, 0.69, 0.66, 0.92],
    "Friabil": [0.1, -0.17, 0.2, -0.09, -0.22, 0.06, 0.19, -0.1, -0.03, -0.31, -0.32, 0.27, 0.93, 1, 0.94, 0.72, 0.7, 0.92],
    "Residue": [0.08, -0.19, 0.23, -0.13, -0.24, -0.02, 0.2, -0.1, -0.02, -0.32, -0.33, 0.33, 0.91, 0.94, 1, 0.72, 0.7, 0.91],
    "InJuice": [0.08, -0.08, 0.03, -0.004, -0.05, 0.05, -0.06, -0.03, -0.13, -0.12, -0.1, 0.16, 0.69, 0.72, 0.72, 1, 0.93, 0.8],
    "SusJuice": [0.01, -0.09, -0.004, -0.01, -0.02, 0.03, -0.02, 0.08, -0.15, -0.07, -0.03, 0.24, 0.66, 0.7, 0.7, 0.93, 1, 0.79],
    "OvAcc": [0.13, -0.13, 0.21, -0.09, -0.22, 0.07, 0.22, -0.14, -0.01, -0.34, -0.37, 0.31, 0.92, 0.92, 0.91, 0.8, 0.79, 1]
}

beef = pd.DataFrame(data)

# Compute the canonical correlation
X = beef.iloc[:, 2:9]  # Predictor variables (pH, Water, Protein, EtherExt, etc.)
Y = beef.iloc[:, 9:]  # Response variables (Appear, EaseSink, Friabil, etc.)

X = StandardScaler().fit_transform(X)
Y = StandardScaler().fit_transform(Y)

# Perform Canonical Correlation Analysis (CCA)
cca = CCA(n_components=7)
cca.fit(X, Y)

# Canonical Variables (V1, V2 for X and W1, W2 for Y)
X_c, Y_c = cca.transform(X, Y)

print(X_c.shape)

# Canonical Correlation Calculation
canonical_corr = [np.corrcoef(X_c[:, i], Y_c[:, i])[0, 1] for i in range(cca.n_components)]
print("Canonical Correlations:", canonical_corr)

(18, 7)
Canonical Correlations: [0.9999986920583978, 0.9777339762664219, 0.9430113060875043, 0.8989961024018518, 0.7475971483545059, 0.6830925007641399, 0.6467176947177311]


In [None]:
def get_canonical_coefficients(beef):

    # Partition the Beef dataset into submatrices
    Exx = beef[:11, :11]  # Top-left
    Eyx = beef[11:, :11]  # Bottom-left
    Exy = beef[:11, 11:]  # Top-right
    Eyy = beef[11:, 11:]  # Bottom-right

    print(Exx.shape, Eyx.shape, Exy.shape, Eyy.shape)

    # Inverse matrices
    invExx = inv(Exx)
    invEyy = inv(Eyy)

    # Canonical correlation
    H = Eyx @ invExx @ Exy
    E = Eyy - Eyx @ invExx @ Exy

    Cancorr_values, Cancorr_vectors = eig(H, Eyy)  # Solve generalized eigenproblem

    # Sorting eigenvalues in descending order
    Cancorr_values = np.sort(np.real(Cancorr_values))[::-1]  # Keep real parts only

    # Variance proportions and cumulative proportions
    varPC = Cancorr_values / Cancorr_values.sum()
    cumu = np.cumsum(varPC)

    invE = inv(E)

    # Eigen decomposition
    values, vectors = eig(invE @ H)

    # Sort eigenvalues in descending order
    sorted_indices = np.argsort(values)[::-1]
    values = values[sorted_indices]

    # Calculate proportions and cumulative proportions
    var = np.real(values)  # Ensure we use the real part
    varPC = var / var.sum()
    cumu = np.cumsum(varPC)

    # Create the results DataFrame
    results = pd.DataFrame({
        "CanCor": np.sqrt(Cancorr_values),
        "Squared CanCor": Cancorr_values,   
        "Eigenvalues": var,
        "Proportion": varPC,
        "Cumulative": cumu
    })
    
    return results


S = beef.drop(columns=["_type_", "_Name_"]).values

get_canonical_coefficients(S)

(11, 11) (7, 11) (11, 7) (7, 7)


Unnamed: 0,CanCor,Squared CanCor,Eigenvalues,Proportion,Cumulative
0,0.758041,0.574626,1.350875,0.529065,0.529065
1,0.666103,0.443694,0.797571,0.312366,0.841431
2,0.424323,0.18005,0.219587,0.086,0.927431
3,0.312331,0.097551,0.108096,0.042335,0.969766
4,0.26444,0.069929,0.075186,0.029447,0.999213
5,0.07774,0.006044,0.001005,0.000394,0.999606
6,0.07774,0.006044,0.001005,0.000394,1.0


: 