In [None]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.linalg import svd
from numpy.linalg import norm


# Select matplotlib's interactive "notebook" backend for the figures below.
%matplotlib notebook
# Global seaborn look: "ticks" style, "notebook" context, "muted" palette.
sns.set_theme(style="ticks", context="notebook", palette="muted")

In [None]:
# Read the data set from a CSV file located next to the notebook.
data = pd.read_csv("solubility_alc.csv")
# Preview only the first rows instead of rendering the whole frame.
data.head()

In [None]:
# Column names that make up the X block and the Y block (two columns each).
xvars = ['MolLogP', 'nAtom']
yvars = ['measured log solubility in mols per litre', 'Molecular Weight']

# One scaler per block so each block keeps its own fitted statistics.
scaler_x = StandardScaler()
scaler_y = StandardScaler()

# Pull the selected columns out as plain arrays and standardise them.
X = scaler_x.fit_transform(data[xvars].to_numpy())
Y = scaler_y.fit_transform(data[yvars].to_numpy())

In [None]:
# Side-by-side scatter plots of the two scaled X columns and the two
# scaled Y columns.
fig, axes = plt.subplots(ncols=2, figsize=(8, 4), constrained_layout=True)

# Left panel: the X block.
axes[0].scatter(X[:, 0], X[:, 1])
axes[0].set(title='X', xlabel=xvars[0], ylabel=xvars[1])

# Right panel: the Y block.
axes[1].scatter(Y[:, 0], Y[:, 1])
axes[1].set(title='Y', xlabel=yvars[0], ylabel=yvars[1])

sns.despine(fig=fig)

In [None]:
# Hand-picked loading directions.
# Each one is scaled to unit length and stored as a (2, 1) column vector.
R = np.array([0.0, 1.0])
R = (R / norm(R)).reshape(2, -1)

Ry = np.array([0.0, 1.0])
Ry = (Ry / norm(Ry)).reshape(2, -1)

# Show the values and shapes that will be passed on.
print("R", R, R.shape)
print("Ry", Ry, Ry.shape)

In [None]:
def make_plot(X, Y, R, Ry):
    """Plot X, Y, the directions R / Ry, and the scores they produce.

    Three panels are drawn:

    1. scatter of the first two columns of ``X`` with ``R`` drawn as a
       black arrow from the origin,
    2. scatter of the first two columns of ``Y`` with ``Ry`` drawn as a
       red arrow from the origin,
    3. scatter of the X-scores ``T`` against the Y-scores ``U``, with the
       value of ``T.T @ U`` shown in the panel title.

    Axis labels are taken from the module-level ``xvars`` and ``yvars``.

    Parameters
    ----------
    X, Y : 2-D arrays
        Data blocks; columns 0 and 1 are plotted.
    R, Ry : 2-D arrays
        Direction vectors, read as ``R[0][0]`` / ``R[1][0]`` for the arrow.
        They are divided by their norm before the data is projected, so
        only their direction affects the scores.

    Returns
    -------
    None
    """
    fig, axes = plt.subplots(constrained_layout=True, ncols=3, figsize=(9, 3))
    axes[0].scatter(X[:, 0], X[:, 1])
    axes[0].set(xlabel=xvars[0], ylabel=xvars[1], title='X')

    axes[1].scatter(Y[:, 0], Y[:, 1])
    axes[1].set(xlabel=yvars[0], ylabel=yvars[1], title='Y')

    # Both arrows share the same geometry; only the colour differs.
    arrow_style = dict(angles='xy', scale_units='xy', scale=0.25, width=0.015)
    axes[0].quiver(0, 0, R[0][0], R[1][0], color='black', **arrow_style)
    axes[1].quiver(0, 0, Ry[0][0], Ry[1][0], color='red', **arrow_style)

    # Project each block onto its (re-normalised) direction to get scores.
    T = X @ (R / norm(R))
    U = Y @ (Ry / norm(Ry))

    # NOTE(review): this is the raw inner product of the score vectors, not
    # a Pearson correlation coefficient; it is not divided by the number of
    # samples or by the score standard deviations. Confirm the label below
    # is the intended one.
    correlation = T.T @ U

    axes[2].scatter(T[:, 0], U[:, 0])
    axes[2].set(
        xlabel='X-scores (T)',
        ylabel='Y-scores (U)',
        # The format spec previously had a stray literal "1" after it,
        # which appended a bogus digit to every displayed value.
        title=f'Correlation: {correlation[0][0]:.2f}')
    sns.despine(fig=fig)

In [None]:
# Draw both data blocks, the current R / Ry directions, and the resulting scores.
make_plot(X, Y, R, Ry)

In [None]:
# Loadings by hand:
# The two entries of each array were left blank (`np.array([, ])`), which is
# a SyntaxError and stops the notebook from running top to bottom. The values
# below are only a starting guess -- edit them to try other directions.
R = np.array([1.0, 1.0])
R = R / norm(R)  # Normalize to unit vector
R = R.reshape(2, -1)  # Make it a column vector

Ry = np.array([-1.0, 1.0])
Ry = Ry / norm(Ry)  # Normalize to unit vector
Ry = Ry.reshape(2, -1)  # Make it a column vector
# Plot the data, the R / Ry directions chosen in this cell, and their scores.
make_plot(X, Y, R, Ry)

In [None]:
# Fit a one-component PLS model on the already-scaled blocks (scale=False so
# the estimator does not rescale them again), then plot the directions it
# found using the same helper as for the hand-picked ones.
model = PLSRegression(n_components=1, scale=False).fit(X, Y)
make_plot(X, Y, model.x_rotations_, model.y_rotations_)

In [None]:
# Singular value decomposition of the cross-product matrix X'Y.
U, _, Vt = svd(X.T @ Y)

# First left singular vector as a unit-length (2, 1) column.
w = U[:, 0].reshape(2, -1) / norm(U[:, 0])

# First right singular vector (first row of Vt) as a unit-length (2, 1) column.
q = Vt[0].reshape(2, -1) / norm(Vt[0])

In [None]:
# Plot the SVD-derived directions with both signs flipped. Negating both w and
# q negates both score vectors, so the T.T @ U value shown in the title is
# unchanged. NOTE(review): presumably the flip is there to match the arrow
# orientation of the PLSRegression plot -- confirm.
make_plot(X, Y, -w, -q)