In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as spst
import seaborn as sns
from sklearn import datasets
from sklearn.preprocessing import QuantileTransformer, StandardScaler
from sklearn.manifold import TSNE
import kdquantile

# Seed NumPy's legacy global RNG so the np.random draws made later in the
# notebook are repeatable when it is run top to bottom on a fresh kernel.
np.random.seed(1)

# The examples in this notebook are from
# https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient


In [None]:
# Example 1: Y is a strictly increasing but non-linear function of X
# (a logit, then a sign-preserving power of 1.4), so the ranks of X and Y
# agree exactly while the relationship between the raw values is not linear.
X = np.random.uniform(size=100)
Y = np.log(X / (1 - X))
Y = np.sign(Y) * np.abs(Y) ** 1.4

# Pearson: linear correlation of the raw values.
cp = np.corrcoef(X, Y)[0, 1]

# Spearman: Pearson correlation of the ranks. rankdata assigns tied values
# their average rank, which is what Spearman's definition requires.
XR = spst.rankdata(X)
YR = spst.rankdata(Y)
cr = np.corrcoef(XR, YR)[0, 1]

# KDQ: Pearson correlation after each variable has been passed through its
# own KDQuantileTransformer (which expects a 2-D column, hence the reshape).
Xkdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(X.reshape(-1, 1))
Ykdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(Y.reshape(-1, 1))
ckdq = np.corrcoef(Xkdq.ravel(), Ykdq.ravel())[0, 1]

# Build the figure explicitly. Calling plt.clf() beforehand is deliberately
# avoided: with no current figure it would create an extra, empty figure.
fig = plt.figure(figsize=(2.5, 2.5))
ax = fig.add_axes([0.17, 0.12, 0.8, 0.75])
ax.plot(X, Y, 'o', markersize=4, markeredgecolor='black', markeredgewidth=0.2, color='orange')
# The annotation is placed in data coordinates.
ax.text(0.35, -20, "Spearman: %.2f\nPearson: %.2f\nKDQ: %.2f" % (cr, cp, ckdq))
ax.set_xlabel("X", size=11)
ax.set_ylabel("Y", size=11)
ax.set_xlim(-0.05, 1.05)
fig.savefig("correlation-wikipedia-1.pdf", bbox_inches="tight");

In [None]:
# Example 2: X is standard normal and Y mixes X with independent standard
# normal noise, weighted so that the population correlation equals r.
r = 0.4
X = np.random.normal(size=100)
Y = r * X + np.sqrt(1 - r ** 2) * np.random.normal(size=100)

# Pearson: linear correlation of the raw values.
cp = np.corrcoef(X, Y)[0, 1]

# Spearman: Pearson correlation of the ranks. rankdata assigns tied values
# their average rank, which is what Spearman's definition requires.
XR = spst.rankdata(X)
YR = spst.rankdata(Y)
cr = np.corrcoef(XR, YR)[0, 1]

# KDQ: Pearson correlation after each variable has been passed through its
# own KDQuantileTransformer (which expects a 2-D column, hence the reshape).
Xkdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(X.reshape(-1, 1))
Ykdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(Y.reshape(-1, 1))
ckdq = np.corrcoef(Xkdq.ravel(), Ykdq.ravel())[0, 1]

# Build the figure explicitly. Calling plt.clf() beforehand is deliberately
# avoided: with no current figure it would create an extra, empty figure.
fig = plt.figure(figsize=(2.5, 2.5))
ax = fig.add_axes([0.17, 0.12, 0.8, 0.75])
ax.plot(X, Y, 'o', markersize=4, markeredgecolor='black', markeredgewidth=0.2, color='orange')
# The annotation is placed in data coordinates.
ax.text(-2.5, 1.65, "Spearman: %.2f\nPearson: %.2f\nKDQ: %.2f" % (cr, cp, ckdq))
ax.set_xlabel("X", size=11)
ax.set_ylabel("Y", size=11)
fig.savefig("correlation-wikipedia-2.pdf", bbox_inches="tight");

In [None]:
# Example 3: a correlated normal pair (population correlation r) in which a
# few X values are then stretched to act as outliers.
r = 0.8
X = np.random.normal(size=100)
Y = r * X + np.sqrt(1 - r ** 2) * np.random.normal(size=100)

# Multiply the five largest X values by 3; Y is left untouched.
ii = np.argsort(-X)
X[ii[0:5]] *= 3

# Pearson: linear correlation of the raw values.
cp = np.corrcoef(X, Y)[0, 1]

# Spearman: Pearson correlation of the ranks. rankdata assigns tied values
# their average rank, which is what Spearman's definition requires.
XR = spst.rankdata(X)
YR = spst.rankdata(Y)
cr = np.corrcoef(XR, YR)[0, 1]

# KDQ: Pearson correlation after each variable has been passed through its
# own KDQuantileTransformer (which expects a 2-D column, hence the reshape).
Xkdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(X.reshape(-1, 1))
Ykdq = kdquantile.KDQuantileTransformer(alpha=1.).fit_transform(Y.reshape(-1, 1))
ckdq = np.corrcoef(Xkdq.ravel(), Ykdq.ravel())[0, 1]

# Build the figure explicitly. Calling plt.clf() beforehand is deliberately
# avoided: with no current figure it would create an extra, empty figure.
fig = plt.figure(figsize=(2.5, 2.4))
ax = fig.add_axes([0.17, 0.12, 0.8, 0.75])
ax.plot(X, Y, 'o', markersize=4, markeredgecolor='black', markeredgewidth=0.2, color='orange')
# The annotation is placed in data coordinates.
ax.text(2.3, -1.5, "Spearman: %.2f\nPearson: %.2f\nKDQ: %.2f" % (cr, cp, ckdq))
ax.set_xlabel("X", size=11)
ax.set_ylabel("Y", size=11)
fig.savefig("correlation-wikipedia-3.pdf", bbox_inches="tight");