In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
from numpy import exp
from scipy.stats import norm
from sklearn.preprocessing import QuantileTransformer

# **GENERATE SOME DATA:**

# Fix NumPy's global seed so that every run draws the same sample
np.random.seed(10)

n_samples=1000
# Draw n_samples standard-normal values, exponentiate them to obtain a
# right-skewed (log-normal) sample, and lay the result out as one column
# (n_samples rows x 1 feature).
data = exp(np.random.randn(n_samples)).reshape(-1,1)

#-------------------------------------------------------------------------------

# **CALCULATING THE QUANTILES AND PERCENTILES:**

# Setting the transformer
# subsample=None asks the transformer not to subsample the data, so the
# fitted quantiles can be reproduced by hand from the full sorted sample.
n_quantiles=6
qt = QuantileTransformer(
    output_distribution='normal',subsample=None,n_quantiles=n_quantiles)

# Finding the quantiles
qt.fit(X=data.reshape(-1,1))

# quantiles[k] is the fitted landmark value; percentiles[k] (filled in the
# loop below) is the evenly spaced fraction in [0, 1] paired with it.
quantiles=qt.quantiles_.flatten()
percentiles=np.zeros([n_quantiles])

# Fractional position of every quantile inside the sorted sample
idx=[((n_samples-1)/(n_quantiles-1))*k for k in range(n_quantiles)]
quantiles_check=np.zeros([n_quantiles])
sorted_data=np.sort(data,axis=0).flatten()
last_index=sorted_data.size-1

for k in range(n_quantiles):

    percentiles[k]=k/(n_quantiles-1)

    print(f"{k}. quantile: {quantiles[k]} -> percentile: {percentiles[k]*100}%")

    # Manual check: linear interpolation between the two order statistics
    # that surround the fractional position idx[k].
    lower=int(idx[k])
    # idx[k] is the result of floating-point arithmetic, so for the last
    # quantile it may come out a hair above the final valid index; clamping
    # the upper neighbour keeps lower+1 from running past the array end.
    upper=min(lower+1,last_index)
    percentage=idx[k]-lower
    step=sorted_data[upper]-sorted_data[lower]
    addition=percentage*step
    # When idx[k] is a whole number, percentage is 0 and this reduces to
    # sorted_data[lower], i.e. the exact order statistic.
    quantiles_check[k]=sorted_data[lower]+addition

    print(f"quantile manual check: {quantiles_check[k]}")

print("\n")

#-------------------------------------------------------------------------------

# **TRANSFORMING THE QUANTILES:**

# Each fitted quantile should be mapped by the transformer onto the standard
# normal percent-point function (inverse CDF) evaluated at its percentile.
for k in range(n_quantiles):

    # norm.ppf is infinite at exactly 0 and 1, so the two extreme
    # percentiles are moved inward by 1e-7 before being evaluated;
    # interior percentiles are used unchanged.
    if k==0:
        shift=1e-7
    elif k==n_quantiles-1:
        shift=-1e-7
    else:
        shift=0.0

    print(f"Percentile point function check for {k}. quantile:")
    print("qt.transform(quantile):")
    # The transformer expects a 2-D (n_samples, n_features) array
    print(qt.transform(quantiles[[k]].reshape(-1,1)))

    print("norm.ppf(percentile):")
    print(norm.ppf(percentiles[k]+shift,loc=0,scale=1.0))

    print("\n")
    
#-------------------------------------------------------------------------------

# **PLAYING WITH A POINT THAT IS POSITIONED BETWEEN TWO QUANTILES:**

random.seed(10)

# Choose a random pair of neighbouring quantiles (k, k+1) and a random
# relative position num in [0, 1) between them; x is the resulting point.
k=random.randint(0,n_quantiles-2)
num=random.random()
x0=quantiles[k]
x=x0+num*(quantiles[k+1]-x0)

# Percentile of x by linear interpolation: percentile = a + b*(x - x0),
# where a is the percentile at the left quantile and b is the slope of
# the segment joining the two (quantile, percentile) points.
a=percentiles[k]
b=(percentiles[k+1]-a)/(quantiles[k+1]-x0)
percentile=a+b*(x-x0)
# Map the percentile onto the standard normal through its inverse CDF
x_transformed=norm.ppf(percentile)

# The transformer expects a 2-D array, so wrap the scalar as a 1x1 column
x_column=np.array(x).reshape(-1,1)

print(f"x={x} -> percentile(x)={percentile}")
print("\n")
print("qt.transform(x):")
print(qt.transform(x_column))
print("x_transformed:")
print(x_transformed)
print("\n")

# Transform the point back to the original space:
# normal CDF recovers the percentile, then the linear map is inverted.
percentile=norm.cdf(x_transformed)
x_back=(percentile-a)/b+x0

print("qt.inverse_transform(qt.transform(x)):")
print(qt.inverse_transform(qt.transform(x_column)))
print("x_back:")
print(x_back)

0. quantile: 0.04058318972369623 -> percentile: 0.0%
quantile manual check: 0.04058318972369623
1. quantile: 0.45261614214031254 -> percentile: 20.0%
quantile manual check: 0.45261614214031254
2. quantile: 0.7880576872337507 -> percentile: 40.0%
quantile manual check: 0.7880576872337507
3. quantile: 1.2357563673533 -> percentile: 60.0%
quantile manual check: 1.2357563673533
4. quantile: 2.1275282556894264 -> percentile: 80.0%
quantile manual check: 2.1275282556894264
5. quantile: 14.583785188055503 -> percentile: 100.0%
quantile manual check: 14.583785188055503


Percentile point function check for 0. quantile:
qt.transform(quantile):
[[-5.19933758]]
norm.ppf(percentile):
-5.1993375821928165


Percentile point function check for 1. quantile:
qt.transform(quantile):
[[-0.84162123]]
norm.ppf(percentile):
-0.8416212335729142


Percentile point function check for 2. quantile:
qt.transform(quantile):
[[-0.2533471]]
norm.ppf(percentile):
-0.2533471031357997


Percentile point function check 