# Population Stability Index (PSI)

In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import resample
from sklearn.preprocessing import KBinsDiscretizer

def psi(expected, actual, bins, epsilon=1e-6):
    """Compute the Population Stability Index (PSI) between two samples.

    Both samples are discretized into ``bins`` equal-width bins whose edges
    span the range of ``expected``.  Values of ``actual`` that fall outside
    that range are counted in the first or last bin.  The index is

        sum_i (actual_i - expected_i) * ln(actual_i / expected_i)

    where ``expected_i`` and ``actual_i`` are the shares of observations that
    land in bin ``i``.

    Args:
        expected: Array-like of numeric reference values (any shape; it is
            flattened).
        actual: Array-like of numeric values to compare against ``expected``.
        bins: Number of equal-width bins (an integer >= 1).
        epsilon: Share substituted for bins that contain no observations, so
            the logarithm and the ratio stay finite.

    Returns:
        The PSI as a NumPy float; ``0.0`` when both samples have identical
        bin shares.

    Raises:
        ValueError: If ``bins`` is smaller than 1, if either sample is empty,
            or if either sample contains NaN or infinite values.
    """
    if bins < 1:
        raise ValueError("bins must be at least 1")

    expected_values = np.asarray(expected, dtype=float).ravel()
    actual_values = np.asarray(actual, dtype=float).ravel()
    if expected_values.size == 0 or actual_values.size == 0:
        raise ValueError("expected and actual must not be empty")
    if not (np.isfinite(expected_values).all() and np.isfinite(actual_values).all()):
        raise ValueError("expected and actual must contain only finite values")

    lower = expected_values.min()
    upper = expected_values.max()
    if lower == upper:
        # A constant reference sample cannot be split into equal-width bins;
        # treat the whole real line as a single bin.
        n_bins = 1
        interior_edges = np.empty(0)
    else:
        n_bins = bins
        # Only the interior edges are needed: anything below the first one
        # belongs to bin 0 and anything at or above the last one to the final
        # bin, which also clips out-of-range values of ``actual``.
        interior_edges = np.linspace(lower, upper, bins + 1)[1:-1]

    expected_idx = np.searchsorted(interior_edges, expected_values, side="right")
    actual_idx = np.searchsorted(interior_edges, actual_values, side="right")

    # ``minlength`` keeps empty bins in the result so both share vectors always
    # have one entry per bin and stay aligned with each other.
    expected_perc = np.bincount(expected_idx, minlength=n_bins) / expected_values.size
    actual_perc = np.bincount(actual_idx, minlength=n_bins) / actual_values.size

    # Replace exact zeros only, so populated bins keep their true share.
    expected_perc = np.where(expected_perc == 0, epsilon, expected_perc)
    actual_perc = np.where(actual_perc == 0, epsilon, actual_perc)

    return np.sum((actual_perc - expected_perc) * np.log(actual_perc / expected_perc))

