In [3]:
import numpy as np
import pandas as pd

# Toy input: one row per "AgeGroup" bin with its good / bad outcome counts.
data = {
    'AgeGroup': ['<20', '20-24', '25-29', '30-39', '40+'],
    'GoodCount': [90, 120, 160, 300, 280],
    'BadCount': [10, 30, 40, 25, 5],
}
df_bins = pd.DataFrame(data)

# Column totals across all bins (950 goods and 110 bads for this data).
total_good = df_bins['GoodCount'].sum()
total_bad = df_bins['BadCount'].sum()

# Each bin's share of the overall good population and of the overall bad
# population; each of the two columns sums to 1 over the bins.
df_bins['GoodRate'] = df_bins['GoodCount'].div(total_good)
df_bins['BadRate'] = df_bins['BadCount'].div(total_bad)

# Weight of Evidence, taken here as ln(bad share / good share): a positive
# value means the bin holds a larger share of the bads than of the goods.
# The tiny offset on both shares keeps the ratio and its log finite when a
# bin has a zero count.
_EPS = 1e-9
bad_share = df_bins['BadRate'] + _EPS
good_share = df_bins['GoodRate'] + _EPS
df_bins['WoE'] = np.log(bad_share / good_share)

# Per-bin Information Value contribution: (bad share - good share) * WoE.
# The feature-level IV is the sum of these contributions.
rate_gap = df_bins['BadRate'] - df_bins['GoodRate']
df_bins['IV_bin'] = rate_gap * df_bins['WoE']
IV_age = df_bins['IV_bin'].sum()

# Report the per-bin table (without the intermediate rate columns) and the total.
report_columns = ['AgeGroup', 'GoodCount', 'BadCount', 'WoE', 'IV_bin']
print(df_bins[report_columns])
print(f"Information Value (Age) = {IV_age:.4f}")


  AgeGroup  GoodCount  BadCount       WoE    IV_bin
0      <20         90        10 -0.041243  0.000158
1    20-24        120        30  0.769687  0.112691
2    25-29        160        40  0.769687  0.150255
3    30-39        300        25 -0.328925  0.029115
4      40+        280         5 -1.869370  0.466001
Information Value (Age) = 0.7582
