In [None]:
from typing import Any, Dict

import numpy as np
import pandas as pd
from scipy.stats import entropy
from sklearn.metrics import (
    adjusted_mutual_info_score,
    mutual_info_score,
    normalized_mutual_info_score,
)

In [None]:
def series_entropy(s: pd.Series) -> float:
    """
    Return the entropy of the empirical distribution of the values in `s`.

    The relative frequency of each distinct value is taken from
    `Series.value_counts(normalize=True)` and handed to `scipy.stats.entropy`.
    Missing values are not counted, since `value_counts` drops them by default.
    """
    relative_frequencies = s.value_counts(normalize=True)
    return entropy(relative_frequencies)

In [None]:
def igr(df_features: pd.DataFrame, target: pd.Series) -> Dict[Any, float]:
    """
    Calculate the information gain ratio for each feature in a dataframe

    The ratio for a feature is the mutual information between that feature
    and the target, divided by the entropy of the feature itself.

    Parameters
    ----------
    df_features : Dataframe
        The features for which the information gain ratio will be calculated
    target : Series
        The targets for which the information gain ratio with each feature will be calculated

    Returns
    -------
    A dictionary of feature names to information gain ratio, for each feature in df_features.
    A feature whose entropy is zero (e.g. a column holding a single distinct value)
    is assigned a ratio of 0.0.
    """
    gain_ratios: Dict[Any, float] = {}
    for col in df_features.columns:
        feature = df_features[col]
        split_info = series_entropy(feature)
        if split_info == 0:
            # A zero-entropy feature carries no information, so the ratio
            # would be 0 / 0; report 0.0 instead of letting NaN propagate.
            gain_ratios[col] = 0.0
            continue
        gain_ratios[col] = mutual_info_score(feature, target) / split_info
    return gain_ratios