# Programmatic Entropy and Information Calculations -- Demo Notebook

## 0. Setup

### 0.1. Imports

In [1]:
## Put the project's source directory at the front of the import search path
import os
import sys

package_root = os.path.abspath('../src/')
if package_root not in sys.path:
    # Prepend (rather than append) so the local package takes priority
    sys.path[:0] = [package_root]

#package_root

In [2]:
## Import our data creation / prep routines
from nam_entropy.make_data import make_samples_dataframe_from_distributions
from nam_entropy.data_prep import data_df_to_pytorch_data_tensors_and_labels

## Import soft entropy calculation routines
from nam_entropy.h import *

## Import visualization routines
from nam_entropy.bin_distribution_plots import plot_tensor_bars, get_label_colors, plot_2d_scatter_with_bins, plot_1d_scatter_labeled

## Import utility functions
from nam_entropy.utils import set_all_random_seeds

In [3]:
# Import SciPy distributions
from scipy import stats

## Import the Pytorch distributions
import torch

### 0.2. Fix Randomness

In [4]:
## Single seed value for the notebook, intended to make repeated runs reproducible
RANDOM_SEED = 42
## NOTE(review): presumably seeds the Python, NumPy and PyTorch RNGs in one call -- confirm in nam_entropy.utils
set_all_random_seeds(RANDOM_SEED)

NameError: name 'set_all_random_seeds' is not defined

## 1. Create Sample Data

In [None]:
## Define some example SciPy distributions (frozen, i.e. with their parameters fixed)
dist1 = stats.norm(loc=0, scale=1)  # Standard normal: mean 0, standard deviation 1
dist2 = stats.norm(loc=3, scale=1.5)  # Normal with mean 3 and standard deviation 1.5
dist3 = stats.uniform(loc=-2, scale=4)  # Uniform on [loc, loc + scale] = [-2, 2]


In [None]:
## Describe the three sub-populations: sample count, source distribution and display label
n_samples_list = [10, 20, 30]
distribution_list = [dist1, dist2, dist3]
label_list = [
    'A - Normal(0, 1)',
    'B - Normal(3, 1.5)',
    'C - Uniform on [-2, 2]',
]

## Colours for the labels, requested from the 'hsv' colormap
colors, colors_dict = get_label_colors(label_list, colormap='hsv')

## Draw the samples from each distribution into one labelled dataframe
## (randomize_samples=True presumably shuffles the rows -- confirm in nam_entropy.make_data)
randomized_data_df = make_samples_dataframe_from_distributions(
    n_samples_list,
    distribution_list,
    label_list,
    label_columns_name='label',
    data_component_name_list=None,
    randomize_samples=True,
)

In [None]:
#randomized_data_df.shape

In [None]:
#randomized_data_df

## 2. Pytorch Preparation of Sample Data

In [None]:
## Convert the data to PyTorch format
## NOTE: `label_list` is rebound here to the list returned by the converter,
## replacing the hand-written list used when the samples were created.
## NOTE(review): `colors` was built from that earlier list -- confirm the returned
## label order matches it, otherwise plot colours get paired with the wrong labels.
index_tensor, data_tensor, label_list, label_list_row_index_lookup_dict = \
    data_df_to_pytorch_data_tensors_and_labels(randomized_data_df)

In [None]:
#index_tensor.shape

In [None]:
#index_tensor

In [None]:
#data_tensor.shape

In [None]:
#data_tensor

In [None]:
#label_list

In [None]:
#label_list_row_index_lookup_dict

## 3. Create the associated Soft-binned Probability Distributions

### 3.1. Create the soft-binned probability distributions for each point

In [None]:
## Binning configuration for distributions living in Euclidean space
n_bins = 10
n_heads = 1
bin_type = "uniform"
dist_fn = "euclidean"
smoothing_fn = "softmax"
smoothing_temp = 1.0

## Soft-bin every sample; the call hands back the per-sample scores and the bins
tmp_scores, tmp_bins = soft_bin(
    all_representations=data_tensor,
    n_bins=n_bins,
    n_heads=n_heads,
    bin_type=bin_type,
    dist_fn=dist_fn,
    smoothing_fn=smoothing_fn,
    smoothing_temp=smoothing_temp,
)

## Display the shape of the resulting probability tensor -- expected to be [N, n_heads, n_bins]
tmp_scores.shape

In [None]:
#tmp_scores.shape

In [None]:
#data_tensor[0]

In [None]:
## Drop the n_heads axis (dim 1) from the scores tensor.
## squeeze(1) only removes that axis when it has size 1, i.e. when n_heads == 1.
tmp_scores__no_heads = tmp_scores.squeeze(1)
tmp_scores__no_heads.shape

In [None]:
tmp_bins

### 3.2. Create the probability distribution for the total population (all labels together)

In [None]:
## Compute the sum of all soft-binned probability distributions (summing over dim 0, the sample axis)
prob_dist_sum_tensor = tmp_scores__no_heads.sum(0)

In [None]:
prob_dist_sum_tensor.shape

In [None]:
## Compute the total population probability vector: divide the summed distributions
## by the number of rows of the scores tensor, i.e. average the per-sample distributions
prob_dist_for_total_population_tensor = prob_dist_sum_tensor / tmp_scores__no_heads.shape[0]

In [None]:
## Check that this is a probability vector
prob_dist_for_total_population_tensor.sum()

In [None]:
## Check its shape
prob_dist_for_total_population_tensor.shape

In [None]:
## Check its values
prob_dist_for_total_population_tensor

### 3.3. Create the probability distributions for each population label

In [None]:
## Sizes needed to accumulate the per-label sums
num_samples = index_tensor.shape[0]  ## also data_tensor.shape[0]
n_bins = tmp_scores.shape[-1]  ## length of each soft-binned probability vector
num_labels = len(label_list)

## Scores tensor without the n_heads axis (recomputed so this cell can run on its own)
tmp_scores__no_heads = tmp_scores.squeeze(1)

## Compute the sum of the soft-binned probability distributions for each label:
## row k of the result accumulates every row of the scores tensor whose entry in
## index_tensor equals k.
## new_zeros() allocates the accumulator with the same dtype AND device as the
## scores, so index_add also works when the tensors do not live on the CPU.
label_prob_dist_sum_tensor = tmp_scores__no_heads.new_zeros((num_labels, n_bins))
label_prob_dist_sum_tensor = label_prob_dist_sum_tensor.index_add(dim=0, source=tmp_scores__no_heads, index=index_tensor)

label_prob_dist_sum_tensor.shape


In [None]:
#label_prob_dist_sum_tensor

In [None]:
## Check that the sums over the columns agree with the number of samples per label -- yes! =)
#label_prob_dist_sum_tensor.sum(1)

In [None]:
## Count how many samples carry each label
label_counts_tensor = index_tensor.bincount()

## Turn each per-label sum into an average: the counts are viewed as a column
## so that every row is divided by its own label count
label_prob_dist_avg_tensor = label_prob_dist_sum_tensor / label_counts_tensor[:, None]

## The per-label averages are the probability distributions of the labels (one per row)
prob_dist_by_label_tensor = label_prob_dist_avg_tensor

In [None]:
## Check that the average again is a probability distribution
#prob_dist_by_label_tensor.sum(1)

### 3.4. Determine the probability distribution of labels -- for later use in conditional entropy calculation

In [None]:
## Compute the probabilities of the labels: (samples with label k) / (all samples).
## The integer counts are cast to float64 BEFORE dividing. Dividing first yields
## float32 values (PyTorch's default float dtype), and casting those to float64
## afterwards cannot restore the precision already lost.
distribution_of_labels = index_tensor.bincount().to(torch.float64) / index_tensor.shape[0]

distribution_of_labels.shape

In [None]:
#distribution_of_labels

### 3.5. Visualize the probability distributions

### 3.5.1. Check the probability tensors we're given

In [None]:
#prob_dist_for_total_population_tensor

In [None]:
#prob_dist_by_label_tensor

### 3.5.2. Visualize the data distribution and bins

In [None]:
## Scatter the samples by label and overlay the soft-binning points.
## tmp_bins.squeeze(1,2) drops dims 1 and 2 of the bin tensor (where they have size 1) before plotting.
## NOTE(review): `colors` was created from the hand-written label list, while `label_list`
## here is the one returned by the dataframe-to-tensor conversion -- confirm both share the same order.
fig, ax = plot_1d_scatter_labeled(
    data_tensor=data_tensor,
    index_tensor=index_tensor,
    label_list=label_list,
    bins=tmp_bins.squeeze(1,2),
    title='Scatter Plot for Labeled Data and Number-labeled Soft-binning points',
    xlabel='Data values',
    ylabel='Population Labels',
    label_colors=colors,
    bin_line_color='purple',
    figsize=(12, 3),
)

### 3.5.3. Use the barplot routine to visualize the probability tensors


In [None]:
## Extend the per-label labels, distributions and colours with a 'Total population' entry
display_label_list = label_list + ['Total population']
display_tensor = torch.cat(
    (prob_dist_by_label_tensor, prob_dist_for_total_population_tensor[None]),
)
display_colors = colors + [(0.2, 0.2, 0.2, 0.2)]  # RGBA grey for the total row

## Draw one shared bar chart: a bar series per label plus one for the total population
plot_tensor_bars(
    display_tensor,
    title='Soft-binned Probability Distributions by label',
    separate_plots=False,
    labels=display_label_list,
    xlabel="Bin Number",
    start_index=1,
    row_colors=display_colors,
)

## 4. Compute Entropy for the total population and for each population label

In [None]:
## Compute the entropy for the total population (input: the single averaged soft-binned probability vector)
total_population_entropy = entropy(prob_dist_for_total_population_tensor)
total_population_entropy

In [None]:
## Compute the entropy for each label (input: one probability vector per row)
## NOTE(review): the result is later indexed by label position, so `entropy` is
## expected to return one value per row -- confirm in nam_entropy.h
entropy_by_label_tensor = entropy(prob_dist_by_label_tensor)
entropy_by_label_tensor

In [None]:
## Collect every entropy as a plain Python number, keyed by population name
entropy_dict = {'total_population': total_population_entropy.item()}
entropy_dict.update(
    (label, entropy_by_label_tensor[i].item())
    for i, label in enumerate(label_list)
)

## Show the resulting lookup table
entropy_dict

## 5. Compute the Conditional Entropy -- for the sample population $Z$ given its label $L$
**Note:** 
We compute the conditional entropies $H(Z|L)$,
where $Z$ is the total population and $L$ is the categorical label variable. 

We use the formula

$H(Z|L) = \sum_{l \in L} p(l) H(Z|L=l)$

where 

$H(Z|L=l) = -\sum_{z \in Z} p(z|l) \log p(z|l)$

Here we define
- $Z$ := Population data distribution (continuous vector-valued RV)
- $L$ := Sub-population label (categorical RV)

[https://en.wikipedia.org/wiki/Conditional_entropy](https://en.wikipedia.org/wiki/Conditional_entropy)

In [None]:
## Let's compute the conditional entropy of the population given the categorical label:
## the label probabilities weight the per-label entropies, i.e. sum_l p(l) * H(Z|L=l).
## NOTE(review): torch.dot expects both vectors to share a dtype -- confirm the
## per-label entropies are float64 like distribution_of_labels
conditional_entropy_of_population_given_the_label = torch.dot(distribution_of_labels, entropy_by_label_tensor).item()
conditional_entropy_of_population_given_the_label

## 6. Compute Mutual Information between the total population RV and the label RV
By definition, the __mutual information__ ${\displaystyle \operatorname {I} (Z, L)}$ of the two random variables $Z$ and $L$ is given by

${\displaystyle \operatorname {I} (Z, L) := \mathrm {H} (Z) - \mathrm {H} (Z|L)\,}$

and we define 
- $Z$ := Population data distribution (continuous vector-valued RV)
- $L$ := Sub-population label (categorical RV)

In [None]:
## Mutual information I(Z, L) = H(Z) - H(Z|L), computed from the two plain numbers obtained earlier via .item()
## Check: This is non-negative -- as it's supposed to be!
mutual_information = entropy_dict['total_population'] - conditional_entropy_of_population_given_the_label
mutual_information

### Entropy Summary:

In [None]:
entropy_dict['total_population']

In [None]:
conditional_entropy_of_population_given_the_label

In [None]:
mutual_information