In [38]:
import mlflow
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
from pgd_optim_pytorch._utils import filter_df
%matplotlib inline

In [39]:
# Point MLflow at the local `mlruns` folder itself (the path is relative, so it
# is resolved from the notebook's working directory).
mlflow.set_tracking_uri("../../../mlruns")

# Confounded Addition - max gamma

We will check how frequently pSAGD can find the global minimum of the CIB when $\gamma = 1$ and $r_Y = 0.1$.
The chosen hyperparameters were: `lr = 1.0, 10.0`, `temperature = 10.0, 100.0` and `cooling_rate = 0.99`. 

# Load and pre-process the data

In [40]:
# Look up the experiment of interest by name.
# `mlflow.get_experiment_by_name` returns None (it does not raise) when no
# experiment with that name exists under the tracking URI, so fail early with
# an actionable message instead of an AttributeError on `None.experiment_id`.
experiment_name = "Confounded Addition - max gamma"
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise LookupError(
        f"MLflow experiment {experiment_name!r} was not found under tracking URI "
        f"{mlflow.get_tracking_uri()!r}."
    )
experiment_id = experiment.experiment_id
print(experiment_id)

865499003854706284


In [52]:
# Load the runs from the local CSV cache when it exists; otherwise query the
# MLflow store once and write the cache for subsequent executions.
# The cache path is named once so the read and the write cannot drift apart.
runs_cache_path = 'confounded_addition_max_gamma.csv'
try:
    runs_df = pd.read_csv(runs_cache_path)
except FileNotFoundError:
    # `search_runs` documents `experiment_ids` as a list of IDs, so wrap the
    # single ID explicitly rather than relying on a bare string being accepted.
    runs_df = mlflow.search_runs(experiment_ids=[experiment_id], max_results=100000)
    # Export the DataFrame to a CSV file
    runs_df.to_csv(runs_cache_path, index=False)

print(len(runs_df))
print(runs_df.columns)

500
Index(['run_id', 'experiment_id', 'status', 'artifact_uri', 'start_time',
       'end_time', 'metrics.Expected optimal CIB loss', 'metrics.CIB loss',
       'metrics.HY', 'metrics.diff_loss', 'metrics.converged',
       'metrics.VI of T and T_', 'metrics.HcYdoT',
       'metrics.Expected optimal HcYdoT', 'metrics.HT',
       'metrics.Expected optimal HT', 'metrics.diff_q', 'metrics.penalty',
       'metrics.Expected optimal HTcondX', 'metrics.HTcondX',
       'metrics.Final CIB loss', 'params.max iter', 'params.lr',
       'params.Expected optimal q', 'params.r_y', 'params.end_lr_factor',
       'params.Learned q', 'params.temperature', 'params.eps', 'params.gamma',
       'params.cooling rate', 'params.beta', 'tags.mlflow.source.name',
       'tags.optimizer', 'tags.mlflow.source.type', 'tags.mlflow.runName',
       'tags.mlflow.user', 'tags.mlflow.source.git.commit', 'tags.loss'],
      dtype='object')


In [53]:
# Run duration in seconds: parse both timestamps as ISO 8601, subtract
# start from end, and convert the resulting timedelta to seconds.
runs_df['metrics.duration'] = (
    pd.to_datetime(runs_df['end_time'], format='ISO8601')
    .sub(pd.to_datetime(runs_df['start_time'], format='ISO8601'))
    .dt.total_seconds()
)

In [54]:
# Keep only the columns used by the analysis below.
# `.copy()` makes `runs` an independent frame: later cells assign columns on
# it, and doing that on a bare selection of `runs_df` raises
# SettingWithCopyWarning and leaves it ambiguous which frame is modified.
runs = runs_df[[
      'metrics.duration',
      'params.r_y',
      'params.gamma',
      'params.beta',
      'params.lr',
      'params.temperature',
      'metrics.VI of T and T_',
      'metrics.Final CIB loss',
      'metrics.HT',
      'metrics.HTcondX',
      'metrics.HY',
      'metrics.HcYdoT',
      'metrics.converged',
      'params.Learned q',
      'tags.optimizer',
      'tags.loss'
]].copy()

In [55]:
# Remove the MLflow namespace prefixes ('params.', 'metrics.', 'tags.').
# Splitting on the first dot only and keeping the last piece leaves a name
# without a prefix unchanged, so re-running this cell is harmless.
runs = runs.rename(columns=lambda name: name.split('.', 1)[-1])

# List of columns that should be float
float_columns = ['params.r_y', 'params.gamma', 'params.beta', 'params.lr', 'params.temperature']
float_names = [name.split('.', 1)[-1] for name in float_columns]

# Build new numeric columns instead of `runs.loc[:, cols] = ...`: the latter
# writes into the existing columns in place and can leave them with object
# dtype, which defeats the conversion. errors='raise' surfaces bad values.
runs = runs.assign(
    **{name: pd.to_numeric(runs[name], errors='raise') for name in float_names}
)

# Add MI, Ic columns
runs = runs.assign(
    MI_XT=runs['HT'] - runs['HTcondX'],
    IcYdoT=runs['HY'] - runs['HcYdoT'],
)
runs.head()

Unnamed: 0,duration,r_y,gamma,beta,lr,temperature,VI of T and T_,Final CIB loss,HT,HTcondX,HY,HcYdoT,converged,Learned q,optimizer,loss,MI_XT,IcYdoT
0,35.462,0.1,1.0,inf,1.0,10.0,3.935406e-08,-0.018044,1.561278,-0.0,1.36979,1.351747,1.0,"tensor([[[0., 1.],\n [1., 0.]],\n\n ...",SAGD,wCIB+penalty,1.561278,0.018044
1,39.354,0.1,1.0,inf,1.0,10.0,3.935406e-08,-0.018044,1.561278,-0.0,1.36979,1.351747,1.0,"tensor([[[0., 0.],\n [0., 1.]],\n\n ...",SAGD,wCIB+penalty,1.561278,0.018044
2,41.775,0.1,1.0,inf,1.0,10.0,3.935406e-08,-0.018044,1.561278,-0.0,1.36979,1.351747,1.0,"tensor([[[0., 1.],\n [1., 0.]],\n\n ...",SAGD,wCIB+penalty,1.561278,0.018044
3,37.877,0.1,1.0,inf,1.0,10.0,3.935406e-08,-0.018044,1.561278,-0.0,1.36979,1.351747,1.0,"tensor([[[0., 0.],\n [0., 1.]],\n\n ...",SAGD,wCIB+penalty,1.561278,0.018044
4,38.924,0.1,1.0,inf,1.0,10.0,3.935406e-08,-0.018044,1.561278,-0.0,1.36979,1.351747,1.0,"tensor([[[0., 1.],\n [1., 0.]],\n\n ...",SAGD,wCIB+penalty,1.561278,0.018044


## Frequencies for wCIB (no penalty)

In [57]:
# Restrict the analysis to the SAGD runs on the un-penalised wCIB loss.
# NOTE(review): assumes filter_df keeps the rows whose columns equal the given
# keyword values — confirm against pgd_optim_pytorch._utils.
df = filter_df(runs, optimizer='SAGD', loss='wCIB')
# Accumulates one summary dict per (temperature, lr) pair; the next cell fills
# it with the success rate and the mean run duration.
results = []

In [58]:
# A run counts as successful when its 'VI of T and T_' metric is below this threshold.
vi_success_threshold = 1e-5

# Start from an empty list inside this cell so that re-running it does not
# append duplicate rows to a list created by an earlier cell.
results = []
for temp in np.sort(df['temperature'].unique()):
    for lr in np.sort(df['lr'].unique()):
        df_fixedlrtemp = filter_df(df, lr=lr, temperature=temp)
        # Summing the boolean mask counts its True entries and gives 0 when
        # there are none, so no KeyError handling is required.
        num_successful_runs = int((df_fixedlrtemp['VI of T and T_'] < vi_success_threshold).sum())
        total_num_runs = len(df_fixedlrtemp)
        print(f"For lr={lr:.1e} and temperature={temp:.1e}, "
              + f"the ground truth abstraction was found in {num_successful_runs} of the {total_num_runs} runs."
        )
        # Guard against an empty (temperature, lr) cell to avoid dividing by zero.
        success_rate = num_successful_runs / total_num_runs if total_num_runs > 0 else 0
        average_duration = df_fixedlrtemp['duration'].mean()
        results.append({'temperature': temp, 'lr': lr, 'success_rate': success_rate, 'average_duration': average_duration})

# Create a DataFrame from the results
results_df = pd.DataFrame(results)
results_df

For lr=1.0e+00 and temperature=1.0e+01, the ground truth abstraction was found in 68 of the 100 runs.
For lr=1.0e+01 and temperature=1.0e+01, the ground truth abstraction was found in 32 of the 100 runs.
For lr=1.0e+00 and temperature=1.0e+02, the ground truth abstraction was found in 45 of the 100 runs.
For lr=1.0e+01 and temperature=1.0e+02, the ground truth abstraction was found in 22 of the 100 runs.


Unnamed: 0,temperature,lr,success_rate,average_duration
0,10.0,1.0,0.68,29.70671
1,10.0,10.0,0.32,10.0669
2,100.0,1.0,0.45,43.38643
3,100.0,10.0,0.22,18.68822


## Frequencies for wCIB+penalty

Inspecting the incorrect learned encoders from the runs above revealed that many correspond to cases where some values of $T$ were not utilized.
These are not global minima, but the results suggest they are local minima.
We can avoid them explicitly by adding a penalty term (which we refer to as the "non-surjectivity penalty") to the wCIB, which penalizes the learner when it gets close to such local minima of the wCIB.
We will take the best (temperature, lr) pair from the above hyperparameter search, namely temperature = 10 and lr = 1.0 (success rate 0.68), and use it to minimize the penalized version of the wCIB.

In [59]:
# Restrict the analysis to the SAGD runs on the penalised loss ('wCIB+penalty').
# NOTE(review): assumes filter_df keeps the rows whose columns equal the given
# keyword values — confirm against pgd_optim_pytorch._utils.
df_pen = filter_df(runs, optimizer='SAGD', loss='wCIB+penalty')
# Accumulates one summary dict per (temperature, lr) pair; the next cell fills
# it with the success rate and the mean run duration.
results = []

In [60]:
# A run counts as successful when its 'VI of T and T_' metric is below this threshold.
vi_success_threshold = 1e-5

# Start from an empty list inside this cell so that re-running it does not
# append duplicate rows to a list created by an earlier cell.
results = []
for temp in np.sort(df_pen['temperature'].unique()):
    for lr in np.sort(df_pen['lr'].unique()):
        df_fixedlrtemp = filter_df(df_pen, lr=lr, temperature=temp)
        # Summing the boolean mask counts its True entries and gives 0 when
        # there are none, so no KeyError handling is required.
        num_successful_runs = int((df_fixedlrtemp['VI of T and T_'] < vi_success_threshold).sum())
        total_num_runs = len(df_fixedlrtemp)
        print(f"For lr={lr:.1e} and temperature={temp:.1e}, "
              + f"the ground truth abstraction was found in {num_successful_runs} of the {total_num_runs} runs."
        )
        # Guard against an empty (temperature, lr) cell to avoid dividing by zero.
        success_rate = num_successful_runs / total_num_runs if total_num_runs > 0 else 0
        average_duration = df_fixedlrtemp['duration'].mean()
        results.append({'temperature': temp, 'lr': lr, 'success_rate': success_rate, 'average_duration': average_duration})

# Create a DataFrame from the results
results_df = pd.DataFrame(results)
results_df

For lr=1.0e+00 and temperature=1.0e+01, the ground truth abstraction was found in 98 of the 100 runs.


Unnamed: 0,temperature,lr,success_rate,average_duration
0,10.0,1.0,0.98,38.95463


### Conclusion

For $\gamma = 1$, a temperature of 10 and lr of 1.0 results in an estimated success rate of 0.98, as long as one uses a non-surjectivity penalty.