In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

## Results on `MNISTs`

### Step 1: Preparation - Run experiments on Lisa GPUs before visualizing results

> **Note**: Running these experiments on a CPU machine can take a long time (~40 mins). Instead, we recommend
> running them on a Lisa GPU machine using the following instructions. That will run and generate all results,
> after which you can run the cells below to display them.

**Instructions to run it on a Lisa GPU**
1. Create a job script as follows and save it as `lisa_mnist.job`:

```sh
#!/bin/bash
# Slurm batch script: runs the MNIST experiments and the explainability analyses.

# Resource requests read by `sbatch`: one GPU on the `gpu_shared_course` partition,
# a single task with 2 CPU cores, 32000 MB of memory and a 10-hour wall-clock limit.
# Output is written to slurm_output_<id>.out (`%A` is a Slurm filename pattern that
# is replaced by the job / array-master ID).
#SBATCH --partition=gpu_shared_course
#SBATCH --gres=gpu:1
#SBATCH --job-name=MNIST
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --time=10:00:00
#SBATCH --mem=32000M
#SBATCH --output=slurm_output_%A.out

# Start from a clean module environment, then load the 2021 software stack and Anaconda.
module purge
module load 2021
module load Anaconda3/2021.05

# Activate your environment
# (deactivate first so that a previously active conda environment does not leak in)
# NOTE(review): `gcn-gpu` may be a typo for `cgn-gpu` — use the name of the conda
# environment you actually created.
source deactivate
source activate gcn-gpu

# Run your code
## runs all experiments to reproduce Table 2 in the paper
python mnist_pipeline.py
## runs all additional analyses around explainability
python mnist_analysis.py
```

2. Submit the job script using `sbatch`:

```sh
cd /path/to/repo/experiments/
sbatch lisa_mnist.job
```

3. After these steps are done, you can run the following cells, which will display the results.

### Replication of Table 2

In [None]:
from mnist_pipeline import run_experiments

In [None]:
# Collect the MNIST results (Table 2) for seed 0.
# NOTE: `run_experiments` is imported again from `imagenet_pipeline` further down in this
# notebook, which rebinds the name. Run this cell before that import (or re-run the
# `mnist_pipeline` import cell first), otherwise the ImageNet pipeline is called here.
df = run_experiments(seed=0, show=False)

In [None]:
# Relabel the rows with human-readable training-data names.
# Assumes `run_experiments` returned exactly five rows in this order — TODO confirm.
df.index = ["Original", "GAN", "CGN", "Original + GAN", "Original + CGN"]

In [None]:
# Show the results as floats rounded to one decimal place.
df.astype(float).round(1)

### Additional experiments: Explainability analyses

In [None]:
from mnist_analysis import run_analyses

In [None]:
# Run the explainability analyses on all three MNIST variants and show the results inline.
# NOTE(review): `ignore_cache=False` presumably reuses results saved by an earlier run
# (e.g. the job script above) — confirm in `mnist_analysis.run_analyses`.
run_analyses(
    datasets=["colored_MNIST", "double_colored_MNIST", "wildlife_MNIST"],
    debug=False,
    show=True,
    ignore_cache=False,
)

## Results on `ImageNet-Mini`

> **Note**: This section involves generating counterfactual (CF) samples and training classifiers on IN-mini.
> Generating CF samples can take about 3.5 hours and training the classifier about 2 hours on a Lisa GPU.

**Instructions to run it on a Lisa GPU**
1. Create a job script as follows and save it as `lisa_imagenet.job`:

```sh
#!/bin/bash
# Slurm batch script: runs the ImageNet-Mini pipeline.

# Resource requests read by `sbatch`: one GPU on the `gpu_shared_course` partition,
# a single task with 2 CPU cores, 32000 MB of memory and a 10-hour wall-clock limit.
# Output is written to slurm_output_<id>.out (`%A` is a Slurm filename pattern that
# is replaced by the job / array-master ID).
#SBATCH --partition=gpu_shared_course
#SBATCH --gres=gpu:1
#SBATCH --job-name=IN-mini
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=2
#SBATCH --time=10:00:00
#SBATCH --mem=32000M
#SBATCH --output=slurm_output_%A.out

# Start from a clean module environment, then load the 2021 software stack and Anaconda.
module purge
module load 2021
module load Anaconda3/2021.05

# Activate your environment
# (deactivate first so that a previously active conda environment does not leak in)
# NOTE(review): `gcn-gpu` may be a typo for `cgn-gpu` — use the name of the conda
# environment you actually created.
source deactivate
source activate gcn-gpu

# Run your code
## runs all experiments to reproduce Table 3 and 4 in the paper
python imagenet_pipeline.py
## runs all additional analyses around explainability
## NOTE(review): the disabled command below looks like a leftover from the MNIST job
## script — confirm whether it can be removed.
# python mnist_analysis.py
```

2. Submit the job script using `sbatch`:

```sh
cd /path/to/repo/experiments/
sbatch lisa_imagenet.job
```

3. After these steps are done, you can run the following cells, which will display the results.

### Results for Table 3 and 4 from the paper

In [3]:
from imagenet_pipeline import run_experiments

In [17]:
# Show the classifier metrics saved for epoch 6 (`disp_epoch=6`) together with the
# OOD evaluation table.
# NOTE: this `run_experiments` is the one imported from `imagenet_pipeline` in the
# previous cell; it replaces the MNIST function of the same name imported earlier.
metrics_clf, df_ood = run_experiments(seed=0, disp_epoch=6)


::::: Generating CF dataset :::::


Train dataset exists with 34745 images, skipping...
Path to dataset: /home/lcur0478/piyush/projects/fact-team3/cgn_framework/imagenet/data/in-mini_train_trunc_0.5


Val dataset exists with 3923 images, skipping...
Path to dataset: /home/lcur0478/piyush/projects/fact-team3/cgn_framework/imagenet/data/in-mini_val_trunc_0.5


::::: Training classifier :::::

::::: Classifier already trained, skipping :::::
Loading results for epoch 6 from /home/lcur0478/piyush/projects/fact-team3/cgn_framework/imagenet/experiments/classifier__in-mini-classifier/epochwise_metrics/epoch_6.pt

::::: Evaluating OOD :::::

::::: Running cgn-ensemble on in-mini...
::: Result file /home/lcur0478/piyush/projects/fact-team3/cgn_framework/imagenet/experiments/ood_eval/cgn-ensemble_in-mini/results_seed_0.json already exists & --ignore_cache=False
::::: Running cgn-ensemble on in-a...
::: Result file /home/lcur0478/piyush/projects/fact-team3/cgn_framework/imagenet/experiments/ood_

In [18]:
# Table 3 of the paper: one row per classifier head with its shape bias,
# next to the top-1 / top-5 accuracy on real images.

heads = ["shape", "texture", "bg"]

# The two accuracy metrics are looked up under head-independent keys, so every
# row repeats the same values.
top1_real = metrics_clf["acc1/1_real"]
top5_real = metrics_clf["acc5/1_real"]

table_3_rows = [
    {
        # scaled by 100 for display (presumably fraction -> percent)
        "Shape bias": metrics_clf[f"shape_biases/{idx}_m_{head}_bias"] * 100.0,
        "Top 1": top1_real,
        "Top 5": top5_real,
    }
    for idx, head in enumerate(heads)
]

table_3 = pd.DataFrame(
    table_3_rows,
    index=[f"IN-mini + CGN/{head}" for head in heads],
    columns=["Shape bias", "Top 1", "Top 5"],
)
table_3 = table_3.astype(float).round(1)

In [19]:
# Render Table 3 (already rounded to one decimal place).
table_3

Unnamed: 0,Shape bias,Top 1,Top 5
IN-mini + CGN/shape,45.7,52.2,76.8
IN-mini + CGN/texture,19.2,52.2,76.8
IN-mini + CGN/bg,29.6,52.2,76.8


In [20]:
# Table 4 of the paper: a single "IN-mini + CGN" row holding the IN-9 metrics
# read from `metrics_clf`.

# Displayed column name -> key under which the metric is stored in `metrics_clf`.
col_to_key = {
    "IN-9": "in_9_acc1_original/avg",
    "Mixed-same": "in_9_acc1_mixed_same/avg",
    "Mixed-rand": "in_9_acc1_mixed_rand/avg",
    "BG-gap": "in_9_gaps/bg_gap",
}

# Start from an empty one-row frame whose columns follow the mapping's order.
table_4 = pd.DataFrame(
    None,
    columns=list(col_to_key),
    index=["IN-mini + CGN"],
)

for column, metric_key in col_to_key.items():
    # Fail loudly if the loaded metrics are missing an expected entry.
    assert metric_key in metrics_clf
    table_4.at["IN-mini + CGN", column] = metrics_clf[metric_key]

table_4 = table_4.astype(float).round(1)

In [21]:
# Render Table 4 (already rounded to one decimal place).
table_4

Unnamed: 0,IN-9,Mixed-same,Mixed-rand,BG-gap
IN-mini + CGN,84.2,67.5,57.4,4.6


### Interpretability Analysis 

In [22]:
# TODO: add a link to the Colab notebook with the interpretability analysis here

### Robustness to out-of-distribution generalization

In [23]:
# OOD evaluation table returned by the ImageNet `run_experiments` call above
# (one row per model, one column per evaluation dataset).
df_ood

Unnamed: 0,in-mini,in-a,in-stylized,in-sketch
cgn-ensemble,56.793,1.386667,17.1875,11.774647
resnet50,75.58,3.4,19.21875,24.092
