<a href="https://colab.research.google.com/github/chrisporras/xgdiss/blob/main/ChrisPorras_GradCAM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Final Project 2023
## GradCAM plug n play

Machine Learning for Biomedical Data Science

Team Xtreme Gradient Dissenters

Members: Audrey Lee, Christian Porras, Joy Jiang

April 24, 2023

Using explainable AI framework `OmniXAI`

https://github.com/salesforce/OmniXAI

## Install packages

In [1]:
!pip install -q omnixai # explainable ai framework
!pip install -q dash # web app, visualization
!pip install -q jupyter-dash # dash + jupyter notebooks/colab
!pip install -q dash_bootstrap_components # dash utils

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m534.9/534.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m758.0/758.0 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 kB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.8/30.8 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# NEED TO RESTART RUNTIME AFTER INSTALLING PACKAGES
import omnixai # WILL THROW ERROR IF RUNTIME NOT RESTARTED
# USE THIS TO TEST PACKAGE INSTALL

In [3]:
# Mount Google Drive at /content/drive. Later cells read the trained model
# checkpoint (.pt) from it and write the generated HTML figures and CSV
# results back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load Data
Run all cells in this section to clone the project repository, download the dataset from Kaggle, and unzip it into the working directory.

In [4]:
!git clone https://github.com/chrisporras/xgdiss.git

Cloning into 'xgdiss'...
remote: Enumerating objects: 58, done.[K
remote: Counting objects: 100% (58/58), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 58 (delta 20), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (58/58), 3.69 MiB | 7.76 MiB/s, done.


In [5]:
# install Kaggle public api
! pip install -q kaggle
# Choose the kaggle.json file that you downloaded
! mkdir ~/.kaggle
! cp ./xgdiss/kaggle.json ~/.kaggle/
# Make directory named kaggle and copy kaggle.json file there.
!chmod 600 ~/.kaggle/kaggle.json
#Change the permissions of the file.
! kaggle datasets list

ref                                                            title                                                size  lastUpdated          downloadCount  voteCount  usabilityRating  
-------------------------------------------------------------  --------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
salvatorerastelli/spotify-and-youtube                          Spotify and Youtube                                   9MB  2023-03-20 15:43:25          10089        353  1.0              
arnabchaki/data-science-salaries-2023                          Data Science Salaries 2023  💸                        25KB  2023-04-13 09:55:16           3853         89  1.0              
erdemtaha/cancer-data                                          Cancer Data                                          49KB  2023-03-22 07:57:00           4856        102  1.0              
evangower/premier-league-2022-2023                             Pr

In [None]:
# Download the tile dataset archive (strip-ai-256x256-png-tiles.zip) from Kaggle
# into the current working directory; needs the credentials set up in ~/.kaggle.
!kaggle datasets download -d nickuzmenkov/strip-ai-256x256-png-tiles

Downloading strip-ai-256x256-png-tiles.zip to /content
 33% 685M/2.03G [00:33<00:58, 25.2MB/s]

In [None]:
!unzip strip-ai-256x256-png-tiles.zip

## GradCAM

### Explainer pipeline functions

In [None]:
def _plotly_figure(self, index, class_names=None, **kwargs):
    """Build a horizontal bar chart for one entry of ``self.results``.

    Args:
        self: Object exposing a ``results`` mapping with the keys ``"values"``
            and ``"labels"``.
        index: Position of the instance to plot inside ``results["values"]``
            (and inside ``results["labels"]`` when labels are present).
        class_names: Optional lookup indexed by label; when given, each label
            is displayed as ``class_names[label]``, otherwise as ``str(label)``.
        **kwargs: Accepted for signature compatibility; not used.

    Returns:
        The figure returned by ``plotly.express.bar``.

    NOTE(review): this function takes ``self`` but is defined at module level,
    and nothing visible here attaches it to a class -- presumably it is meant
    to replace a method on an OmniXAI explanation object; confirm.
    NOTE(review): ``color_discrete_map`` is passed without a ``color`` argument,
    so it presumably has no visible effect; kept to preserve the original call.
    """
    import plotly.express as px

    results = self.results
    row_values = results["values"][index]
    all_labels = results["labels"]

    if all_labels is None:
        # No label information: show the single value as one bar.
        bar_names = ["Predicted value"]
        bar_scores = [row_values]
        y_axis_title = "Target"
    else:
        # One bar per label, named through class_names when available.
        row_labels = all_labels[index]
        bar_scores = row_values
        y_axis_title = "Label"
        if class_names is None:
            bar_names = [str(lbl) for lbl in row_labels]
        else:
            bar_names = [class_names[lbl] for lbl in row_labels]

    # Both sequences are reversed together, so the first entry is drawn last.
    return px.bar(
        y=bar_names[::-1],
        x=bar_scores[::-1],
        orientation="h",
        labels={"x": "Predicted value", "y": y_axis_title},
        title="",
        color_discrete_map={True: "#008B8B", False: "#DC143C"},
    )

In [None]:
def plot_gradcam(model, test, idx, outpath, class_names='CL'):
  """Explain one test image with OmniXAI and write every figure to one HTML file.

  The image at ``test['file_path'][idx]`` is explained with LIME, integrated
  gradients and four Grad-CAM variants:

  * ``gradcam#0`` / ``gradcam#1`` target ``model.layer4[-1]`` / ``model.layer4[-2]``
    and are given no explicit label (presumably OmniXAI then explains the
    predicted class -- confirm against the OmniXAI docs).
  * ``gradcam#2`` / ``gradcam#3`` target the same two layers but explain the
    label ``1 - test['label_num'][idx]``, i.e. the opposite class in this
    two-class setting.

  Args:
    model: Torch model exposing ``layer4`` (indexable with ``[-1]`` and ``[-2]``).
    test: Table-like object supporting ``test['file_path'][idx]`` and
      ``test['label_num'][idx]``.
    idx: Index of the image inside ``test``.
    outpath: Destination HTML file. Missing parent directories are created.
    class_names: Forwarded to each explanation's ``_plotly_figure``. The
      default ``'CL'`` preserves the previous hard-coded value (presumably
      indexed by integer label, so 0 shows as 'C' and 1 as 'L').

  Returns:
    ``local_explanations['predict'].get_explanations()``; the caller in this
    notebook reads the class probabilities from its ``'values'`` entry.
  """
  import os
  import torch
  from torchvision import transforms
  from PIL import Image as PilImage
  from omnixai.data.image import Image
  from omnixai.explainers.vision import VisionExplainer

  # Open the tile inside a context manager so the file handle is released
  # right away; this matters when looping over the whole test set.
  with PilImage.open(test['file_path'][idx]) as tile:
    img = Image(tile.convert('RGB'))

  # Send inputs to wherever the model's parameters live, so input and model
  # can never end up on different devices. Fall back to the old rule only for
  # a model without parameters.
  try:
    device = next(model.parameters()).device
  except StopIteration:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  # Per-channel normalisation constants (named after the training dataset).
  train_ds_mean = torch.tensor([0.9113, 0.8299, 0.8212])
  train_ds_std = torch.tensor([0.1397, 0.2390, 0.3153])
  transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(train_ds_mean, train_ds_std)
  ])
  # preprocess: batch of OmniXAI images -> stacked, normalised tensor on `device`.
  preprocess = lambda ims: torch.stack([transform(im.to_pil()) for im in ims]).to(device)
  # postprocess: logits -> class probabilities.
  postprocess = lambda logits: torch.nn.functional.softmax(logits, dim=1)

  explainer = VisionExplainer(
      explainers=["lime", "ig", "gradcam#0", "gradcam#1", "gradcam#2", "gradcam#3"],
      mode="classification",
      model=model,
      preprocess=preprocess,
      postprocess=postprocess,
      params={
          "gradcam#0": {"target_layer": model.layer4[-1]},
          "gradcam#1": {"target_layer": model.layer4[-2]},
          "gradcam#2": {"target_layer": model.layer4[-1]},
          "gradcam#3": {"target_layer": model.layer4[-2]},
      }
  )

  # Grad-CAM #2/#3 explain the class opposite to the ground-truth label so the
  # two sets of heatmaps can be compared. Cast to a plain int so no numpy
  # scalar is handed to the explainer.
  opposite_label = 1 - int(test['label_num'][idx])
  local_explanations = explainer.explain(
      img,
      params={
          "gradcam#2": {"y": [opposite_label]},
          "gradcam#3": {"y": [opposite_label]},
      }
  )
  # get predictions
  predictions = local_explanations['predict'].get_explanations()

  # The target folder may not exist yet; open() would fail without it.
  out_dir = os.path.dirname(outpath)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)

  # Write ONE HTML document: each figure is embedded as a <div> and plotly.js
  # is referenced once (first figure) instead of concatenating several
  # complete <html> documents into the same file.
  with open(outpath, 'w', encoding='utf-8') as f:
    f.write('<html>\n<head><meta charset="utf-8" /></head>\n<body>\n')
    for pos, k in enumerate(local_explanations.keys()):
      fig = local_explanations[k]._plotly_figure(index=0, class_names=class_names)
      f.write(fig.to_html(full_html=False,
                          include_plotlyjs='cdn' if pos == 0 else False))
      f.write('\n')
    f.write('</body>\n</html>\n')
  return predictions

### Run gradcam
Output:
1. Write .html with plotly visualizations to google drive
2. Save predictions and class probabilities in csv

In [None]:
import pandas as pd
import torch
import numpy as np

# Class names indexed by predicted class id (0 -> 'CE', 1 -> 'LAA').
lab = ['CE', 'LAA']
device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_PATH = '/content/drive/MyDrive/XGD_explainable/230421_resnet18_mayo-clinic-tiled-2gb.pt'
# map_location=device covers both cases with one call: on CPU-only runtimes it
# remaps GPU-saved tensors, and on GPU runtimes it guarantees the model really
# lands on `device` even when the checkpoint was saved from the CPU.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# NOTE(review): the file appears to hold a whole pickled model (its .layer4 is
# used later); recent PyTorch releases default to weights_only=True and would
# then need weights_only=False here -- confirm for the installed version.
model = torch.load(MODEL_PATH, map_location=device)

test = pd.read_csv('/content/xgdiss/test.csv')
test = test.iloc[:,1:] # drop unnamed: 0 col
# Result columns filled in by the explanation loop. 'prediction' will hold class
# name strings, so create it with object dtype instead of as a float column.
test['prediction'] = pd.Series([None] * test.shape[0], index=test.index, dtype=object)
test['prob_CE'] = np.zeros(test.shape[0])
test['prob_LAA'] = np.zeros(test.shape[0])
### index of test ###
# idx = 0
# label = test['label'][idx]
# outpath = f'/content/drive/MyDrive/XGD_explainable/html/{label}/plotly-test-idx_{idx}-{label}.html'

In [None]:
## LOOP FOR ALL TEST SET ###
## Explain the first `num_imgs` test images, record the predicted class and the
## class probabilities for each, and checkpoint the results table to Drive.
num_imgs = min(10, test.shape[0]) # never run past the end of the table
# num_imgs = test.shape[0] # ALL TEST IMAGES
n = 50 # every 50 images, save test
csv_template = '/content/drive/MyDrive/XGD_explainable/html/test-{}.csv'
# 'prediction' receives class-name strings below; make sure the column can hold
# them even if it was created as a numeric column.
test['prediction'] = test['prediction'].astype(object)
last_saved = None
for idx in range(num_imgs):
  print(f'Working on {idx}')
  label = test['label'][idx]
  outpath = f'/content/drive/MyDrive/XGD_explainable/html/{label}/plotly-test-idx_{idx}-{label}.html'
  predictions = plot_gradcam(model, test, idx, outpath)
  # Probabilities of the first (only) explained instance: [P(CE), P(LAA)].
  prob = predictions['values'][0]
  test.loc[idx,'prediction'] = lab[np.argmax(prob)]
  test.loc[idx,'prob_CE'] = prob[0]
  test.loc[idx,'prob_LAA'] = prob[1]
  # Periodic checkpoint (idx 0, n, 2n, ...) so a crash loses at most n images.
  if idx % n == 0:
    test.to_csv(csv_template.format(idx), index=False)
    last_saved = idx
# Final save: without it, every row processed after the last checkpoint
# (e.g. idx 1-9 when num_imgs is 10) would never reach the CSV.
if num_imgs > 0 and last_saved != num_imgs - 1:
  test.to_csv(csv_template.format(num_imgs - 1), index=False)