# End-to-end Solar Out-of-Distribution (SOoD) pipeline

This notebook presents an end-to-end training and prediction pipeline for anomaly detection with a context-encoding variational autoencoder (ceVAE).

The model is based on the following paper: Zimmerer, David, et al. "Context-encoding variational autoencoder for unsupervised anomaly detection." arXiv preprint arXiv:1812.05941 (2018).

## Download Code & Data


In [7]:
!pip install wandb -qqq
import wandb

In [8]:
# Log in to your W&B account; as the last expression of the cell, the value
# returned by the call is shown as the cell output.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmariusgiger[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Data Acquisition

Make sure to run the setup and install `sdo-cli` first (`make setup` and `make install`).

In [3]:
# The dataset is available on the NAS under /nas08-data02/astroml_data

In [4]:
# Print the path of the `python` executable that shell (`!`) commands resolve to.
!which python

/usr/bin/python


## Training


In [10]:
!../.venv/bin/sdo-cli sood ce_vae train \
    --config-file="/home/marius/sdo-cli/config/ce-vae/run-fhnw-1.yaml"

2022-06-14 15:02:34,537 sdo.sood.algorithms.ce_vae INFO found config
2022-06-14 15:02:34,538 sdo.sood.algorithms.ce_vae INFO {
  "model": {
    "load_path": {
      "value": null,
      "desc": "Path to a pretrained model"
    },
    "target_size": {
      "value": 256,
      "desc": "Target size of the reconstructed output"
    },
    "z_dim": {
      "value": 128,
      "desc": "Dimension of the latent space"
    },
    "fmap_sizes": {
      "value": [
        16,
        64,
        256,
        1024
      ],
      "desc": "Feature map sizes for the CNN"
    },
    "ce_factor": {
      "value": 0.5,
      "desc": "Amount to which the context-encoder contributes to the model (between 0 only VAE and 1 only CE)"
    }
  },
  "data": {
    "data_dir": {
      "value": "/mnt/nas05/astrodata01/astroml_data/sdomlv2.zarr",
      "desc": "Path to the root directory of the dataset"
    },
    "dataset": {
      "value": "SDOMLDatasetV2",
      "desc": "Which dataset to use (CuratedImageParame

## Predict

In [9]:
# pixel-level predictions

!sdo-cli sood ce_vae predict \
    --config-file="/Users/mariusgiger/repos/master/sdo-cli/config/ce-vae/run-1.yaml"

2022-06-15 16:24:00,185 sdo.sood.algorithms.ce_vae INFO found config
2022-06-15 16:24:00,186 sdo.sood.algorithms.ce_vae INFO {
  "model": {
    "load_path": {
      "value": "/Users/mariusgiger/Downloads/model.ckpt",
      "desc": "Path to a pretrained model"
    },
    "target_size": {
      "value": 256,
      "desc": "Target size of the reconstructed output"
    },
    "z_dim": {
      "value": 128,
      "desc": "Dimension of the latent space"
    },
    "fmap_sizes": {
      "value": [
        16,
        64,
        256,
        1024
      ],
      "desc": "Feature map sizes for the CNN"
    },
    "ce_factor": {
      "value": 0.5,
      "desc": "Amount to which the context-encoder contributes to the model (between 0 only VAE and 1 only CE)"
    }
  },
  "data": {
    "data_dir": {
      "value": "fdl-sdoml-v2/sdomlv2_small.zarr/",
      "desc": "Path to the root directory of the dataset"
    },
    "dataset": {
      "value": "SDOMLDatasetV2",
      "desc": "Which dataset to us

wandb: Network error (ReadTimeout), entering retry loop.


In [None]:
from sdo.sood.algorithms.ce_vae import ceVAE

# Checkpoint to restore and the mode handed to the loader. `load_path` is also
# interpolated into the sdo-cli commands in later cells.
load_path = "/Users/mariusgiger/Downloads/model_cevae_256.ckpt"
mode = "sample"

cevae_algo = ceVAE.load_from_checkpoint(load_path, mode=mode)
# Kept as the last expression so the value returned by eval() is displayed.
cevae_algo.eval()

In [None]:
import math
import pathlib

import numpy as np
import torch
from torchvision.transforms import Compose, Resize, Normalize, Lambda
from torchvision.utils import save_image

# Per-channel preprocessing settings: the clipping range ("min"/"max") and
# whether the clipped values are log10-scaled before normalisation.
CHANNEL_PREPROCESS = {
    "94": {"min": 0.1, "max": 800, "scaling": "log10"},
    "131": {"min": 0.7, "max": 1900, "scaling": "log10"},
    "171": {"min": 5, "max": 3500, "scaling": "log10"},
    "193": {"min": 20, "max": 5500, "scaling": "log10"},
    "211": {"min": 7, "max": 3500, "scaling": "log10"},
    "304": {"min": 0.1, "max": 3500, "scaling": "log10"},
    "335": {"min": 0.4, "max": 1000, "scaling": "log10"},
    "1600": {"min": 10, "max": 800, "scaling": "log10"},
    "1700": {"min": 220, "max": 5000, "scaling": "log10"},
    "4500": {"min": 4000, "max": 20000, "scaling": "log10"},
    "continuum": {"min": 0, "max": 65535, "scaling": None},
    "magnetogram": {"min": -250, "max": 250, "scaling": None},
    "bx": {"min": -250, "max": 250, "scaling": None},
    "by": {"min": -250, "max": 250, "scaling": None},
    "bz": {"min": -250, "max": 250, "scaling": None},
}

predict_img_path = pathlib.Path("/Users/mariusgiger/repos/master/test-sdo-ml-dataset-ae/extract/train/2012/01/01/AIA20120101_0000_0171.npz")

# np.load on an .npz file returns an archive handle; read the array inside a
# context manager so the underlying file is closed again.
with np.load(predict_img_path) as npz_file:
    np_arr = npz_file["x"]  # .astype(np.float64)
torch_arr = torch.from_numpy(np_arr)
# convert to 1 x H x W, to be in compatible torchvision format
torch_arr = torch_arr.unsqueeze(dim=0)

channel = "171"
preprocess_config = CHANNEL_PREPROCESS[channel.lower()]

target_size = 256
if preprocess_config["scaling"] == "log10":
    # TODO why was vflip(x) used here in SolarNet?
    def lambda_transform(x):
        """Clip x to the channel's [min, max] range and take log10."""
        return torch.log10(torch.clamp(
            x,
            min=preprocess_config["min"],
            max=preprocess_config["max"],
        ))
    # Normalize computes (x - mean) / std, so with these values the clipped,
    # log-scaled data is mapped onto [0, 1].
    mean = math.log10(preprocess_config["min"])
    std = math.log10(preprocess_config["max"]) - \
        math.log10(preprocess_config["min"])
else:
    def lambda_transform(x):
        """Clip x to the channel's [min, max] range (no rescaling)."""
        return torch.clamp(
            x,
            min=preprocess_config["min"],
            max=preprocess_config["max"],
        )
    mean = preprocess_config["min"]
    std = preprocess_config["max"] - preprocess_config["min"]

transforms = Compose(
    [Resize((target_size, target_size)),
     Lambda(lambda_transform),
     Normalize(mean=[mean], std=[std]),
     # required to remove strange distribution of pixels (everything too bright)
     # (one-element sequences, matching the single-channel Normalize above)
     Normalize(mean=[0.5], std=[0.5])
     ]
)
torch_arr = transforms(torch_arr)

# Sample mode: evaluated without gradients; the result is printed.
with torch.no_grad():
    cevae_algo.sample_mode()
    pred = cevae_algo.forward(torch_arr)
    print(pred)

cevae_algo.pixel_mode()
# NOTE this mode requires grad to be enabled
pred_img = cevae_algo.forward(torch_arr)

# Create the output directory first so saving does not fail on a fresh checkout.
output_img_path = pathlib.Path("./output/test.png")
output_img_path.parent.mkdir(parents=True, exist_ok=True)
save_image(pred_img, str(output_img_path), normalize=True)

In [None]:
# display inverted pixel-wise anomaly scores

from pathlib import Path
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

%config InlineBackend.figure_format = 'retina' 

aia_wave = 171
newest_dir = find_newest_dir('./output/pred/*/')
pixel_pred_path =  newest_dir / Path("predictions")
images = list(Path(pixel_pred_path).rglob(f'*__{aia_wave}.jpeg'))

f, axarr = plt.subplots(1,7, figsize=(20, 9))

row_index = 0
column_index = 0

for index, path in enumerate(images):
    img = Image.open(path)
    img_arr = np.invert(np.asarray(img))
    axarr[column_index].set_title(path.stem)
    axarr[column_index].imshow(img_arr, cmap='gray', vmin=0, vmax=255)
    axarr[column_index].spines['top'].set_visible(False)
    axarr[column_index].spines['right'].set_visible(False)
    axarr[column_index].spines['bottom'].set_visible(False)
    axarr[column_index].spines['left'].set_visible(False)
    axarr[column_index].xaxis.set_ticks([])
    axarr[column_index].yaxis.set_ticks([])

    if(column_index == 6):
        row_index = (row_index + 1)
        
    column_index = (column_index + 1) % 7

In [None]:
# sample-level predictions: run the ce_vae `predict` subcommand with --mode="sample".
# {load_path} is interpolated by IPython from the notebook variable `load_path`;
# results are written below ./output/pred (the -o option).

!sdo-cli sood ce_vae predict \
    --target-size=256 \
    --data-dir='./data/aia_171_2012_256' \
    --test-dir='./data/aia_171_2012_full_disk_flare_256' \
    --load-path={load_path} \
    -o './output/pred' \
    --logger "file" \
    --ce-factor 0.5 \
    --score-mode combi \
    --mode="sample"

In [None]:
# investigate sample-wise scores

import pandas as pd

# Locate predictions.txt inside the directory returned by find_newest_dir.
newest_dir = find_newest_dir('./output/pred/*/')
sample_pred_path = Path(newest_dir) / "predictions/predictions.txt"

# The file is read without a header row; the two column names are supplied here.
score_columns = ["img", "score"]
df = pd.read_csv(sample_pred_path, names=score_columns, header=None)
df.head(7)

In [None]:
# Summary statistics of the sample-wise predictions loaded into `df`.
df.describe()

In [None]:
# Run the ce_vae `generate` subcommand with the checkpoint from `load_path`
# (interpolated by IPython). Output goes below ./output/ (the -o option), which
# is where the following cell searches for `*_generated.jpeg` files.
!sdo-cli sood ce_vae generate \
    --target-size=256 \
    --data-dir='./data/aia_171_2012_256' \
    --test-dir='./data/aia_171_2012_full_disk_flare_256' \
    --load-path={load_path} \
    -o './output/' \
    --logger "file" \
    --ce-factor 0.5 \
    --score-mode combi \
    --mode="sample"

In [None]:
# Show one of the generated images found below `gen_data_dir`.
gen_data_dir = "./output"
# rglob yields files in arbitrary order; sort so the same image is shown on
# every run.
images = sorted(Path(gen_data_dir).rglob('*_generated.jpeg'))
if not images:
    # Fail with an explanatory message instead of a bare IndexError below.
    raise FileNotFoundError(
        f"no '*_generated.jpeg' files found under {gen_data_dir!r}")
img_path = images[0]

fig, ax = plt.subplots(figsize=(15, 15))
ax.axis('off')

# Close the file once the pixel data has been handed to matplotlib.
with Image.open(img_path) as src_img:
    ax.imshow(np.asarray(src_img))