# Plots

In [None]:
import json

from dvc.api import DVCFileSystem
import pandas as pd
import hvplot.xarray
import holoviews as hv
import xarray as xr

from re_nobm_pcc.kit import TAXA

# Load the bokeh plotting backend for HoloViews and suppress its logo.
hv.extension('bokeh', logo=False)

In [None]:
import warnings

# Silence every FutureWarning for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Experiment revisions whose metrics are compared in the plot below.
experiments = [
    'exp-0d139', # cnn f/32/p > ? > ?, 7996 parameters, swish, weighted loss
    'exp-e7486', # mnn 32 > 64, 19236 parameters, swish, weighted loss
    'exp-d1c73', # linear regression, weighted loss
]

# One weight per entry of TAXA, labelled along a 'group' dimension; the
# metrics listed in `weights_on` are multiplied by these weights.
weights = xr.DataArray(
    data=[55.77425765991211, 49.406314849853516, 49.0872688293457, 10.0181884765625],
    coords=[('group', TAXA)],
)
weights_on = ['ME', 'MAE', 'RMSE']

# Read data/metrics.json from every experiment revision.
metrics = {}
for revision in experiments:
    revision_fs = DVCFileSystem('.', rev=revision)
    with revision_fs.open('data/metrics.json') as handle:
        metrics[revision] = json.load(handle)

# One column per experiment, one row per metric key; the 'loss' row is dropped.
per_experiment = [
    pd.Series(values.values(), index=values, name=revision)
    for revision, values in metrics.items()
]
table = pd.DataFrame(per_experiment).transpose().drop('loss')

# Split each key on '_' and discard the first piece; the remaining pieces
# become the (group, metric) levels of the row index.
table.index = pd.MultiIndex.from_tuples(
    [tuple(pieces[1:]) for pieces in table.index.str.split('_')],
    names=('group', 'metric'),
)

# Convert to xarray, scale the selected metrics by the per-group weights,
# and keep only those metrics plus R2.
metrics = table.to_xarray()
weighted = {'metric': weights_on}
metrics.loc[weighted] = metrics.loc[weighted] * weights
metrics = metrics.loc[{'metric': weights_on + ['R2']}]

In [None]:
# One bar chart per group, with the metric along the x axis.
bars = metrics.hvplot.bar('metric', groupby='group', xlabel='')
bars = bars.options('Bars', xrotation=45, frame_width=400, fontscale=1.2)

# Arrange the per-group charts in two columns with independent axes.
panels = bars.layout().cols(2)
panels.options('NdLayout', shared_axes=False)

# Notes

## 47d9c52f (merge dev into cnn)

### exp-19f79

- cnn with 3 layers (8, 16, 32 filters)
- swish
- 3,284 params

- cya stuck at zero

### exp-c9fa5

- cnn with 3 layers (4, 8, 16 filters)
- 1,180 params
- swish

- chl and coc have noisy val_loss
- cya got stuck at zero

## 60b305ad (loss by group)

### exp-09501

- mnn with 2 narrowing layers (56, 24)
- 30,980 params
- swish

looks nice

### exp-e7486

- mnn with 2 widening layers (32, 64)
- 19,236 params
- swish

### exp-e1687, exp-d1c73

- linear regression
- ran twice

### exp-6cc07

- cnn with 3 layers (4, 16, 32 filters)
- 4,100 params
- swish

- cya stuck at zero

### exp-7f142

- mnn with 2 narrowing layers (32, 16)
- 17,460 params

## 0a097ce8f (restore convolutions)

- weights MAE by mean (train) abundance

### exp-0d139

- cnn with 3 layers narrowing from 32 filters
- swish
- 7,996 parameters

### exp-b0694

- cnn with 3 layers (8 outputs, window shrinking from 7)
- 5,364 trainable parameters
- swish activation
- coc looks better, but isn't by R2

### exp-11c7f

- cnn with 2 layers widening from 8 filters
- swish
- ends with dense 64 layer
- 70,788 params

### exp-d1e2b

- cnn with 1 layer, 8 filters
- then a dense 64 layer
- swish
- 89,444 params

### exp-ac07d

- cnn with 1 conv layer having 8 outputs
- 1 dense layer with 32 nodes
- 44k trainable parameters
- terrible

### exp-33f53

- cnn with 1 conv layer having 8 outputs
- 1 dense layer with 16 nodes
- 22k trainable parameters

### exp-b3f84

- cnn with 3 layers decreasing in bands (from 16)
- 1 dense layer with 32 nodes
- 3,344 trainable parameters
- val_loss went bad (overfitting), which seems odd with so few parameters

### exp-72218

- widening cnn (only 9k params) with swish activations
- poor performance except for diatoms

## 2dc27b89 (restore weights)

- weights on MAE

### exp-eab57

- no layers, just regression

In [None]:
import numpy as np

# weights (inverse mean of training data)
inverse_means = [
    55.77426528930664,
    49.406314849853516,
    49.087276458740234,
    10.018187522888184,
]
weights = np.array(inverse_means)

# Reciprocal of each weight.
print(np.divide(1, weights))

# Weighted sum of four per-group values (shown as the cell output).
np.dot(weights, [0.013627, 0.020561, 0.011256, 0.034319])

### exp-c0ff8

- mnn with 3 narrowing (from 128) layers (77,924)
- swish activation
- loss noisy at 200 epochs

### exp-b09b1

- mnn with 3 widening (from 32) layers (parameter count not recorded)
- swish activation

### exp-b6cb1

- mnn with 3 wide (64 nodes) layers (42,308 trainable parameters)
- swish activation
- very early termination (~80 epochs)

### exp-b9eff

- mnn with 3 narrowing (from 32 nodes) layers (17,564 trainable parameters)
- swish activation

### exp-9f6bf

- mnn with 3 widening (from 8 nodes) layers (5,036 trainable parameters)
- swish activation

### exp-60b2d

- mnn with 3, 32 node layers, relu activation

## 594b900 (simple model with only abundance)

### exp-4781f

change from exp-c7418:
- it weights the MAE loss by inverse of group means
- a marginal improvement in R2

### exp-c7418

change from exp-b7004:
- 'relu' activation
- minimal impact

### exp-b7004

this looks like an okay "naive" case:
- no weight
- has a single layer with 32 nodes and 'swish' activation
- performs fine on data set to 0.0 at or below 10e-something

## 2e21a66 (loss weighted by abundance prob)

### exp-9f291

I don't trust the AUC values shown, as the ROC curves were essentially diagonal. Everything else looked bad to worse.

In [None]:
# Metrics snapshot for the experiment discussed above (presumably exp-9f291).
# NOTE: this rebinds the notebook-level name `metrics`.
metrics = {
    "loss": 1.2280960083007812,
    # product_<group>_<metric>
    "product_chl_ME": -0.004955189768224955,
    "product_chl_MAE": 0.022813349962234497,
    "product_chl_RMSE": 0.05957883968949318,
    "product_chl_R2": -0.10980522632598877,
    "product_coc_ME": -0.015996867790818214,
    "product_coc_MAE": 0.01973014324903488,
    "product_coc_RMSE": 0.06181754171848297,
    "product_coc_R2": -0.4805917739868164,
    "product_cya_ME": 0.0014727258821949363,
    "product_cya_MAE": 0.016090724617242813,
    "product_cya_RMSE": 0.02713635377585888,
    "product_cya_R2": 0.15429013967514038,
    "product_dia_ME": -0.02798730880022049,
    "product_dia_MAE": 0.06729260087013245,
    "product_dia_RMSE": 0.12573890388011932,
    "product_dia_R2": 0.23511826992034912,
    # presence_/abundance_<group>_loss
    "presence_chl_loss": 0.3108878433704376,
    "abundance_chl_loss": 0.024213816970586777,
    "presence_coc_loss": 0.3642471730709076,
    "abundance_coc_loss": 0.01825951784849167,
    "presence_cya_loss": 0.38027915358543396,
    "abundance_cya_loss": 0.015522638335824013,
    "presence_dia_loss": 0.04745016619563103,
    "abundance_dia_loss": 0.06723576039075851,
    # presence_<group>_AUC
    "presence_chl_AUC": 0.8756850957870483,
    "presence_coc_AUC": 0.8391563892364502,
    "presence_cya_AUC": 0.9311991333961487,
    "presence_dia_AUC": 0.4999741017818451,
}

## c106b09

### exp-6852d

### exp-73a57

### exp-681ae

## 55a25bd

### exp-fab79

### exp-4cddf

### exp-19308

### exp-302e1

### exp-cd4ba

## 52b6478

### exp-52daa

### exp-96767

## 9960490

### exp-38b0c