# TorNet Baseline CNN

This notebook trains the baseline keras CNN model on subset and tests it on different categories of TorNet samples.

## Part 1 - Training the Model

### Importing the libraries

In [65]:
import sys
import os
import numpy as np
import json
import shutil
import keras

import logging
logging.basicConfig(level=logging.INFO)

from tornet.data.loader import get_dataloader
from tornet.data.preprocess import get_shape
from tornet.data.constants import ALL_VARIABLES
from tornet.models.keras.losses import mae_loss
from tornet.models.keras.cnn_baseline import build_model
from tornet.metrics.keras import metrics as tfm
from tornet.utils.general import make_exp_dir, make_callback_dirs


### Setting Up Environment Variables

In [66]:
# os.environ['KERAS_BACKEND']='tensorflow' # set to 'tensorflow', 'torch' or 'jax' (installs required)
os.environ['TORNET_ROOT'] = '/Users/dana/Desktop/ML/tornet_project/dataset'
EXP_DIR=os.environ.get('EXP_DIR','.')
DATA_ROOT=os.environ['TORNET_ROOT']
logging.info('TORNET_ROOT='+DATA_ROOT)


INFO:root:TORNET_ROOT=/Users/dana/Desktop/ML/tornet_project/dataset


### Setting Up CNN Configuration

In [67]:
DEFAULT_CONFIG={
    'epochs':10,
    'input_variables':ALL_VARIABLES,
    'train_years':list(range(2021,2022)),
    'val_years':list(range(2021,2022)),
    'batch_size':128,
    'model':'vgg',
    'start_filters':48,
    'learning_rate':1e-4,
    'decay_steps':1386,
    'decay_rate':0.958,
    'l2_reg':1e-5,
    'wN':1.0,
    'w0':1.0,
    'w1':1.0,
    'w2':2.0,
    'wW':0.5,
    'label_smooth':0,
    'loss':'cce',
    'head':'maxpool',
    'exp_name':'tornet_baseline',
    'exp_dir':EXP_DIR,
    'dataloader':"keras",
    'dataloader_kwargs': {}
}

### Building the CNN

In [68]:
def train_keras_baselinecnn_model(config):
    '''
    Train a keras baseline CNN model with the given configuration    
    '''
    # Gather all hyperparams
    epochs = config.get('epochs')
    batch_size = config.get('batch_size')
    start_filters = config.get('start_filters')
    learning_rate = config.get('learning_rate')
    decay_steps = config.get('decay_steps')
    decay_rate = config.get('decay_rate')
    l2_reg = config.get('l2_reg')
    wN = config.get('wN')
    w0 = config.get('w0')
    w1 = config.get('w1')
    w2 = config.get('w2')
    wW = config.get('wW')
    head = config.get('head')
    label_smooth = config.get('label_smooth')
    loss_fn = config.get('loss')
    input_variables = config.get('input_variables')
    exp_name = config.get('exp_name')
    exp_dir = config.get('exp_dir')
    train_years = config.get('train_years')
    val_years = config.get('val_years')
    dataloader = config.get('dataloader')
    dataloader_kwargs = config.get('dataloader_kwargs')

    logging.info(f"Using {keras.config.backend()} backend")
    logging.info(f'Using {dataloader} dataloader')
    logging.info('Running with config:')
    logging.info(config)

    weights = {'wN': wN, 'w0': w0, 'w1': w1, 'w2': w2, 'wW': wW}

    # Create data laoders
    dataloader_kwargs = {'select_keys': input_variables +
                         ['range_folded_mask', 'coordinates']}
    ds_train = get_dataloader(dataloader, DATA_ROOT, train_years,
                              "train", batch_size, weights, **dataloader_kwargs)
    ds_val = get_dataloader(dataloader, DATA_ROOT, val_years,
                            "train", batch_size, weights, **dataloader_kwargs)

    in_shapes = (None, None, 2)
    c_shapes = (None, None, 2)
    nn = build_model(shape=in_shapes,
                     c_shape=c_shapes,
                     start_filters=start_filters,
                     l2_reg=l2_reg,
                     input_variables=input_variables,
                     head=head)

    # model setup
    lr = keras.optimizers.schedules.ExponentialDecay(
        learning_rate, decay_steps, decay_rate, staircase=False, name="exp_decay")

    from_logits = True
    if loss_fn.lower() == 'cce':
        loss = keras.losses.BinaryCrossentropy(from_logits=from_logits,
                                               label_smoothing=label_smooth)
    elif loss_fn.lower() == 'hinge':
        loss = keras.losses.Hinge()  # automatically converts labels to -1,1
    elif loss_fn.lower() == 'mae':
        def loss(yt, yp): return mae_loss(yt, yp)
    else:
        raise RuntimeError('unknown loss %s' % loss_fn)

    opt = keras.optimizers.Adam(learning_rate=lr)

    # Compute various metrics while training
    metrics = [keras.metrics.AUC(from_logits=from_logits, name='AUC', num_thresholds=2000),
               keras.metrics.AUC(from_logits=from_logits,
                                 curve='PR', name='AUCPR', num_thresholds=2000),
               tfm.BinaryAccuracy(from_logits, name='BinaryAccuracy'),
               tfm.TruePositives(from_logits, name='TruePositives'),
               tfm.FalsePositives(from_logits, name='FalsePositives'),
               tfm.TrueNegatives(from_logits, name='TrueNegatives'),
               tfm.FalseNegatives(from_logits, name='FalseNegatives'),
               tfm.Precision(from_logits, name='Precision'),
               tfm.Recall(from_logits, name='Recall'),
               tfm.F1Score(from_logits=from_logits, name='F1')]

    # Compile
    nn.compile(loss=loss,
               metrics=metrics,
               optimizer=opt,
               weighted_metrics=[])

    # FIT with ModelCheckpoint in callbacks
    callbacks = []  # Add other callbacks here if necessary
    nn.fit(ds_train,epochs=epochs,validation_data=ds_val,callbacks=callbacks,verbose=1)

    return nn

### Training the CNN

In [69]:
baselinecnn = train_keras_baselinecnn_model(DEFAULT_CONFIG)
baselinecnn.save('baselinecnn.keras')

INFO:root:Using tensorflow backend
INFO:root:Using keras dataloader
INFO:root:Running with config:
INFO:root:{'epochs': 10, 'input_variables': ['DBZ', 'VEL', 'KDP', 'RHOHV', 'ZDR', 'WIDTH'], 'train_years': [2021], 'val_years': [2021], 'batch_size': 128, 'model': 'vgg', 'start_filters': 48, 'learning_rate': 0.0001, 'decay_steps': 1386, 'decay_rate': 0.958, 'l2_reg': 1e-05, 'wN': 1.0, 'w0': 1.0, 'w1': 1.0, 'w2': 2.0, 'wW': 0.5, 'label_smooth': 0, 'loss': 'cce', 'head': 'maxpool', 'exp_name': 'tornet_baseline', 'exp_dir': '.', 'dataloader': 'keras', 'dataloader_kwargs': {}}


Epoch 1/10


2024-08-01 22:42:05.655145: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-01 22:42:09.355059: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2213s[0m 15s/step - AUC: 0.6055 - AUCPR: 0.0902 - BinaryAccuracy: 0.8957 - F1: 0.0249 - FalseNegatives: 680.8400 - FalsePositives: 121.0000 - Precision: 0.0547 - Recall: 0.0308 - TrueNegatives: 8847.4463 - TruePositives: 7.0000 - loss: 0.3326 - val_AUC: 0.6824 - val_AUCPR: 0.1194 - val_BinaryAccuracy: 0.9299 - val_F1: 0.0000e+00 - val_FalseNegatives: 1335.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 17716.0000 - val_TruePositives: 0.0000e+00 - val_loss: 0.2865
Epoch 2/10


2024-08-01 23:18:56.449845: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-01 23:19:00.038464: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2163s[0m 14s/step - AUC: 0.6912 - AUCPR: 0.1359 - BinaryAccuracy: 0.9275 - F1: 0.0000e+00 - FalseNegatives: 681.2534 - FalsePositives: 0.0000e+00 - Precision: 0.0000e+00 - Recall: 0.0000e+00 - TrueNegatives: 8976.8535 - TruePositives: 0.0000e+00 - loss: 0.2839 - val_AUC: 0.7524 - val_AUCPR: 0.1914 - val_BinaryAccuracy: 0.9299 - val_F1: 0.0000e+00 - val_FalseNegatives: 1335.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 17716.0000 - val_TruePositives: 0.0000e+00 - val_loss: 0.2631
Epoch 3/10


2024-08-01 23:55:00.018590: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-01 23:55:03.742669: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2196s[0m 15s/step - AUC: 0.7473 - AUCPR: 0.1884 - BinaryAccuracy: 0.9327 - F1: 0.0000e+00 - FalseNegatives: 670.7333 - FalsePositives: 0.3200 - Precision: 0.0000e+00 - Recall: 0.0000e+00 - TrueNegatives: 8973.7529 - TruePositives: 0.0000e+00 - loss: 0.2527 - val_AUC: 0.7506 - val_AUCPR: 0.2157 - val_BinaryAccuracy: 0.9299 - val_F1: 0.0000e+00 - val_FalseNegatives: 1335.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 17716.0000 - val_TruePositives: 0.0000e+00 - val_loss: 0.2553
Epoch 4/10


2024-08-02 00:31:36.438920: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 00:31:40.181231: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2192s[0m 15s/step - AUC: 0.7583 - AUCPR: 0.2146 - BinaryAccuracy: 0.9272 - F1: 0.0000e+00 - FalseNegatives: 691.3200 - FalsePositives: 0.0000e+00 - Precision: 0.0000e+00 - Recall: 0.0000e+00 - TrueNegatives: 8958.2471 - TruePositives: 0.0000e+00 - loss: 0.2589 - val_AUC: 0.7882 - val_AUCPR: 0.2375 - val_BinaryAccuracy: 0.9299 - val_F1: 0.0000e+00 - val_FalseNegatives: 1335.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 17716.0000 - val_TruePositives: 0.0000e+00 - val_loss: 0.2450
Epoch 5/10


2024-08-02 01:08:07.935508: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 01:08:11.615364: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2189s[0m 15s/step - AUC: 0.7810 - AUCPR: 0.2163 - BinaryAccuracy: 0.9294 - F1: 0.0437 - FalseNegatives: 653.0600 - FalsePositives: 32.4400 - Precision: 0.2766 - Recall: 0.0243 - TrueNegatives: 8962.5801 - TruePositives: 13.8067 - loss: 0.2432 - val_AUC: 0.7992 - val_AUCPR: 0.2416 - val_BinaryAccuracy: 0.9295 - val_F1: 0.0190 - val_FalseNegatives: 1322.0000 - val_FalsePositives: 21.0000 - val_Precision: 0.3824 - val_Recall: 0.0097 - val_TrueNegatives: 17695.0000 - val_TruePositives: 13.0000 - val_loss: 0.2481
Epoch 6/10


2024-08-02 01:44:37.443174: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 01:44:41.133147: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2190s[0m 15s/step - AUC: 0.7879 - AUCPR: 0.2291 - BinaryAccuracy: 0.9290 - F1: 0.0480 - FalseNegatives: 656.0000 - FalsePositives: 36.7133 - Precision: 0.3797 - Recall: 0.0259 - TrueNegatives: 8938.0938 - TruePositives: 17.2200 - loss: 0.2411 - val_AUC: 0.8054 - val_AUCPR: 0.2753 - val_BinaryAccuracy: 0.9310 - val_F1: 0.0560 - val_FalseNegatives: 1296.0000 - val_FalsePositives: 19.0000 - val_Precision: 0.6724 - val_Recall: 0.0292 - val_TrueNegatives: 17697.0000 - val_TruePositives: 39.0000 - val_loss: 0.2380
Epoch 7/10


2024-08-02 02:21:07.802152: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 02:21:11.475652: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2194s[0m 15s/step - AUC: 0.7929 - AUCPR: 0.2429 - BinaryAccuracy: 0.9297 - F1: 0.0985 - FalseNegatives: 633.6866 - FalsePositives: 60.2867 - Precision: 0.4613 - Recall: 0.0564 - TrueNegatives: 8920.8730 - TruePositives: 44.1000 - loss: 0.2382 - val_AUC: 0.8034 - val_AUCPR: 0.2750 - val_BinaryAccuracy: 0.9294 - val_F1: 0.0881 - val_FalseNegatives: 1270.0000 - val_FalsePositives: 75.0000 - val_Precision: 0.4643 - val_Recall: 0.0487 - val_TrueNegatives: 17641.0000 - val_TruePositives: 65.0000 - val_loss: 0.2401
Epoch 8/10


2024-08-02 02:57:42.648869: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 02:57:46.419270: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2198s[0m 15s/step - AUC: 0.8033 - AUCPR: 0.2678 - BinaryAccuracy: 0.9248 - F1: 0.1397 - FalseNegatives: 614.0200 - FalsePositives: 108.3000 - Precision: 0.3624 - Recall: 0.0875 - TrueNegatives: 8871.3662 - TruePositives: 63.8600 - loss: 0.2376 - val_AUC: 0.8086 - val_AUCPR: 0.2569 - val_BinaryAccuracy: 0.9278 - val_F1: 0.0900 - val_FalseNegatives: 1267.0000 - val_FalsePositives: 108.0000 - val_Precision: 0.3864 - val_Recall: 0.0509 - val_TrueNegatives: 17608.0000 - val_TruePositives: 68.0000 - val_loss: 0.2338
Epoch 9/10


2024-08-02 03:34:20.355261: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 03:34:24.102567: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2199s[0m 15s/step - AUC: 0.8047 - AUCPR: 0.2579 - BinaryAccuracy: 0.9294 - F1: 0.1561 - FalseNegatives: 607.3134 - FalsePositives: 92.0400 - Precision: 0.4446 - Recall: 0.0966 - TrueNegatives: 8884.8398 - TruePositives: 70.1333 - loss: 0.2302 - val_AUC: 0.8224 - val_AUCPR: 0.3093 - val_BinaryAccuracy: 0.9218 - val_F1: 0.3058 - val_FalseNegatives: 1007.0000 - val_FalsePositives: 482.0000 - val_Precision: 0.4049 - val_Recall: 0.2457 - val_TrueNegatives: 17234.0000 - val_TruePositives: 328.0000 - val_loss: 0.2263
Epoch 10/10


2024-08-02 04:10:59.147243: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:450] ShuffleDatasetV3:52: Filling up shuffle buffer (this may take a while): 6 of 8
2024-08-02 04:11:02.882926: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:480] Shuffle buffer filled.


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2199s[0m 15s/step - AUC: 0.8192 - AUCPR: 0.2736 - BinaryAccuracy: 0.9235 - F1: 0.2347 - FalseNegatives: 558.9200 - FalsePositives: 174.0067 - Precision: 0.3813 - Recall: 0.1703 - TrueNegatives: 8801.4805 - TruePositives: 113.4800 - loss: 0.2232 - val_AUC: 0.8348 - val_AUCPR: 0.3316 - val_BinaryAccuracy: 0.9313 - val_F1: 0.0750 - val_FalseNegatives: 1282.0000 - val_FalsePositives: 26.0000 - val_Precision: 0.6709 - val_Recall: 0.0397 - val_TrueNegatives: 17690.0000 - val_TruePositives: 53.0000 - val_loss: 0.2279


## Evaluating the Model

In [71]:
import sys
# Uncomment if tornet isn't installed in your environment or in your path already
# sys.path.append('../')  

import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
from tornet.metrics.keras import metrics as tfm
import logging


from tornet.data.keras.loader import KerasDataLoader


### Evaluating on different tornado categories

In [128]:
def get_subset_test_data(years, category):
    '''
    Get a subset of the test data for evaluation
    '''
    # Create test samples
    # Load full catalog and select EF 3+ tornadoes
    os.environ['TORNET_ROOT'] = '/Users/dana/Desktop/ML/tornet_project/dataset'
    data_root=os.environ['TORNET_ROOT']

    catalog_path = os.path.join(data_root,'catalog.csv')
    if not os.path.exists(catalog_path):
        raise RuntimeError('Unable to find catalog.csv at '+data_root)
            
    catalog = pd.read_csv(catalog_path,parse_dates=['start_time','end_time'])

    catalog = catalog[(catalog.start_time.dt.year.isin(years)) & (catalog['category'].isin(category))]
    # catalog = catalog[(catalog.start_time.dt.year.isin([2021]))]

    ds_test = KerasDataLoader(data_root=data_root,
                            data_type='test',
                            random_state=1234,
                            catalog=catalog,
                            batch_size = 64, 
                            use_multiprocessing = True)

    return ds_test

def evalate_model(model, ds_test):
    '''
    Evaluate the model on the test data
    '''
    # Evaluate the model
    # model = keras.models.load_model('baselinecnn.keras')
    # Compute various metrics
    from_logits = True
    metrics = [keras.metrics.AUC(from_logits=from_logits, name='AUC', num_thresholds=2000),
               keras.metrics.AUC(from_logits=from_logits,
                                 curve='PR', name='AUCPR', num_thresholds=2000),
               tfm.BinaryAccuracy(from_logits, name='BinaryAccuracy'),
               tfm.TruePositives(from_logits, name='TruePositives'),
               tfm.FalsePositives(from_logits, name='FalsePositives'),
               tfm.TrueNegatives(from_logits, name='TrueNegatives'),
               tfm.FalseNegatives(from_logits, name='FalseNegatives'),
               tfm.Precision(from_logits, name='Precision'),
               tfm.Recall(from_logits, name='Recall'),
               tfm.F1Score(from_logits=from_logits, name='F1')]
    model.compile(metrics=metrics)

    scores = model.evaluate(ds_test)
    scores = {m.name: scores[k+1] for k, m in enumerate(metrics)}

    logging.info(scores)

In [130]:
categories = [['TOR', 'NUL', 'WRN'], ['TOR', 'NUL'], ['TOR', 'WRN']]
years = [2021,2022]
for category in categories:
    print('Evaluation for category:', category)
    ds_test = get_subset_test_data(years, category)
    evalate_model(baselinecnn, ds_test)
    print('---------------------------------------------------------')
    

Evaluation for category: ['TOR', 'NUL', 'WRN']
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 2s/step - AUC: 0.7503 - AUCPR: 0.1048 - BinaryAccuracy: 0.9625 - F1: 0.0267 - FalseNegatives: 132.3661 - FalsePositives: 5.0268 - Precision: 0.2991 - Recall: 0.0140 - TrueNegatives: 3475.4644 - TruePositives: 1.5179 - loss: 0.0182


INFO:root:{'AUC': 0.7350379228591919, 'AUCPR': 0.0930003821849823, 'BinaryAccuracy': 0.96025550365448, 'TruePositives': 2.0, 'FalsePositives': 10.0, 'TrueNegatives': 6763.0, 'FalseNegatives': 270.0, 'Precision': 0.1666666716337204, 'Recall': 0.007352941203862429, 'F1': 0.014084498398005962}


---------------------------------------------------------
Evaluation for category: ['TOR', 'NUL']
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 2s/step - AUC: 0.8437 - AUCPR: 0.2794 - BinaryAccuracy: 0.9429 - F1: 0.0047 - FalseNegatives: 135.6400 - FalsePositives: 2.5333 - Precision: 0.0882 - Recall: 0.0024 - TrueNegatives: 2291.2932 - TruePositives: 0.5067 - loss: 0.0182


INFO:root:{'AUC': 0.8305799961090088, 'AUCPR': 0.23372143507003784, 'BinaryAccuracy': 0.9409633278846741, 'TruePositives': 2.0, 'FalsePositives': 7.0, 'TrueNegatives': 4413.0, 'FalseNegatives': 270.0, 'Precision': 0.2222222238779068, 'Recall': 0.007352941203862429, 'F1': 0.014234868809580803}


---------------------------------------------------------
Evaluation for category: ['TOR', 'WRN']
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 2s/step - AUC: 0.5395 - AUCPR: 0.1298 - BinaryAccuracy: 0.8907 - F1: 0.0074 - FalseNegatives: 148.8139 - FalsePositives: 1.6047 - Precision: 0.2240 - Recall: 0.0038 - TrueNegatives: 1252.3489 - TruePositives: 0.8140 - loss: 0.0182


INFO:root:{'AUC': 0.5555665493011475, 'AUCPR': 0.13916750252246857, 'BinaryAccuracy': 0.8960000276565552, 'TruePositives': 2.0, 'FalsePositives': 3.0, 'TrueNegatives': 2350.0, 'FalseNegatives': 270.0, 'Precision': 0.4000000059604645, 'Recall': 0.007352941203862429, 'F1': 0.014440430328249931}


---------------------------------------------------------


## Appendix

In [115]:
# Load saved model
model_file = 'baselinecnn.keras'
cnn = keras.models.load_model(model_file,compile=False)

In [116]:
ds_test = get_subset_test_data(['TOR', 'NUL', 'WRN'])
evalate_model(cnn, ds_test)
    

Total number of test samples in the catalog: 48232
Number of test samples in the dataset: 1762
[1m1762/1762[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 122ms/step - AUC: 0.7499 - AUCPR: 0.1054 - BinaryAccuracy: 0.9625 - F1: 0.0271 - FalseNegatives: 128.9949 - FalsePositives: 4.9070 - Precision: 0.3025 - Recall: 0.0142 - TrueNegatives: 3392.5872 - TruePositives: 1.5054 - loss: 0.0182


INFO:root:{'AUC': 0.7350379228591919, 'AUCPR': 0.0930003821849823, 'BinaryAccuracy': 0.96025550365448, 'TruePositives': 2.0, 'FalsePositives': 10.0, 'TrueNegatives': 6763.0, 'FalseNegatives': 270.0, 'Precision': 0.1666666716337204, 'Recall': 0.007352941203862429, 'F1': 0.014084498398005962}


In [133]:
# Download the pretrained model for inference from huggingface
from huggingface_hub import hf_hub_download
model_file = hf_hub_download(repo_id="tornet-ml/tornado_detector_baseline_v1", 
                             filename="tornado_detector_baseline.keras")

# Alternatively, you can manually download the .keras file and put in the ../models/ directory
# https://huggingface.co/tornet-ml/tornado_detector_baseline_v1
#model_file = '../models/tornado_detector_baseline.keras' 

# Load pretrained model
pretrained_cnn = keras.models.load_model(model_file,compile=False)


In [134]:
categories = [['TOR', 'NUL', 'WRN'], ['TOR', 'NUL'], ['TOR', 'WRN']]
years = [2021,2022]
for category in categories:
    print('Evaluation for category with pretrained model:', category)
    print('Evaluation for category:', category)
    ds_test = get_subset_test_data(years, category)
    evalate_model(pretrained_cnn, ds_test)
    print('---------------------------------------------------------')
    

Evaluation for category with pretrained model: ['TOR', 'NUL', 'WRN']
Evaluation for category: ['TOR', 'NUL', 'WRN']
[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m238s[0m 2s/step - AUC: 0.8788 - AUCPR: 0.3350 - BinaryAccuracy: 0.9524 - F1: 0.4076 - FalseNegatives: 75.2232 - FalsePositives: 92.4554 - Precision: 0.3789 - Recall: 0.4439 - TrueNegatives: 3388.0356 - TruePositives: 58.6607 - loss: 0.0090


INFO:root:{'AUC': 0.8783445358276367, 'AUCPR': 0.33739280700683594, 'BinaryAccuracy': 0.9535840749740601, 'TruePositives': 115.0, 'FalsePositives': 170.0, 'TrueNegatives': 6603.0, 'FalseNegatives': 157.0, 'Precision': 0.4035087823867798, 'Recall': 0.4227941036224365, 'F1': 0.4129263162612915}


---------------------------------------------------------
Evaluation for category with pretrained model: ['TOR', 'NUL']
Evaluation for category: ['TOR', 'NUL']
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 2s/step - AUC: 0.9427 - AUCPR: 0.6879 - BinaryAccuracy: 0.9661 - F1: 0.6163 - FalseNegatives: 73.6933 - FalsePositives: 12.5467 - Precision: 0.8482 - Recall: 0.4858 - TrueNegatives: 2281.2800 - TruePositives: 62.4533 - loss: 0.0090


INFO:root:{'AUC': 0.9288573265075684, 'AUCPR': 0.6525731086730957, 'BinaryAccuracy': 0.9618499279022217, 'TruePositives': 115.0, 'FalsePositives': 22.0, 'TrueNegatives': 4398.0, 'FalseNegatives': 157.0, 'Precision': 0.8394160866737366, 'Recall': 0.4227941036224365, 'F1': 0.5623471736907959}


---------------------------------------------------------
Evaluation for category with pretrained model: ['TOR', 'WRN']
Evaluation for category: ['TOR', 'WRN']
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 2s/step - AUC: 0.7658 - AUCPR: 0.3699 - BinaryAccuracy: 0.8730 - F1: 0.4099 - FalseNegatives: 88.0930 - FalsePositives: 84.3256 - Precision: 0.4127 - Recall: 0.4078 - TrueNegatives: 1169.6279 - TruePositives: 61.5349 - loss: 0.0090


INFO:root:{'AUC': 0.7834585905075073, 'AUCPR': 0.3812639117240906, 'BinaryAccuracy': 0.883809506893158, 'TruePositives': 115.0, 'FalsePositives': 148.0, 'TrueNegatives': 2205.0, 'FalseNegatives': 157.0, 'Precision': 0.4372623562812805, 'Recall': 0.4227941036224365, 'F1': 0.4299064576625824}


---------------------------------------------------------
