In [1]:
%load_ext autoreload
%autoreload 2
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# If you are using Google Collab you will need this cell
try:
    %tensorflow_version 2.x
except Exception:
    import os

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

<IPython.core.display.Javascript object>

In [3]:
# Change the SETUP = True if this is your fist run
SETUP = False

<IPython.core.display.Javascript object>

In [4]:
if SETUP:
    !pip install -q -U toai==0.3.7
    !pip install -q -U nb_black
    !pip install -q -U tensorflow-datasets
    !pip install -q -U --no-deps tensorflow-addons
    !pip install -q -U tensorflow_hub
    print(__import__("toai").__version__)
    print(__import__("tensorflow").__version__)

<IPython.core.display.Javascript object>

In [5]:
from toai.imports import *
from toai.utils import save_file, load_file
from toai.data import DataContainer, DataBundle
from toai.metrics import sparse_top_2_categorical_accuracy
from toai.models import save_keras_model, load_keras_model
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub



<IPython.core.display.Javascript object>

In [6]:
DATA_DIR = Path("data/fake-news")
TEMP_DIR = Path("temp/fake-news")

<IPython.core.display.Javascript object>

In [7]:
if SETUP:
    shutil.rmtree(str(DATA_DIR), ignore_errors=True)
    shutil.rmtrbee(str(TEMP_DIR), ignore_errors=True)
    DATA_DIR.mkdir(parents=True)
    TEMP_DIR.mkdir(parents=True)
    kaggle.api.authenticate()
    kaggle.api.competition_download_files(competition="fake-news", path=DATA_DIR)
    shutil.unpack_archive(str(DATA_DIR / "fake-news.zip"), DATA_DIR)

<IPython.core.display.Javascript object>

In [8]:
BATCH_SIZE = 32
AUTOTUNE = tf.data.experimental.AUTOTUNE

<IPython.core.display.Javascript object>

In this dataset all the example are in one csv!

--> easy data exploration using pandas

In [9]:
all_data = pd.read_csv(DATA_DIR / "train.csv", low_memory=False, index_col="id")

<IPython.core.display.Javascript object>

In [10]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20800 entries, 0 to 20799
Data columns (total 4 columns):
title     20242 non-null object
author    18843 non-null object
text      20761 non-null object
label     20800 non-null int64
dtypes: int64(1), object(3)
memory usage: 812.5+ KB


<IPython.core.display.Javascript object>

label is the only column that doesn't have missing labels, but there are missing titles, authors etc.

In [11]:
all_data.describe(include="all")

Unnamed: 0,title,author,text,label
count,20242,18843,20761.0,20800.0
unique,19803,4201,20386.0,
top,The Dark Agenda Behind Globalism And Open Borders,Pam Key,,
freq,5,243,75.0,
mean,,,,0.500625
std,,,,0.500012
min,,,,0.0
25%,,,,0.0
50%,,,,1.0
75%,,,,1.0


<IPython.core.display.Javascript object>

label is a number (probably 0 for real news and 1 for fake news). The median is 1 so it seems like a rather balanced data set.

There is quite a high cardinality on authors so we won't encode it as categorical variable, we just use it as string like the other ones.

In [12]:
all_data.head(10)

Unnamed: 0_level_0,title,author,text,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,House Dem Aide: We Didn’t Even See Comey’s Let...,Darrell Lucus,House Dem Aide: We Didn’t Even See Comey’s Let...,1
1,"FLYNN: Hillary Clinton, Big Woman on Campus - ...",Daniel J. Flynn,Ever get the feeling your life circles the rou...,0
2,Why the Truth Might Get You Fired,Consortiumnews.com,"Why the Truth Might Get You Fired October 29, ...",1
3,15 Civilians Killed In Single US Airstrike Hav...,Jessica Purkiss,Videos 15 Civilians Killed In Single US Airstr...,1
4,Iranian woman jailed for fictional unpublished...,Howard Portnoy,Print \nAn Iranian woman has been sentenced to...,1
5,Jackie Mason: Hollywood Would Love Trump if He...,Daniel Nussbaum,"In these trying times, Jackie Mason is the Voi...",0
6,Life: Life Of Luxury: Elton John’s 6 Favorite ...,,Ever wonder how Britain’s most iconic pop pian...,1
7,Benoît Hamon Wins French Socialist Party’s Pre...,Alissa J. Rubin,"PARIS — France chose an idealistic, traditi...",0
8,Excerpts From a Draft Script for Donald Trump’...,,Donald J. Trump is scheduled to make a highly ...,0
9,"A Back-Channel Plan for Ukraine and Russia, Co...",Megan Twohey and Scott Shane,A week before Michael T. Flynn resigned as nat...,0


<IPython.core.display.Javascript object>

Fill missing values

In [13]:
all_data["title"].fillna("No Title", inplace=True)

<IPython.core.display.Javascript object>

In [14]:
all_data["author"].fillna("Anonymous", inplace=True)

<IPython.core.display.Javascript object>

In [15]:
all_data["text"].fillna("", inplace=True)

<IPython.core.display.Javascript object>

Check if there are still missing values:

In [16]:
all_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20800 entries, 0 to 20799
Data columns (total 4 columns):
title     20800 non-null object
author    20800 non-null object
text      20800 non-null object
label     20800 non-null int64
dtypes: int64(1), object(3)
memory usage: 812.5+ KB


<IPython.core.display.Javascript object>

In [17]:
all_data["label"].value_counts()

1    10413
0    10387
Name: label, dtype: int64

<IPython.core.display.Javascript object>

We have a balanced data set - yeah!

Let's concatenate the input into one big string. To just have 1 textual feature rather than 3.

In [18]:
all_data["message"] = (
    all_data["author"] + " " + all_data["title"] + " " + all_data["text"]
)

<IPython.core.display.Javascript object>

In [19]:
all_data["message"]

id
0        Darrell Lucus House Dem Aide: We Didn’t Even S...
1        Daniel J. Flynn FLYNN: Hillary Clinton, Big Wo...
2        Consortiumnews.com Why the Truth Might Get You...
3        Jessica Purkiss 15 Civilians Killed In Single ...
4        Howard Portnoy Iranian woman jailed for fictio...
                               ...                        
20795    Jerome Hudson Rapper T.I.: Trump a ’Poster Chi...
20796    Benjamin Hoffman N.F.L. Playoffs: Schedule, Ma...
20797    Michael J. de la Merced and Rachel Abrams Macy...
20798    Alex Ansary NATO, Russia To Hold Parallel Exer...
20799    David Swanson What Keeps the F-35 Alive   Davi...
Name: message, Length: 20800, dtype: object

<IPython.core.display.Javascript object>

What are classmethods usually used for?

* as alternative initialisers

In [20]:
data_bundle = DataBundle.from_dataframe(
    dataframe=all_data, x_col="message", y_col="label"
)

<IPython.core.display.Javascript object>

Split the whole data into 3 parts (train 80%, validate 10% and test 10%). Set `random=False` so when we rerun the notebook we see exactly the same observations in the different sets.

In [21]:
train_data, valid_data, test_data = DataBundle.split(
    data_bundle=data_bundle, fracs=(0.8, 0.1, 0.1), random=False
)

<IPython.core.display.Javascript object>

In [22]:
@attr.s(auto_attribs=True)
class TextPreprocessor:
    max_length: int = 100
    default_value: str = b"<pad>"

    def __call__(self, text: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
        text = tf.strings.regex_replace(text, b"[^a-zA-Z']", b" ")
        # currently in deep learning it's actually advised not to so lower casing
        # but if you have a small data set it might be helpful
        text = tf.strings.lower(text)
        # split text into words
        text = tf.strings.split(text)
        # limit how many words we want to consider
        text = text[:, : self.max_length]
        # convert from a ragged tensor to a tensor
        # ragged tensor: length of the tensor can vary in size, 100 words in one message and 3 words in another
        # tensor: all messages have the same amount of words (and fill with defualt_value mentioned above)
        return text.to_tensor(default_value=self.default_value), label

<IPython.core.display.Javascript object>

`__call__` enables us to say `TextPreprocessor()` and make use of default arguments

In [23]:
base_dataset = (
    train_data.to_dataset()
    .shuffle(len(train_data))
    .batch(BATCH_SIZE)
    .map(TextPreprocessor(), num_parallel_calls=AUTOTUNE)
)

<IPython.core.display.Javascript object>

In [24]:
def make_vocabulary(dataset):
    vocabulary = Counter()
    for x, _ in dataset:
        for review in x:
            vocabulary.update(review.numpy().tolist())
    return vocabulary

<IPython.core.display.Javascript object>

In [25]:
vocabulary = make_vocabulary(base_dataset)

<IPython.core.display.Javascript object>

In [26]:
len(vocabulary)

53282

<IPython.core.display.Javascript object>

In [27]:
vocabulary.most_common()[:10]

[(b'the', 88447),
 (b'<pad>', 84547),
 (b'of', 39225),
 (b'to', 38017),
 (b'a', 36034),
 (b'and', 31123),
 (b'in', 29604),
 (b's', 19674),
 (b'on', 17192),
 (b'that', 15916)]

<IPython.core.display.Javascript object>

Limit vocabulary size to 50k, original size was 53k so we don't care about uncommon words.

But we don't want the model to break if it comes around a word it doesn't know: when that happens hash the word and place the hash in one of the buckets (with which the vocabulary will be enlarged later on).

In [28]:
VOCABULARY_SIZE = 50000

<IPython.core.display.Javascript object>

In [29]:
truncated_vocabulary = [
    word for word, count in vocabulary.most_common()[:VOCABULARY_SIZE]
]

<IPython.core.display.Javascript object>

In [30]:
len(truncated_vocabulary)

50000

<IPython.core.display.Javascript object>

In [31]:
word_to_id = {word: index for index, word in enumerate(truncated_vocabulary)}

<IPython.core.display.Javascript object>

In [32]:
for word in b"get rich quick".split():
    print(word_to_id.get(word) if word_to_id.get(word) is not None else VOCABULARY_SIZE)

129
1296
2313


<IPython.core.display.Javascript object>

In [33]:
words = tf.constant(truncated_vocabulary)

<IPython.core.display.Javascript object>

In [34]:
word_ids = tf.range(len(truncated_vocabulary), dtype=tf.int64)

<IPython.core.display.Javascript object>

Add 5000 out of vocabulary buckets

In [35]:
n_oov_buckets = VOCABULARY_SIZE // 10

<IPython.core.display.Javascript object>

In [36]:
n_oov_buckets

5000

<IPython.core.display.Javascript object>

Use a dictionary to encode labels

In [37]:
table = tf.lookup.StaticVocabularyTable(
    tf.lookup.KeyValueTensorInitializer(words, word_ids), n_oov_buckets
)

<IPython.core.display.Javascript object>

Test what happens when retrieving an unknown word?

It returns something (as opposed to failing)

In [38]:
table.lookup(tf.constant([b"mooney baby, moooney".split()]))

<tf.Tensor: id=67769, shape=(1, 3), dtype=int64, numpy=array([[48645, 54568, 53087]])>

<IPython.core.display.Javascript object>

In [39]:
@attr.s(auto_attribs=True)
class WordEncoder:
    vocabulary_table: tf.lookup.StaticVocabularyTable

    def __call__(self, text: tf.Tensor, labels: tf.Tensor) -> tf.Tensor:
        return self.vocabulary_table.lookup(text), labels

<IPython.core.display.Javascript object>

In [40]:
train_dataset = (
    train_data.to_dataset()
    .shuffle(len(train_data))
    .batch(BATCH_SIZE)
    .map(TextPreprocessor(), num_parallel_calls=AUTOTUNE)
    .map(WordEncoder(vocabulary_table=table), num_parallel_calls=AUTOTUNE)
    .cache()
    .repeat()
    .prefetch(AUTOTUNE)
)

<IPython.core.display.Javascript object>

In [41]:
valid_dataset = (
    valid_data.to_dataset()
    .batch(BATCH_SIZE)
    .map(TextPreprocessor(), num_parallel_calls=AUTOTUNE)
    .map(WordEncoder(vocabulary_table=table), num_parallel_calls=AUTOTUNE)
    .cache()
    .prefetch(AUTOTUNE)
)

<IPython.core.display.Javascript object>

In [42]:
test_dataset = (
    test_data.to_dataset()
    .batch(BATCH_SIZE)
    .map(TextPreprocessor(), num_parallel_calls=AUTOTUNE)
    .map(WordEncoder(vocabulary_table=table), num_parallel_calls=AUTOTUNE)
    .cache()
    .prefetch(AUTOTUNE)
)

<IPython.core.display.Javascript object>

In [43]:
data_container = DataContainer(
    base=train_dataset,
    train=train_dataset,
    train_steps=math.ceil(len(train_data) / BATCH_SIZE),
    validation=valid_dataset,
    test=test_dataset,
    label_map={0: 0, 1: 1},
)

<IPython.core.display.Javascript object>

Check if training data set got encoded correctly:

With deep learning you don't get compile or run time errors.

It's mostly that the training time takes forever --> and then need to check if the input/output shapes are correct

In [44]:
for x, y in data_container.train.take(1):
    print(x.shape)
    print(y.shape)
    print(x[0])
    print(y[0])

(32, 100)
(32,)
tf.Tensor(
[  854  4193   171  5463  6152    12  6907  4730    33  4193   171  5463
  6152    12  6907  4730    33   359    85   462   129   831  2911  4193
   860   116   317   160 10274   594     5   181    81     7   116   404
    42    12  1194   812    12 10245     5    32  8932     5  2703   127
     2  7624    64 10826 13745     5   202  3750     0  4193   171  5463
   404   160 10274    37    22    69   679   185     0    61    38  6907
  3441    33   391  6038    46 10274  2946  1074     7   611  5073    37
    22    69  1370     8     0  4193   171  2958   185     0    61    38
 10484     9    12   159], shape=(100,), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)


<IPython.core.display.Javascript object>

The higher the number the more rare the word.

Check if validation data set got encoded correctly:

In [45]:
for x, y in data_container.validation.take(1):
    print(x.shape)
    print(y.shape)
    print(x[0])
    print(y[0])

(32, 100)
(32,)
tf.Tensor(
[ 5456  2192  1986    12 22702    46   348   293     5    24    85    20
     0    96  4538   507   695   815  2581  5725  3902  3392  5726   631
  4373  4546  5444     0 10044    60   217   312    10  1986    12     7
   276   100   348     0   138    49    65     7   571   157   131   217
  2114  5280    10  4033    46   341  1530    20     0  9261    20  2028
   117    83    77  1595  1065    84  3399    10     0    12   100     0
  1183   445    29  3927     3    46    65 22702   293    16   285   479
    83    28  1178    10    54     3  1618    16    66    38     0   507
    43    29    54 14209], shape=(100,), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)


<IPython.core.display.Javascript object>

Here we're gonna create a model from scratch (not using pretrained model).

We do it here because the data is so balanced that this is gonna be a piece-of-cake.

2-layer bi-directional LSTM with a GlobalMaxPool1D on top and a Dense layer at the end:
* start with embedding layer to get a lower dimensional representation of the data (of a potentially sparse space) to extract latent features from the categorical variables
    * embedding size 256 and inputs is the sum of vocabulary and the out-of-vocabulary buckets


In [46]:
def make_lstm_model(n_categories, embedding_size, lstm_size, lstm_dropout, dropout):
    return keras.models.Sequential(
        [
            # extract latent features from the categorical variables
            # usually you want the embeddings to be pretrained
            keras.layers.Embedding(
                VOCABULARY_SIZE + n_oov_buckets,
                embedding_size,
                # deals with padding to train faster
                mask_zero=True,
                # input shape can be anything
                input_shape=[None],
            ),
            keras.layers.Bidirectional(
                keras.layers.LSTM(
                    lstm_size, dropout=lstm_dropout, return_sequences=True
                )
            ),
            keras.layers.Bidirectional(
                keras.layers.LSTM(
                    lstm_size, dropout=lstm_dropout, return_sequences=True
                )
            ),
            keras.layers.GlobalMaxPool1D(),
            keras.layers.Dropout(dropout),
            keras.layers.Dense(n_categories, activation=keras.activations.softmax),
        ]
    )

<IPython.core.display.Javascript object>

In [47]:
model = make_lstm_model(
    n_categories=data_container.n_classes,
    embedding_size=256,
    lstm_size=256,
    lstm_dropout=0.2,
    dropout=0.5,
)

<IPython.core.display.Javascript object>

Same loss and optimizer as for the computer vision example:

In [48]:
model.compile(
    loss=keras.losses.sparse_categorical_crossentropy,
    optimizer=keras.optimizers.Adam(lr=3e-4),
    metrics=[keras.metrics.sparse_categorical_accuracy],
)

<IPython.core.display.Javascript object>

In [49]:
history = model.fit(
    data_container.train,
    steps_per_epoch=data_container.train_steps // 10,
    validation_data=data_container.validation,
    epochs=20,
    callbacks=[
        keras.callbacks.ReduceLROnPlateau(patience=1, factor=0.3),
        keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True),
    ],
)

Train for 52 steps, validate for 65 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


<IPython.core.display.Javascript object>

After network has seen all data accuracy is `val_sparse_categorical_accuracy: 0.9846` - pretty good!

In [50]:
model.evaluate(data_container.validation)



[0.05768993731874686, 0.9846154]

<IPython.core.display.Javascript object>

In [51]:
print(
    classification_report(
        [y.numpy() for _, y in data_container.validation.unbatch()],
        model.predict(data_container.validation).argmax(axis=1),
    )
)

              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1047
           1       0.99      0.98      0.98      1033

    accuracy                           0.98      2080
   macro avg       0.98      0.98      0.98      2080
weighted avg       0.98      0.98      0.98      2080



<IPython.core.display.Javascript object>

These results look good, we're not worried about any class imbalances.

For the cases when this is not enough, turn to tensorflow hub:

* pretrained embedding layers
* pretrained language models

Also note, using the pretrained embedding layers you can feed the training data in pretty raw (no tolenization, no shuffling, and whatever else we did for computer vision.)

In [52]:
train_dataset = (
    train_data.to_dataset()
    .shuffle(len(train_data))
    .batch(BATCH_SIZE)
    .cache()
    .repeat()
    .prefetch(AUTOTUNE)
)

<IPython.core.display.Javascript object>

In [53]:
valid_dataset = valid_data.to_dataset().batch(BATCH_SIZE).cache().prefetch(AUTOTUNE)

<IPython.core.display.Javascript object>

In [54]:
test_dataset = test_data.to_dataset().batch(BATCH_SIZE).cache().prefetch(AUTOTUNE)

<IPython.core.display.Javascript object>

In [55]:
data_container = DataContainer(
    base=train_dataset,
    train=train_dataset,
    train_steps=math.ceil(len(train_data) / BATCH_SIZE),
    validation=valid_dataset,
    test=test_dataset,
    label_map={0: 0, 1: 1},
)

<IPython.core.display.Javascript object>

Use the pretrained embedding:

(spoiler will get us to `val_sparse_categorical_accuracy: 0.9928`)

In [56]:
def train_model(
    model,
    data_container,
    epochs,
    lrs=None,
    optimizers=None,
    patience=5,
    class_weights=None,
    verbose=1,
    log_dir=str(TEMP_DIR / "logs"),
):
    if optimizers is None:
        optimizers = [keras.optimizers.Adam(lr) for lr in lrs]
    model.compile(
        loss=keras.losses.sparse_categorical_crossentropy,
        optimizer=optimizers[0],
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.fit(
        data_container.train,
        steps_per_epoch=data_container.train_steps,
        validation_data=data_container.validation,
        epochs=epochs[0],
        callbacks=[
            keras.callbacks.ReduceLROnPlateau(patience=patience, factor=0.3),
            keras.callbacks.EarlyStopping(patience=patience, restore_best_weights=True),
        ],
        class_weight=class_weights,
        verbose=verbose,
    )
    model.layers[0].trainable = True
    model.compile(
        loss=keras.losses.sparse_categorical_crossentropy,
        optimizer=optimizers[1],
        metrics=[keras.metrics.sparse_categorical_accuracy],
    )
    model.fit(
        data_container.train,
        steps_per_epoch=data_container.train_steps,
        validation_data=data_container.validation,
        epochs=epochs[1],
        callbacks=[
            keras.callbacks.ReduceLROnPlateau(patience=patience // 2, factor=0.3),
            keras.callbacks.EarlyStopping(patience=patience, restore_best_weights=True),
            keras.callbacks.TensorBoard(log_dir=log_dir),
        ],
        class_weight=class_weights,
        verbose=verbose,
    )

<IPython.core.display.Javascript object>

In [57]:
def make_hub_model(url, n_categories):
    return keras.Sequential(
        [
            hub.KerasLayer(url, dtype=tf.string, input_shape=[]),
            keras.layers.Dropout(0.5),
            keras.layers.Dense(n_categories, activation=keras.activations.softmax),
        ]
    )

<IPython.core.display.Javascript object>

In [58]:
def run_models(urls, data_container, class_weights):
    for url in urls:
        model = make_hub_model(url, data_container.n_classes)
        model_name = f"{url.split('/')[4]}"
        print(f" {model_name} ".center(80, "="))
        shutil.rmtree(str(TEMP_DIR / model_name), ignore_errors=True)
        train_model(
            model=model,
            data_container=data_container,
            epochs=[25, 15],
            optimizers=[keras.optimizers.Adam(lr=3e-4), keras.optimizers.Adam(lr=1e-4)],
            class_weights=class_weights,
            patience=4,
            verbose=2,
            log_dir=str(TEMP_DIR / model_name),
        )
        model.save(f"{TEMP_DIR / model_name}.h5")
        save_keras_model(
            model,
            str(TEMP_DIR / model_name / "architecture"),
            str(TEMP_DIR / model_name / "weights"),
        )
        keras.backend.clear_session()
        del model
        keras.backend.clear_session()

<IPython.core.display.Javascript object>

In [59]:
def evaluate_models(urls, data_container):
    reports = {}
    for url in urls:
        model_name = f"{url.split('/')[4]}"
        print(f" {model_name} ".center(80, "="))
        try:
            model = keras.model.load_model(
                f"{TEMP_DIR / model_name}.h5",
                custom_objects={"KerasLayer": hub.KerasLayer},
            )
        except:
            print(f"Loading architecture & weights separately")
            model = load_keras_model(
                str(TEMP_DIR / model_name / "architecture"),
                str(TEMP_DIR / model_name / "weights"),
                custom_objects={"KerasLayer": hub.KerasLayer},
            )
        reports[model_name] = classification_report(
            [
                label.numpy()
                for _, label in data_container.validation.take(-1).unbatch()
            ],
            model.predict(data_container.validation).argmax(axis=1),
        )
        del model
    return reports

<IPython.core.display.Javascript object>

Go to tensorflow hub to pick pretrained language models:

In [60]:
model_urls = (
    "https://tfhub.dev/google/Wiki-words-250-with-normalization/2",
    "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2",
)

<IPython.core.display.Javascript object>

In [61]:
run_models(model_urls, data_container, None)

Train for 520 steps, validate for 65 steps
Epoch 1/25
520/520 - 15s - loss: 0.8740 - sparse_categorical_accuracy: 0.5410 - val_loss: 0.5967 - val_sparse_categorical_accuracy: 0.6476
Epoch 2/25
520/520 - 14s - loss: 0.6881 - sparse_categorical_accuracy: 0.6258 - val_loss: 0.5480 - val_sparse_categorical_accuracy: 0.7250
Epoch 3/25
520/520 - 13s - loss: 0.6067 - sparse_categorical_accuracy: 0.6812 - val_loss: 0.5277 - val_sparse_categorical_accuracy: 0.7543
Epoch 4/25
520/520 - 14s - loss: 0.5746 - sparse_categorical_accuracy: 0.7028 - val_loss: 0.5155 - val_sparse_categorical_accuracy: 0.7683
Epoch 5/25
520/520 - 14s - loss: 0.5533 - sparse_categorical_accuracy: 0.7225 - val_loss: 0.5057 - val_sparse_categorical_accuracy: 0.7678
Epoch 6/25
520/520 - 13s - loss: 0.5497 - sparse_categorical_accuracy: 0.7260 - val_loss: 0.4990 - val_sparse_categorical_accuracy: 0.7731
Epoch 7/25
520/520 - 14s - loss: 0.5420 - sparse_categorical_accuracy: 0.7312 - val_loss: 0.4942 - val_sparse_categorical_a



520/520 - 47s - loss: 0.4062 - sparse_categorical_accuracy: 0.8255 - val_loss: 0.2802 - val_sparse_categorical_accuracy: 0.9130
Epoch 2/15
520/520 - 46s - loss: 0.2540 - sparse_categorical_accuracy: 0.9146 - val_loss: 0.1908 - val_sparse_categorical_accuracy: 0.9519
Epoch 3/15
520/520 - 42s - loss: 0.1730 - sparse_categorical_accuracy: 0.9490 - val_loss: 0.1435 - val_sparse_categorical_accuracy: 0.9673
Epoch 4/15
520/520 - 41s - loss: 0.1269 - sparse_categorical_accuracy: 0.9670 - val_loss: 0.1164 - val_sparse_categorical_accuracy: 0.9760
Epoch 5/15
520/520 - 41s - loss: 0.0956 - sparse_categorical_accuracy: 0.9767 - val_loss: 0.0989 - val_sparse_categorical_accuracy: 0.9774
Epoch 6/15
520/520 - 41s - loss: 0.0750 - sparse_categorical_accuracy: 0.9829 - val_loss: 0.0866 - val_sparse_categorical_accuracy: 0.9788
Epoch 7/15
520/520 - 41s - loss: 0.0610 - sparse_categorical_accuracy: 0.9873 - val_loss: 0.0779 - val_sparse_categorical_accuracy: 0.9832
Epoch 8/15
520/520 - 41s - loss: 0.048

<IPython.core.display.Javascript object>

In [62]:
reports = evaluate_models(model_urls, data_container)

Loading architecture & weights separately
Loading architecture & weights separately


<IPython.core.display.Javascript object>

In [63]:
for model_name, report in reports.items():
    print(f" {model_name} ".center(80, "="))
    print(report)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1047
           1       0.99      0.99      0.99      1033

    accuracy                           0.99      2080
   macro avg       0.99      0.99      0.99      2080
weighted avg       0.99      0.99      0.99      2080

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1047
           1       1.00      0.99      0.99      1033

    accuracy                           0.99      2080
   macro avg       0.99      0.99      0.99      2080
weighted avg       0.99      0.99      0.99      2080



<IPython.core.display.Javascript object>