In [2]:
import gc
import pickle
import shutil
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

import librosa
import librosa.display
import numpy as np
import pandas as pd
import tensorflow as tf

from matplotlib import pyplot as plt
from sklearn.metrics import precision_score, accuracy_score
from keras.applications.vgg16 import VGG16
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import (
    AveragePooling2D,
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
)
from tensorflow.keras.losses import MeanSquaredLogarithmicError
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adadelta
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.math import confusion_matrix


In [35]:
import sys
import logging

nblog = open("data/nb.log", "a+")
sys.stdout.echo = nblog
sys.stderr.echo = nblog

get_ipython().log.handlers[0].stream = nblog
get_ipython().log.setLevel(logging.INFO)

%autosave 5

Autosaving every 5 seconds


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip -qq drive/MyDrive/data/images.zip

In [3]:
# constants
DATA_DIR = Path("data")
#DATA_DIR = Path("/content/drive/MyDrive/data")
#IMG_DIR = Path("/content/images")
IMG_DIR = DATA_DIR / "images"
TRAIN_DIR = IMG_DIR / "train"
TEST_DIR = IMG_DIR / "test"
US8K_DIR = DATA_DIR / "UrbanSound8K"
AUDIO_DIR = US8K_DIR / "audio"
META_CSV = US8K_DIR / "metadata" / "UrbanSound8K.csv"
LOG_DIR = DATA_DIR / "logs"
NUM_CLASSES = 10


In [4]:
# hyperparameters
SAMPLING_RATE = 16000  # paper: 16000, other values: 22050, 44100
CHUNK_SIZE = int(1 * SAMPLING_RATE)  # paper: 16000 (i.e. 1 second), others: 0.1 second
OVERLAP_PERCENT = 75  # paper: 75%

IMG_HEIGHT = IMG_WIDTH = 72

# model related
BATCH_SIZE = 100  # paper: 100
EPOCHS = 100  # paper: 100
EARLY_STOP_PATIENCE = 10


In [5]:
# calculated constants
STRIDE = int((1 - OVERLAP_PERCENT / 100) * CHUNK_SIZE)
MODEL_ID = f"Sr{SAMPLING_RATE}Cs{CHUNK_SIZE}Ol{OVERLAP_PERCENT}"
FOLD_DATA_DIR = DATA_DIR / f"foldData.{MODEL_ID}.pickle"
FOLD_DATA = DATA_DIR / f"foldData.{MODEL_ID}.pickle"
MODEL_FILE = DATA_DIR / "2d_saved_models" / f"model.{MODEL_ID}"
VGG_MODEL_FILE = DATA_DIR / "vgg_saved_models" / f"model.{MODEL_ID}"


In [6]:
SAMPLING_RATE, CHUNK_SIZE, STRIDE, FOLD_DATA, MODEL_FILE


(16000,
 16000,
 4000,
 PosixPath('data/foldData.Sr16000Cs16000Ol75.pickle'),
 PosixPath('data/2d_saved_models/model.Sr16000Cs16000Ol75'))

In [7]:
def read_chunks():
    fold_Xs, fold_ys, fold_chunk_lens = [], [], []
    for fold in range(1, 11):
        with (FOLD_DATA_DIR / f"{fold}").open("br") as f:
            X, y, chunk_lens = pickle.load(f)
            fold_Xs.append(X)
            fold_ys.append(y)
            fold_chunk_lens.append(chunk_lens)
    return fold_Xs, fold_ys, fold_chunk_lens


In [8]:
# folder structure:
# data/images/fold{i}/{classNumber}/{idx}.png
def create_melspec_image(fold, fold_X, fold_y, height=IMG_HEIGHT, width=IMG_WIDTH):
    fold_dir = IMG_DIR / f"fold{fold}"

    dpi = plt.rcParams["figure.dpi"]
    figsize = (height / dpi, width / dpi)

    for idx in range(len(fold_X)):
        img_path = fold_dir / f"{fold_y[idx].argmax()}" / f"fold{fold}-{idx}.jpg"
        if not img_path.exists():
            mel = librosa.feature.melspectrogram(y=fold_X[idx].reshape(CHUNK_SIZE))
            fig, ax = plt.subplots(figsize=figsize)
            ax.axis("off")
            librosa.display.specshow(librosa.power_to_db(mel, ref=np.max), ax=ax)

            # https://stackoverflow.com/a/65469535
            fig.subplots_adjust(
                top=1.0, bottom=0, right=1.0, left=0, hspace=0, wspace=0
            )
            img_path.parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(img_path)
            plt.close(fig)


In [9]:
def build_model(input_shape):
    # build network topology
    model = Sequential()

    # 1st conv layer
    model.add(Conv2D(16, (3, 3), activation="relu", input_shape=input_shape))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding="same"))
    model.add(BatchNormalization())

    # 2nd conv layer
    model.add(Conv2D(32, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding="same"))
    model.add(BatchNormalization())

    # 3rd conv layer
    model.add(Conv2D(64, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding="same"))
    model.add(BatchNormalization())

    # flatten output and feed it into dense layer
    model.add(Flatten())
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.5))

    # output layer
    # model.add(keras.layers.Dense(len(NAMES), activation='softmax'))
    model.add(Dense(10, activation="softmax"))

    model.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


In [10]:
# copy data from folds except test_fold to train directory and others to test directory
def copy_to_train_test_dir(test_fold):
    shutil.rmtree(TRAIN_DIR, ignore_errors=True)
    shutil.rmtree(TEST_DIR, ignore_errors=True)
    for fold in range(1, 11):
        fold_dir = IMG_DIR / f"fold{fold}"
        for classno in range(NUM_CLASSES):
            src_dir = fold_dir / f"{classno}"
            dst_dir = (TEST_DIR if fold == test_fold else TRAIN_DIR) / f"{classno}"
            dst_dir.mkdir(parents=True, exist_ok=True)
            for file in src_dir.iterdir():
                shutil.copy(file, dst_dir)

In [11]:
def sum_rule_agg(y, chunk_lens):
    return np.array(
        [res.mean(axis=0).argmax() for res in np.split(y, chunk_lens.cumsum()[:-1])]
    )


In [12]:
meta = pd.read_csv(META_CSV)
meta


Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.000000,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.500000,62.500000,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.500000,64.500000,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.000000,67.000000,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.500000,72.500000,1,5,2,children_playing
...,...,...,...,...,...,...,...,...
8727,99812-1-2-0.wav,99812,159.522205,163.522205,2,7,1,car_horn
8728,99812-1-3-0.wav,99812,181.142431,183.284976,2,7,1,car_horn
8729,99812-1-4-0.wav,99812,242.691902,246.197885,2,7,1,car_horn
8730,99812-1-5-0.wav,99812,253.209850,255.741948,2,7,1,car_horn


In [14]:
fold_Xs, fold_ys, fold_chunk_lens = read_chunks()


In [None]:
# set _dir_candidates = [] in
# /data/arpank/miniforge3/envs/dlproject/lib/python3.10/multiprocessing/heap.py
with ProcessPoolExecutor() as e:
    for fold in range(1, 11):
        e.submit(
            create_melspec_image,
            fold,
            fold_Xs[fold - 1],
            fold_ys[fold - 1],
        )
# takes 30mins


In [15]:
# make sure GPUs are available
gpus = tf.config.list_physical_devices("GPU")
gpus
# tf.config.set_visible_devices(gpus[2:], "GPU")


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:3', device_type='GPU')]

In [None]:
copy_to_train_test_dir(10)

In [None]:
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
val_datagen = ImageDataGenerator(rescale=1.0 / 255)


In [None]:
train_generator = train_datagen.flow_from_directory(
    #TRAIN_DIR, target_size=(IMG_HEIGHT, IMG_WIDTH), batch_size=BATCH_SIZE
    #TRAIN_DIR, target_size=(224, 224), batch_size=BATCH_SIZE
    TRAIN_DIR, target_size=(72, 72), batch_size=BATCH_SIZE
)

validation_generator = val_datagen.flow_from_directory(
    TEST_DIR,
    #target_size=(IMG_HEIGHT, IMG_WIDTH),
    #target_size=(224, 224),
    target_size=(72, 72),
    batch_size=BATCH_SIZE,
    shuffle=False,
)


Found 90030 images belonging to 10 classes.
Found 9612 images belonging to 10 classes.


In [None]:
#model = build_model(input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))
model = build_model(input_shape=(72, 72, 3))
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 70, 70, 16)        448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 35, 35, 16)       0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 35, 35, 16)       64        
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 33, 33, 32)        4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 17, 17, 32)       0         
 2D)                                                             
                                                        

In [None]:
# callbacks to save, stop early and visualize
cpcallback = ModelCheckpoint(
    monitor="val_accuracy", filepath=MODEL_FILE, save_best_only=True, verbose=1
)
escallback = EarlyStopping(
    monitor="val_accuracy", min_delta=0, patience=10, verbose=1
)
tbcallback = TensorBoard(log_dir=LOG_DIR, histogram_freq=1)
callbacks = [cpcallback, escallback, tbcallback]


In [None]:
hist = model.fit(
    train_generator,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
    verbose=1,
    shuffle=True,
)


Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.59353, saving model to /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 2/100
Epoch 2: val_accuracy did not improve from 0.59353
Epoch 3/100
Epoch 3: val_accuracy improved from 0.59353 to 0.60861, saving model to /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 4/100
Epoch 4: val_accuracy did not improve from 0.60861
Epoch 5/100
Epoch 5: val_accuracy did not improve from 0.60861
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.60861
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.60861
Epoch 8/100
Epoch 8: val_accuracy improved from 0.60861 to 0.63161, saving model to /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75
INFO:te

KeyboardInterrupt: ignored

In [None]:
# load the model with the best weights
model = load_model(MODEL_FILE)


In [None]:
y_pred = model.predict(validation_generator)
y_pred
score = model.evaluate(validation_generator, verbose=1)
score




[3.3545119762420654, 0.6358116865158081]

In [None]:
with (FOLD_DATA_DIR / "1").open("br") as f:
    x, y, cl = pickle.load(f)

In [None]:
#y_pred_agg = sum_rule_agg(y_pred, fold_chunk_lens[9])
y_pred_agg = sum_rule_agg(y_pred, cl)
# y_test_agg = sum_rule_agg(validation_generator.classes)
y_test_agg = sum_rule_agg(
    #to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), fold_chunk_lens[9]
    to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), cl
)
(y_pred_agg == y_test_agg).sum() / len(y_pred_agg), accuracy_score(
    y_test_agg, y_pred_agg
)


(0.706875753920386, 0.706875753920386)

In [None]:
confusion_matrix(y_test_agg, y_pred_agg, num_classes=NUM_CLASSES)


<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[61,  0,  9,  0,  0,  2,  0, 10,  1, 20],
       [ 0, 15,  0,  0,  0,  0,  0,  0,  0,  4],
       [ 0,  0, 83,  5,  0,  7,  3,  1,  2,  8],
       [ 0,  0, 17, 49,  1,  1,  0,  0,  0,  6],
       [ 0,  0,  3,  4, 75,  0,  0,  1,  0, 17],
       [22,  0, 13,  0,  0, 54,  0,  3,  0, 12],
       [ 0,  0,  0,  0,  0,  0,  8,  0,  0,  0],
       [ 0,  0,  2,  0,  2,  0,  0, 44,  0, 51],
       [ 3,  0,  9, 12,  1,  1,  0,  0, 53,  1],
       [ 1,  0,  7,  0,  0,  0,  0,  0,  0, 99]], dtype=int32)>

In [None]:
# training
accuracies = []
confusion_matrices = []
for val_idx in range(7, 10):
    print(f"Starting loop with val_idx={val_idx}")

    copy_to_train_test_dir(test_fold=val_idx+1)
    model = build_model(input_shape=(72, 72, 3))

    train_datagen = ImageDataGenerator(rescale=1.0 / 255)
    val_datagen = ImageDataGenerator(rescale=1.0 / 255)

    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR, target_size=(72, 72), batch_size=BATCH_SIZE
    )

    validation_generator = val_datagen.flow_from_directory(
        TEST_DIR,
        #target_size=(IMG_HEIGHT, IMG_WIDTH),
        #target_size=(224, 224),
        target_size=(72, 72),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )

    cpcallback = ModelCheckpoint(
        monitor="val_accuracy",
        filepath=f"{MODEL_FILE}Vi{val_idx}",
        save_best_only=True,
        verbose=1
    )
    callbacks[0] = cpcallback
    hist = model.fit(
        train_generator,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=validation_generator,
        callbacks=callbacks,
        verbose=1,
        shuffle=True,
    )

    # load the model with the best weights
    model = load_model(f"{MODEL_FILE}Vi{val_idx}")

    y_pred = model.predict(validation_generator)

    with (FOLD_DATA_DIR / f"{val_idx+1}").open("br") as f:
        x, y, cl = pickle.load(f)

    y_pred_agg = sum_rule_agg(y_pred, cl)
    y_test_agg = sum_rule_agg(to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), cl)

    acc = (y_pred_agg == y_test_agg).sum() / len(y_pred_agg)
    accuracies.append(acc)
    print(f"accuracy with val_idx={val_idx} is {acc}")

    cm = confusion_matrix(y_test_agg, y_pred_agg, num_classes=NUM_CLASSES)
    confusion_matrices.append(cm)


Starting loop with val_idx=7
Found 90536 images belonging to 10 classes.
Found 9106 images belonging to 10 classes.
Epoch 1/100
Epoch 1: val_accuracy improved from -inf to 0.60015, saving model to /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75Vi7
INFO:tensorflow:Assets written to: /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75Vi7/assets
Epoch 2/100
Epoch 2: val_accuracy did not improve from 0.60015
Epoch 3/100
Epoch 3: val_accuracy did not improve from 0.60015
Epoch 4/100
Epoch 4: val_accuracy did not improve from 0.60015
Epoch 5/100
Epoch 5: val_accuracy improved from 0.60015 to 0.66099, saving model to /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75Vi7
INFO:tensorflow:Assets written to: /content/drive/MyDrive/data/2d_saved_models/model.Sr16000Cs16000Ol75Vi7/assets
Epoch 6/100
Epoch 6: val_accuracy did not improve from 0.66099
Epoch 7/100
Epoch 7: val_accuracy did not improve from 0.66099
Epoch 8/100
Epoch 8: val_accurac

In [None]:
#accuracies, confusion_matrices


In [20]:
# VGG16 training


def build_vgg_model(input_shape):
    # include_top=false means ignore the FC classifier part on top of the model
    base_model = VGG16(weights="imagenet", include_top=False,
                      input_shape=(72, 72, 3),
                      classes=NUM_CLASSES)

    # base_model = tf.keras.applications.resnet50.ResNet50(
    #     include_top=False,
    #     weights='imagenet',
    #     input_shape=(72, 72, 3),
    #     classes=10
    # )
    # base_model.summary()

    # freeze all layers except last CONV layer
    base_model.trainable = False
    base_model.layers[-2].trainable = True

    # add 1 Dense layer with 64 neurons
    x = Flatten()(base_model.layers[-1].output)
    x = Dense(64, activation="relu")(x)
    x = Dense(NUM_CLASSES, activation="softmax")(x)
    new_model = Model(inputs=base_model.input, outputs=x)
    new_model.compile(
            optimizer="adam",
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
    return new_model

In [21]:
new_model = build_vgg_model((72, 72, 3))

In [22]:
copy_to_train_test_dir(10)

In [23]:
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
val_datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, target_size=(72, 72), batch_size=BATCH_SIZE
)

validation_generator = val_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(72, 72),
    batch_size=BATCH_SIZE,
    shuffle=False,
)


Found 90030 images belonging to 10 classes.
Found 9612 images belonging to 10 classes.


In [24]:
# callbacks to save, stop early and visualize
cpcallback = ModelCheckpoint(
    monitor="val_accuracy", filepath=VGG_MODEL_FILE, save_best_only=True, verbose=1
)
escallback = EarlyStopping(
    monitor="val_accuracy", min_delta=0, patience=5, verbose=1
)
tbcallback = TensorBoard(log_dir=LOG_DIR, histogram_freq=1)
callbacks = [cpcallback, escallback, tbcallback]


In [25]:
hist = new_model.fit(
    train_generator,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    callbacks=callbacks,
    verbose=1,
    shuffle=True,
)


Epoch 1/100
Epoch 00001: val_accuracy improved from -inf to 0.70745, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: data/vgg_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 2/100
Epoch 00002: val_accuracy improved from 0.70745 to 0.71119, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: data/vgg_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 3/100
Epoch 00003: val_accuracy improved from 0.71119 to 0.72722, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: data/vgg_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 4/100
Epoch 00004: val_accuracy improved from 0.72722 to 0.74875, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75
INFO:tensorflow:Assets written to: data/vgg_saved_models/model.Sr16000Cs16000Ol75/assets
Epoch 5/100
Epoch 00005: val_accuracy did not improve from 0.74875
Epoch 6/100
Epoch 00006: val_accuracy di

In [30]:
# load the model with the best weights
model = load_model(f"{VGG_MODEL_FILE}")

y_pred = model.predict(validation_generator)

#     with (FOLD_DATA_DIR / f"{val_idx+1}").open("br") as f:
#         x, y, cl = pickle.load(f)
val_idx = 9
cl = fold_chunk_lens[val_idx]

y_pred_agg = sum_rule_agg(y_pred, cl)
y_test_agg = sum_rule_agg(to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), cl)

acc = (y_pred_agg == y_test_agg).sum() / len(y_pred_agg)
print(f"accuracy with val_idx={val_idx} is {acc}")

cm = confusion_matrix(y_test_agg, y_pred_agg, num_classes=NUM_CLASSES)
cm

accuracy with val_idx=9 is 0.8076463560334528


  [res.mean(axis=0).argmax() for res in np.split(y, chunk_lens.cumsum()[:-1])]
  ret = um.true_divide(


<tf.Tensor: shape=(10, 10), dtype=int32, numpy=
array([[ 82,   0,   7,   5,   0,   0,   0,   7,   5,   4],
       [  1,  14,   0,   0,   0,   0,   0,   0,   0,   3],
       [  0,   0, 100,   6,   4,   0,   1,   0,   1,   0],
       [  0,   0,  10,  60,   0,   0,   0,   1,   0,   6],
       [  2,   0,   1,   2,  86,   0,   0,   2,   1,  10],
       [  8,   0,   8,   0,   1,  78,   0,   5,  10,   1],
       [  0,   0,   0,   0,   0,   0,   8,   1,   0,   0],
       [  0,   0,   0,   1,   1,   1,   0, 102,   0,   0],
       [  0,   1,   7,  14,   0,   0,   0,   1,  56,   1],
       [  0,   1,  16,   0,   0,   0,   0,   3,   1,  90]], dtype=int32)>

In [36]:
# training
accuracies = []
confusion_matrices = []
for val_idx in range(1, 10):
    print(f"Starting loop with val_idx={val_idx}")

    copy_to_train_test_dir(test_fold=val_idx+1)
    model = build_vgg_model(input_shape=(72, 72, 3))

    train_datagen = ImageDataGenerator(rescale=1.0 / 255)
    val_datagen = ImageDataGenerator(rescale=1.0 / 255)

    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR, target_size=(72, 72), batch_size=BATCH_SIZE
    )

    validation_generator = val_datagen.flow_from_directory(
        TEST_DIR,
        #target_size=(IMG_HEIGHT, IMG_WIDTH),
        #target_size=(224, 224),
        target_size=(72, 72),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )

    cpcallback = ModelCheckpoint(
        monitor="val_accuracy",
        filepath=f"{VGG_MODEL_FILE}Vi{val_idx}",
        save_best_only=True,
        verbose=1
    )
    callbacks[0] = cpcallback
    hist = model.fit(
        train_generator,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        validation_data=validation_generator,
        callbacks=callbacks,
        verbose=1,
        shuffle=True,
    )

    # load the model with the best weights
    model = load_model(f"{VGG_MODEL_FILE}Vi{val_idx}")

    y_pred = model.predict(validation_generator)

#     with (FOLD_DATA_DIR / f"{val_idx+1}").open("br") as f:
#         x, y, cl = pickle.load(f)
    cl = fold_chunk_lens[val_idx]

    y_pred_agg = sum_rule_agg(y_pred, cl)
    y_test_agg = sum_rule_agg(to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), cl)

    acc = (y_pred_agg == y_test_agg).sum() / len(y_pred_agg)
    accuracies.append(acc)
    print(f"accuracy with val_idx={val_idx} is {acc}")

    cm = confusion_matrix(y_test_agg, y_pred_agg, num_classes=NUM_CLASSES)
    confusion_matrices.append(cm)


Starting loop with val_idx=1
Found 89706 images belonging to 10 classes.
Found 9936 images belonging to 10 classes.
Epoch 1/100
Epoch 00001: val_accuracy improved from -inf to 0.66425, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75Vi1
INFO:tensorflow:Assets written to: data/vgg_saved_models/model.Sr16000Cs16000Ol75Vi1/assets
Epoch 2/100
Epoch 00002: val_accuracy did not improve from 0.66425
Epoch 3/100
Epoch 00003: val_accuracy did not improve from 0.66425
Epoch 4/100
Epoch 00004: val_accuracy did not improve from 0.66425
Epoch 5/100
Epoch 00005: val_accuracy did not improve from 0.66425
Epoch 6/100
Epoch 00006: val_accuracy did not improve from 0.66425
Epoch 00006: early stopping
accuracy with val_idx=1 is 0.748780487804878
Starting loop with val_idx=2
Found 88833 images belonging to 10 classes.
Found 10809 images belonging to 10 classes.
Epoch 1/100
Epoch 00001: val_accuracy improved from -inf to 0.57720, saving model to data/vgg_saved_models/model.Sr16000Cs16000Ol75V

In [37]:
accuracies

[0.748780487804878,
 0.6429378531073446,
 0.7114164904862579,
 0.8020361990950227,
 0.7312661498708011,
 0.7231920199501247,
 0.7433510638297872,
 0.7794871794871795,
 0.8281444582814446]

In [17]:
# testing
accuracies = []
confusion_matrices = []
for val_idx in range(10):
    print(f"Starting loop with val_idx={val_idx}")

    copy_to_train_test_dir(test_fold=val_idx+1)

    val_datagen = ImageDataGenerator(rescale=1.0 / 255)

    validation_generator = val_datagen.flow_from_directory(
        TEST_DIR,
        #target_size=(IMG_HEIGHT, IMG_WIDTH),
        #target_size=(224, 224),
        target_size=(72, 72),
        batch_size=BATCH_SIZE,
        shuffle=False,
    )

    # load the model with the best weights
    model = load_model(f"{VGG_MODEL_FILE}Vi{val_idx}")

    y_pred = model.predict(validation_generator)

#     with (FOLD_DATA_DIR / f"{val_idx+1}").open("br") as f:
#         x, y, cl = pickle.load(f)
    cl = fold_chunk_lens[val_idx]

    y_pred_agg = sum_rule_agg(y_pred, cl)
    y_test_agg = sum_rule_agg(to_categorical(validation_generator.classes, num_classes=NUM_CLASSES), cl)

    acc = (y_pred_agg == y_test_agg).sum() / len(y_pred_agg)
    accuracies.append(acc)
    print(f"accuracy with val_idx={val_idx} is {acc}")

    cm = confusion_matrix(y_test_agg, y_pred_agg, num_classes=NUM_CLASSES)
    confusion_matrices.append(cm)


Starting loop with val_idx=1
Found 9936 images belonging to 10 classes.


2022-04-22 15:30:56.171291: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-04-22 15:31:03.150213: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10380 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:05:00.0, compute capability: 6.1
2022-04-22 15:31:03.151124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 1523 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:06:00.0, compute capability: 6.1
2022-04-22 15:31:03.151691: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:loca

accuracy with val_idx=1 is 0.748780487804878
Starting loop with val_idx=2
Found 10809 images belonging to 10 classes.
accuracy with val_idx=2 is 0.6429378531073446
Starting loop with val_idx=3
Found 11131 images belonging to 10 classes.
accuracy with val_idx=3 is 0.7114164904862579
Starting loop with val_idx=4
Found 10468 images belonging to 10 classes.
accuracy with val_idx=4 is 0.8020361990950227
Starting loop with val_idx=5
Found 9368 images belonging to 10 classes.
accuracy with val_idx=5 is 0.7312661498708011
Starting loop with val_idx=6
Found 9738 images belonging to 10 classes.
accuracy with val_idx=6 is 0.7231920199501247
Starting loop with val_idx=7
Found 9106 images belonging to 10 classes.
accuracy with val_idx=7 is 0.7433510638297872
Starting loop with val_idx=8
Found 9427 images belonging to 10 classes.
accuracy with val_idx=8 is 0.7794871794871795
Starting loop with val_idx=9
Found 9612 images belonging to 10 classes.
accuracy with val_idx=9 is 0.8281444582814446


In [19]:
np.mean(accuracies), np.std(accuracies)

(0.7456235446569823, 0.05118349004535925)