In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

plt.style.use(['seaborn-v0_8-colorblind', 'seaborn-v0_8-darkgrid'])
plt.rcParams.update({'font.size': 20})

np.set_printoptions(suppress=True, precision=4)

# Automatically reload your external source code
%load_ext autoreload
%autoreload 2

In [None]:
from vgg_nets import VGG4Plus, VGG15, VGG15Plus, VGG15PlusPlus, VGG16Plus, VGG16PlusPlus
from vgg_nets import VGG8, VGG8OnOff, VGG8OnOffNoReduction, VGG15PlusPlusOffOn, VGG15PlusPlusOffOn
from datasets import get_dataset
import datasets
import time

In [15]:
# Fetch the CIFAR-10 train/validation/test splits (requested with val_prop=0.2)
# together with the class names.
(x_train, y_train,
 x_val, y_val,
 x_test, y_test,
 classnames) = datasets.get_dataset('cifar10', val_prop=0.2)

# Collects one result dict per trained model (appended by the training cell below).
results = list()

In [9]:
# Train a single VGG15PlusPlus network on the CIFAR-10 splits, evaluate it on the
# test set, and append a summary record to `results`.
SEED = 7

# Label used in log messages and stored in the result record. It must be defined
# here: otherwise the cell either fails with NameError after training finishes
# (fresh kernel) or silently records a stale name left over from another cell.
model_type = "VGG15PlusPlus"

# Clear any previous Keras state before seeding, i.e. the same order the
# multi-model training loop uses, so the seeds are set on a fresh session.
tf.keras.backend.clear_session()
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = VGG15PlusPlus(C=10, input_feats_shape=(32, 32, 3), wt_init='he')

model.compile(optimizer='adamw')

# train the model (wall-clock timed)
start_time = time.time()
print(f"Starting training for {model_type}...")

train_loss_hist, val_loss_hist, val_acc_hist, epochs = model.fit(
    x_train, y_train,
    x_val, y_val,
    max_epochs=100,
    patience=15,
    lr_patience=4,
    verbose=True,
    lr_decay_factor=0.5,
    lr_max_decays=12
)

training_time = time.time() - start_time
print(f"Training completed in {training_time:.2f} seconds ({training_time/60:.2f} minutes)")

# evaluate on test set
test_acc, test_loss = model.evaluate(x_test, y_test)
print(f"{model_type} Test Accuracy: {test_acc:.4f}")

# Same record layout as the one produced by the multi-model training loop.
result = {
    'model_type': model_type,
    'test_accuracy': test_acc,
    'test_loss': test_loss,
    'train_loss_history': train_loss_hist,
    'val_loss_history': val_loss_hist,
    'val_acc_history': val_acc_hist,
    'epochs': epochs,
    'training_time': training_time
}
results.append(result)

---------------------------------------------------------------------------
Dense layer output(output) shape: [1, 10]
dense_block:
	Dropout layer output(dense_block/dropout_layer_0) shape: [1, 512]
	Dense layer output(dense_block/dense_layer_0) shape: [1, 512]
Flatten layer output(flatten) shape: [1, 512]
conv_block_5:
	Dropout layer output(conv_block_5/dropout_layer_2) shape: [1, 1, 1, 512]
	MaxPool2D layer output(conv_block_5/max_pool_layer_2) shape: [1, 1, 1, 512]
	Conv2D layer output(conv_block_5/conv_layer_2) shape: [1, 2, 2, 512]
	Conv2D layer output(conv_block_5/conv_layer_1) shape: [1, 2, 2, 512]
	Conv2D layer output(conv_block_5/conv_layer_0) shape: [1, 2, 2, 512]
conv_block_4:
	Dropout layer output(conv_block_4/dropout_layer_2) shape: [1, 2, 2, 512]
	MaxPool2D layer output(conv_block_4/max_pool_layer_2) shape: [1, 2, 2, 512]
	Conv2D layer output(conv_block_4/conv_layer_2) shape: [1, 4, 4, 512]
	Conv2D layer output(conv_block_4/conv_layer_1) shape: [1, 4, 4, 512]
	Conv2D layer

KeyboardInterrupt: 

In [None]:
# Train every architecture listed in `all_models` on CIFAR-10, evaluate each on
# the test set, and collect one summary record per model in `results`.
SEED = 1
np.random.seed(SEED)
tf.random.set_seed(SEED)

results = []
all_models = ["VGG4Plus", "VGG15", "VGG15Plus", "VGG15PlusPlus", "VGG15PlusPlusOffOn"]

# Name -> network class. A lookup table replaces the if/elif chain, which held a
# duplicated (unreachable) "VGG15PlusPlus" branch and had no fallback for
# unrecognised names.
model_classes = {
    "VGG4Plus": VGG4Plus,
    "VGG15": VGG15,
    "VGG15Plus": VGG15Plus,
    "VGG15PlusPlus": VGG15PlusPlus,
    "VGG15PlusPlusOffOn": VGG15PlusPlusOffOn,
}

# Fail before any training starts if a requested name has no class. Without this
# check an unknown name would re-use whatever `model` was left over from a
# previous cell or iteration.
unknown_models = [name for name in all_models if name not in model_classes]
if unknown_models:
    raise ValueError(f"Unknown model type(s): {unknown_models}")

# get dataset with validation split
x_train, y_train, x_val, y_val, x_test, y_test, classnames = get_dataset('cifar10', val_prop=0.2)

# loop through each model to train
for model_idx, model_type in enumerate(all_models):
    # clear session to free memory
    tf.keras.backend.clear_session()

    # Each model gets its own seed, offset from SEED by its position in the list.
    current_seed = SEED + model_idx
    tf.random.set_seed(current_seed)
    np.random.seed(current_seed)

    print(f"\n{'='*20} Training {model_type} {'='*20}")

    model = model_classes[model_type](C=10, input_feats_shape=(32, 32, 3), wt_init='he')

    # compile with AdamW optimizer
    model.compile(optimizer='adamw')

    # train the model (wall-clock timed)
    start_time = time.time()
    print(f"Starting training for {model_type}...")

    train_loss_hist, val_loss_hist, val_acc_hist, epochs = model.fit(
        x_train, y_train,
        x_val, y_val,
        max_epochs=100,
        patience=15,
        lr_patience=4,
        verbose=True,
        lr_decay_factor=0.5,
        lr_max_decays=12
    )

    training_time = time.time() - start_time
    print(f"Training completed in {training_time:.2f} seconds ({training_time/60:.2f} minutes)")

    # evaluate on test set
    test_acc, test_loss = model.evaluate(x_test, y_test)
    print(f"{model_type} Test Accuracy: {test_acc:.4f}")

    result = {
        'model_type': model_type,
        'test_accuracy': test_acc,
        'test_loss': test_loss,
        'train_loss_history': train_loss_hist,
        'val_loss_history': val_loss_hist,
        'val_acc_history': val_acc_hist,
        'epochs': epochs,
        'training_time': training_time
    }
    results.append(result)

    # Save individual results in case notebook crashes.
    # np.save stores the dict as a pickled 0-d object array; read it back with
    # np.load(path, allow_pickle=True).item().
    np.save(f"{model_type}_results_10b.npy", result)
    print(f"Saved results for {model_type}")



---------------------------------------------------------------------------
Dense layer output(output_layer) shape: [1, 10]
Dropout layer output(dropout_layer_3) shape: [1, 128]
Dense layer output(dense_layer_3) shape: [1, 128]
Flatten layer output(flatten_layer_2) shape: [1, 16384]
MaxPool2D layer output(max_pool_layer_1) shape: [1, 16, 16, 64]
Conv2D layer output(conv_layer_2) shape: [1, 32, 32, 64]
Conv2D layer output(conv_layer_1) shape: [1, 32, 32, 64]
---------------------------------------------------------------------------
Starting training for VGG4Plus...
Epoch 1/100: Train Loss: 1.8623, Val Loss: 1.3538, Val Acc: 0.5233, Time: 10.66s
Epoch 2/100: Train Loss: 1.4332, Val Loss: 1.2332, Val Acc: 0.5792, Time: 9.86s
Epoch 3/100: Train Loss: 1.2457, Val Loss: 1.0930, Val Acc: 0.6242, Time: 9.83s
Epoch 4/100: Train Loss: 1.1278, Val Loss: 1.0274, Val Acc: 0.6477, Time: 9.81s
Epoch 5/100: Train Loss: 1.0338, Val Loss: 1.0027, Val Acc: 0.6597, Time: 9.81s
Epoch 6/100: Train Loss: 0