In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Concatenate, GlobalAveragePooling2D, Reshape, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np

def load_mnist():
    """Load MNIST and return channel-last float32 images scaled by 1/255.

    Returns:
        ((X_train, y_train), (X_test, y_test)): the image arrays gain a
        trailing axis of size 1, are cast to float32 and divided by 255.0;
        the label arrays are passed through exactly as ``mnist.load_data()``
        returns them.
    """
    def _prepare(images):
        # Append the channel axis first, then cast and rescale
        return images[..., np.newaxis].astype('float32') / 255.0

    (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

    return (_prepare(train_images), train_labels), (_prepare(test_images), test_labels)

# Materialize the preprocessed train/test splits as module-level arrays;
# the training loop further down reads X_train, y_train, X_test and y_test.
(X_train, y_train), (X_test, y_test) = load_mnist()



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Base imports for model creation and compilation
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.layers import Concatenate, Add, GlobalAveragePooling2D, AveragePooling2D
from tensorflow.keras.layers import SeparableConv2D, Reshape, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# For functional API usage
from tensorflow.keras import backend as K

# Additional utilities
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.datasets import cifar10

# Make sure to use the correct imports as required for specific layers or utilities


def create_vgg_like_model(input_shape):
    """Build and compile a VGG-style stack of 3x3 convolutions.

    Three convolutional stages (2x64, 2x128 and 3x256 filters), each closed
    by a 2x2 max pool, feed Flatten -> Dense(256, relu) -> Dropout(0.5) ->
    Dense(10, softmax). The model is compiled with Adam, sparse categorical
    cross-entropy and an accuracy metric.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    conv_opts = dict(padding='same', activation='relu')
    stages = ((64, 2), (128, 2), (256, 3))  # (filters, number of convs) per stage

    stack = []
    for filters, depth in stages:
        for _ in range(depth):
            if stack:
                stack.append(Conv2D(filters, (3, 3), **conv_opts))
            else:
                # Only the very first layer carries the input shape
                stack.append(Conv2D(filters, (3, 3), input_shape=input_shape, **conv_opts))
        stack.append(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    stack.append(Flatten())
    stack.append(Dense(256, activation='relu'))
    stack.append(Dropout(0.5))
    stack.append(Dense(10, activation='softmax'))

    network = Sequential(stack)
    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network


def create_inception_like_model(input_shape):
    """Build and compile a single Inception-style module with a dense head.

    Three parallel branches read the same input:
      * 1x1 conv -> 3x3 conv
      * 1x1 conv -> 5x5 conv
      * 3x3 max pool (stride 1) -> 1x1 conv
    Every convolution has 64 filters, 'same' padding and ReLU. The branch
    outputs are concatenated on the last axis, flattened, and passed through
    Dense(256, relu) -> Dropout(0.4) -> Dense(10, softmax).

    Args:
        input_shape: Shape given to the ``Input`` layer.

    Returns:
        The compiled functional ``Model``.
    """
    def relu_conv(kernel, tensor):
        # 64-filter ReLU convolution with 'same' padding applied to `tensor`
        return Conv2D(64, kernel, padding='same', activation='relu')(tensor)

    image = Input(shape=input_shape)

    # Branches are built in the same order as they are concatenated
    branch_3x3 = relu_conv((3, 3), relu_conv((1, 1), image))
    branch_5x5 = relu_conv((5, 5), relu_conv((1, 1), image))
    branch_pool = relu_conv((1, 1), MaxPooling2D((3, 3), strides=(1, 1), padding='same')(image))

    merged = Concatenate(axis=-1)([branch_3x3, branch_5x5, branch_pool])

    head = Flatten()(merged)
    head = Dense(256, activation='relu')(head)
    head = Dropout(0.4)(head)
    predictions = Dense(10, activation='softmax')(head)

    network = Model(inputs=image, outputs=predictions)
    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_mobilenet_like_model(input_shape):
    """Build and compile a small network built around SeparableConv2D layers.

    A stride-2 3x3 stem convolution (32 filters) is followed by two
    SeparableConv2D layers (64 and 128 filters). Each of those three layers
    is followed by batch normalization, and a 2x2 max pool sits after the
    first separable stage. The head is Flatten -> Dense(128, relu) ->
    Dropout(0.5) -> Dense(10, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the stem convolution.

    Returns:
        The compiled Sequential model.
    """
    relu_same = dict(padding='same', activation='relu')

    stem = [
        Conv2D(32, (3, 3), strides=(2, 2), input_shape=input_shape, **relu_same),
        BatchNormalization(),
    ]
    separable_stages = [
        SeparableConv2D(64, (3, 3), **relu_same),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        SeparableConv2D(128, (3, 3), **relu_same),
        BatchNormalization(),
    ]
    classifier = [
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    ]

    network = Sequential(stem + separable_stages + classifier)
    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_shallow_wide_model(input_shape):
    """Build and compile a shallow network with two wide 256-filter convolutions.

    Layout: Conv(256) -> Conv(256) -> 2x2 max pool -> Flatten ->
    Dense(256, relu) -> Dropout(0.5) -> Dense(10, softmax). Both
    convolutions are 3x3 with 'same' padding and ReLU.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    wide_conv = dict(padding='same', activation='relu')

    network = Sequential()

    # Feature extractor
    network.add(Conv2D(256, (3, 3), input_shape=input_shape, **wide_conv))
    network.add(Conv2D(256, (3, 3), **wide_conv))
    network.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    network.add(Flatten())
    network.add(Dense(256, activation='relu'))
    network.add(Dropout(0.5))
    network.add(Dense(10, activation='softmax'))

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_asymmetric_convolution_model(input_shape):
    """Build and compile a network that uses 1x3 / 3x1 convolution pairs.

    Two pairs of asymmetric convolutions (64 filters, then 128 filters; each
    pair is a (1, 3) kernel followed by a (3, 1) kernel, 'same' padding,
    ReLU) are followed by a 2x2 max pool and the head Flatten ->
    Dense(256, relu) -> Dropout(0.5) -> Dense(10, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential()

    is_first_layer = True
    for width in (64, 128):
        for kernel in ((1, 3), (3, 1)):
            # Only the very first layer carries the input shape
            shape_kwarg = {'input_shape': input_shape} if is_first_layer else {}
            network.add(Conv2D(width, kernel, padding='same', activation='relu', **shape_kwarg))
            is_first_layer = False

    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Flatten())
    network.add(Dense(256, activation='relu'))
    network.add(Dropout(0.5))
    network.add(Dense(10, activation='softmax'))

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_capsule_inspired_model(input_shape):
    """Build and compile a network whose class scores are vector lengths.

    Two 3x3 ReLU convolutions (64 filters, default padding) are flattened
    into a 160-unit sigmoid layer. Those 160 values are reshaped to
    (10, 16), each 16-component vector is reduced to its Euclidean norm,
    and a softmax is applied over the 10 resulting lengths.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential()

    network.add(Conv2D(64, (3, 3), activation='relu', input_shape=input_shape))
    network.add(Conv2D(64, (3, 3), activation='relu'))
    network.add(Flatten())

    # 160 sigmoid units, regrouped below as 10 vectors of 16 components each
    network.add(Dense(160, activation='sigmoid'))
    network.add(Reshape((10, 16)))

    # Collapse every 16-component vector to its length (sqrt of sum of squares)
    network.add(Lambda(lambda capsules: K.sqrt(K.sum(K.square(capsules), axis=2))))
    network.add(Activation('softmax'))

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_minimalistic_model(input_shape):
    """Build and compile the smallest model in the collection.

    Layout: one 3x3 ReLU convolution (32 filters, default padding) ->
    2x2 max pool -> Flatten -> Dense(10, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the convolution.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential()
    network.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Flatten())
    network.add(Dense(10, activation='softmax'))

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_high_dropout_model(input_shape):
    """Build and compile a two-convolution network with three 0.5 dropouts.

    Layout: Conv(128) -> Dropout(0.5) -> Conv(128) -> 2x2 max pool ->
    Dropout(0.5) -> Flatten -> Dense(256, relu) -> Dropout(0.5) ->
    Dense(10, softmax). Both convolutions are 3x3 with 'same' padding and
    ReLU.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    conv_opts = dict(padding='same', activation='relu')

    layer_plan = (
        Conv2D(128, (3, 3), input_shape=input_shape, **conv_opts),
        Dropout(0.5),
        Conv2D(128, (3, 3), **conv_opts),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.5),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax'),
    )

    network = Sequential()
    for layer in layer_plan:
        network.add(layer)

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

def create_increased_bn_model(input_shape):
    """Build and compile a small network with batch normalization throughout.

    Layout: Conv(64) -> BN -> Conv(64) -> BN -> 2x2 max pool -> BN ->
    Flatten -> Dense(128, relu) -> BN -> Dense(10, softmax). Both
    convolutions are 3x3 with 'same' padding and ReLU.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first convolution.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential()

    # Convolutional part: a BatchNormalization follows every layer here
    network.add(Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    network.add(BatchNormalization())
    network.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    network.add(BatchNormalization())
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(BatchNormalization())

    # Dense part
    network.add(Flatten())
    network.add(Dense(128, activation='relu'))
    network.add(BatchNormalization())
    network.add(Dense(10, activation='softmax'))

    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network

from tensorflow.keras.layers import Input, Conv2D ,Concatenate,BatchNormalization, ReLU, Add, MaxPooling2D, GlobalAveragePooling2D, Dense, AveragePooling2D

def resnet_lite_block(input_tensor, filters, kernel_size, strides=(1, 1), activation='relu', l2_reg=0.001):
    """A simplified ResNet block: two convolutions plus a projected shortcut.

    Main path: Conv -> BatchNorm -> activation -> Conv -> BatchNorm.
    Shortcut:  1x1 Conv -> BatchNorm applied to ``input_tensor`` (the
    shortcut is always a convolution, never a bare identity).
    The two paths are summed and passed through the activation once more.

    Args:
        input_tensor: Tensor the block is applied to.
        filters: Number of filters for every convolution in the block.
        kernel_size: Kernel size of the two main-path convolutions.
        strides: Strides of the first main-path convolution and of the
            shortcut convolution; the second main-path convolution always
            uses (1, 1).
        activation: Activation applied after the first BatchNorm and after
            the residual addition. Any value accepted by the Keras
            ``Activation`` layer may be given.
        l2_reg: L2 factor used for the kernel regularizer of every
            convolution in the block.

    Returns:
        The output tensor of the block.
    """
    def activate(tensor):
        # The default keeps a dedicated ReLU layer so graphs built with the
        # default argument are unchanged; every other choice goes through
        # the generic Activation layer so the parameter is actually honored.
        if activation == 'relu':
            return ReLU()(tensor)
        return Activation(activation)(tensor)

    # Main path
    x = Conv2D(filters, kernel_size, strides=strides, padding='same', kernel_regularizer=l2(l2_reg))(input_tensor)
    x = BatchNormalization()(x)
    x = activate(x)

    x = Conv2D(filters, kernel_size, strides=(1, 1), padding='same', kernel_regularizer=l2(l2_reg))(x)
    x = BatchNormalization()(x)

    # Projection shortcut: matches the main path's filter count and strides
    shortcut = Conv2D(filters, (1, 1), strides=strides, padding='same', kernel_regularizer=l2(l2_reg))(input_tensor)
    shortcut = BatchNormalization()(shortcut)

    x = Add()([x, shortcut])
    return activate(x)

def create_extreme_resnet_model(input_shape):
    """Build and compile a network of ten stacked ``resnet_lite_block`` units.

    Five blocks with 64 filters are followed by five blocks with 128
    filters (all with 3x3 kernels and l2_reg=0.001). Global average pooling
    then feeds Dense(256, relu) -> Dropout(0.5) -> Dense(10, softmax).

    Args:
        input_shape: Shape given to the ``Input`` layer.

    Returns:
        The compiled functional ``Model``.
    """
    image = Input(shape=input_shape)

    # Two groups of five residual blocks each: 64 filters, then 128 filters
    features = image
    for width in (64, 128):
        for _ in range(5):
            features = resnet_lite_block(features, width, (3, 3), l2_reg=0.001)

    # Classifier head
    head = GlobalAveragePooling2D()(features)
    head = Dense(256, activation='relu')(head)
    head = Dropout(0.5)(head)
    predictions = Dense(10, activation='softmax')(head)

    network = Model(inputs=image, outputs=predictions)
    network.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return network


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import os

# --- Configuration ---------------------------------------------------------
# Input shape shared by every architecture below.
INPUT_SHAPE = (28, 28, 1)

# Output locations. Every path is derived from DRIVE_ROOT so the existence
# check below and the save calls cannot drift apart.
DRIVE_ROOT = '/content/drive/My Drive'
MODEL_DIR = f'{DRIVE_ROOT}/ood_generalization_proj/cnn_models/MNIST_models'
CHECKPOINT_DIR = f'{MODEL_DIR}/checkpoints'

batch_size = 64
epochs = 8
# Must stay below `epochs`: a patience >= epochs means EarlyStopping can
# never trigger, which makes the callback dead code.
early_stopping_patience = 3

# --- Fail fast on a missing output root -------------------------------------
# Without this, a missing directory is only discovered when the first
# checkpoint is written, i.e. after a full epoch of training.
if not os.path.isdir(DRIVE_ROOT):
    raise FileNotFoundError(
        f"Output root {DRIVE_ROOT!r} does not exist; mount Google Drive "
        f"(or adjust DRIVE_ROOT) before training."
    )
# Creates MODEL_DIR as well, since CHECKPOINT_DIR is nested inside it.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# --- Models ----------------------------------------------------------------
models = {
    "vgg_like": create_vgg_like_model(INPUT_SHAPE),
    "inception_like": create_inception_like_model(INPUT_SHAPE),
    "mobilenet_like": create_mobilenet_like_model(INPUT_SHAPE),
    "shallow_wide": create_shallow_wide_model(INPUT_SHAPE),
    "asymmetric_conv": create_asymmetric_convolution_model(INPUT_SHAPE),
    "capsule_inspired": create_capsule_inspired_model(INPUT_SHAPE),
    "minimalistic": create_minimalistic_model(INPUT_SHAPE),
    "high_dropout": create_high_dropout_model(INPUT_SHAPE),
    "increased_bn": create_increased_bn_model(INPUT_SHAPE),
    "extreme_resnet": create_extreme_resnet_model(INPUT_SHAPE)
}

# Training history per model name; previously each History object was
# overwritten by the next iteration and only the last one survived.
histories = {}

# Loop to compile, train, and save each model
for model_name, model in models.items():
    print(f"Training model: {model_name}")
    # The factory functions already compile with these settings; repeated
    # here so the loop does not depend on that.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Early stopping and model checkpoint (fresh callbacks for every model)
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=early_stopping_patience,
        restore_best_weights=True,
    )
    model_checkpoint = ModelCheckpoint(f'{CHECKPOINT_DIR}/{model_name}_best.h5', save_best_only=True)

    # Fit the model
    # NOTE(review): the test split doubles as validation data, so checkpoint
    # selection and early stopping are driven by test performance. Consider
    # holding out part of the training set instead — confirm with the owner.
    history = model.fit(
        X_train, y_train,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping, model_checkpoint]
    )
    histories[model_name] = history

    # Save the model as it stands after training (in addition to the best
    # checkpoint written by ModelCheckpoint)
    model.save(f'{MODEL_DIR}/{model_name}.keras')

Training model: vgg_like
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: inception_like
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: mobilenet_like
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: shallow_wide
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: asymmetric_conv
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: capsule_inspired
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: minimalistic
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: high_dropout
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training model: increased_bn
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Training mod