# Normal Code Training

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import time
from datetime import datetime
import os
import psutil

# Load the CIFAR-10 train/test splits.
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Scale both image arrays by 1/255 so pixel values fall between 0 and 1.
train_images, test_images = train_images / 255.0, test_images / 255.0

# One-hot encode the labels of both splits over `num_classes` categories.
num_classes = 10
train_labels, test_labels = (
    to_categorical(labels, num_classes)
    for labels in (train_labels, test_labels)
)

# Build the CNN as a single layer list: two conv-conv-pool-dropout stages
# (32 then 64 filters) followed by a dense softmax classifier.
model = Sequential([
    # Stage 1: 32 filters; the first layer fixes the per-image input shape.
    Conv2D(32, (3, 3), activation='relu', padding='same',
           input_shape=train_images.shape[1:]),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: one softmax output per class.
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Training hyperparameters.
epochs = 10
batch_size = 128

# Time the fit() call. Note that the test split doubles as the validation
# data here, so the per-epoch validation metrics are computed on test data.
start_time = time.time()
model.fit(
    train_images,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(test_images, test_labels),
)
end_time = time.time()

# Wall-clock duration between the recorded start and end times, in seconds.
total_training_time = end_time - start_time

# Resident set size (RSS) of this Python process, sampled now (after
# training) and converted from bytes to MB (1024 * 1024 bytes).
process = psutil.Process(os.getpid())
rss_bytes = process.memory_info().rss
memory_usage = rss_bytes / 1024 / 1024

print("RAM usage: ", memory_usage, " MB")
print("Total training time: ", total_training_time, " seconds.")

# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)
print('Test accuracy:', test_acc)


# Layer-to-Layer Training

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
import time
import os
import psutil

# Number of target classes; also the width of the one-hot label vectors.
num_classes = 10

# Load the CIFAR-10 train/test splits.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Scale both image arrays by 1/255.
x_train, x_test = x_train / 255.0, x_test / 255.0

# One-hot encode the labels of both splits.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# Start the clock before the model is built, so the reported time covers
# model construction as well as every training stage.
start_time = time.time()

# Build the CNN as a single layer list: two conv-conv-pool-dropout stages
# (32 then 64 filters) followed by a dense softmax classifier.
model = Sequential([
    # Stage 1: 32 filters; the first layer fixes the per-image input shape.
    Conv2D(32, (3, 3), activation='relu', padding='same',
           input_shape=x_train.shape[1:]),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters.
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: one softmax output per class.
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

# Independent snapshots of the network, one per training stage. They are
# averaged as an ensemble further down.
models = []

# Stage i trains only layers i .. (num_layers - i - 1) and freezes the rest,
# so the trainable window shrinks from both ends toward the middle of the
# layer stack with each stage.
num_layers = len(model.layers)
for i in range(num_layers // 2):
    # Inclusive bounds of the trainable window for this stage.
    first_layer = i
    last_layer = num_layers - i - 1

    # Freeze every layer outside the window; (re-)enable those inside it.
    for j, layer in enumerate(model.layers):
        layer.trainable = first_layer <= j <= last_layer

    # Recompile so the updated `trainable` flags are picked up; this also
    # creates a fresh Adam optimizer for the stage. `learning_rate` is the
    # supported argument name (`lr` is a deprecated alias that newer Keras
    # releases reject).
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=0.001),
                  metrics=['accuracy'])

    # Train for 10 epochs with the chosen layers and keep the per-epoch
    # metrics from the History object returned by fit().
    history = model.fit(x_train, y_train,
                        batch_size=128,
                        epochs=10,
                        validation_data=(x_test, y_test)).history

    # Store an independent copy of the current weights. Appending `model`
    # itself would put the same object in the list every stage, making the
    # later ensemble an average of identical predictions.
    snapshot = tf.keras.models.clone_model(model)
    snapshot.set_weights(model.get_weights())
    models.append(snapshot)

    # Print the accuracies for each epoch of this stage.
    print(f"Training accuracy after {i+1} set of layers: {history['accuracy']}")
    print(f"Validation accuracy after {i+1} set of layers: {history['val_accuracy']}")

# Stop the clock once every training stage has finished.
end_time = time.time()

# Wall-clock duration between the recorded start and end times, in seconds.
total_training_time = end_time - start_time

# Resident set size (RSS) of this Python process, sampled now (after
# training) and converted from bytes to MB (1024 * 1024 bytes).
process = psutil.Process(os.getpid())
rss_bytes = process.memory_info().rss
memory_usage = rss_bytes / 1024 / 1024

print("RAM usage: ", memory_usage, " MB")
print("Total training time: ", total_training_time, " seconds.")

# Collect the test-set class probabilities predicted by every stored model.
num_models = len(models)
predictions = []
for model in models:
    y_pred = model.predict(x_test)
    predictions.append(y_pred)

# Ensemble output: element-wise mean of the individual predictions.
y_pred_ensemble = sum(predictions) / num_models

# Compare the ensemble's predicted class with the one-hot ground truth and
# report the resulting categorical accuracy.
accuracy = tf.keras.metrics.CategoricalAccuracy()
accuracy.update_state(y_test, y_pred_ensemble)
print("Ensemble accuracy:", accuracy.result().numpy())