<a href="https://colab.research.google.com/github/h599002/AltGenFrontend/blob/main/RefinedModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trying to reset

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers, preprocessing
from tensorflow.keras.utils import to_categorical
import keras as keras

gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

from psutil import virtual_memory
# Total system memory in gigabytes (decimal, 1e9 bytes per GB).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Runtimes reporting 20 GB or more are treated as "high-RAM".
print('Not using a high-RAM runtime' if ram_gb < 20 else 'You are using a high-RAM runtime!')

!mkdir -p ~/.kaggle
!echo '{"username":"vegardaaalbretsen","key":"18f385007d1223dd35dc94f16e311545"}' > ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json
#!kaggle datasets download -d puneet6060/intel-image-classification
#!unzip intel-image-classification.zip -d /content/dataset

!kaggle datasets download -d seryouxblaster764/fgvc-aircraft
!unzip fgvc-aircraft.zip -d /content/dataset



[1;30;43mStrømmer utdata som er avkortet til de siste 5000 linjene.[0m
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240217.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240265.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240315.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240316.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240324.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240500.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240501.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240881.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images/1240934.jpg  
  inflating: /content/dataset/fgvc-aircraft-2013b/fgvc-aircraft

In [None]:
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os

# Locations of the split CSV files and of the extracted image folder
dataset_path = '/content/dataset'
image_path = '/content/dataset/fgvc-aircraft-2013b/fgvc-aircraft-2013b/data/images'

# Read the train / val / test split tables
train_df, val_df, test_df = (
    pd.read_csv(os.path.join(dataset_path, f'{split}.csv'))
    for split in ('train', 'val', 'test')
)

for split_df in (train_df, val_df, test_df):
    # Absolute path of every image listed in the split
    split_df['filepath'] = split_df['filename'].apply(
        lambda name: os.path.join(image_path, name)
    )
    # Class labels are handled as strings from here on
    split_df['Labels'] = split_df['Labels'].astype(str)

# Target image resolution (height, width) and number of samples per batch
img_size = (448, 448)
batch_size = 32

# Function to load and preprocess images with augmentation for training
def load_and_augment_image(image_path, label):
    """Load one training image, normalise it and apply random augmentation.

    Args:
        image_path: Path of the JPEG file to read (passed to ``tf.io.read_file``).
        label: Label belonging to the image; returned reshaped to a scalar,
            matching ``load_image``.

    Returns:
        Tuple ``(img, label)`` where ``img`` is resized to ``img_size`` and
        has values in [0, 1].
    """
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    # Drop the bottom 12 pixel rows (black line at the bottom of the image).
    # NOTE(review): the FGVC-Aircraft README describes a 20-pixel banner; confirm 12 is intended.
    img = img[:-12, :, :]
    img = tf.image.resize(img, img_size) / 255.0  # Normalize to [0, 1]

    # Apply Data Augmentation
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, max_delta=0.1)
    img = tf.image.random_contrast(img, lower=0.9, upper=1.1)
    img = tf.image.random_saturation(img, lower=0.9, upper=1.1)
    img = tf.image.random_hue(img, max_delta=0.01)

    # The colour jitter above can push pixel values outside [0, 1]; clip so the
    # training inputs stay in the same range as the validation/test inputs.
    img = tf.clip_by_value(img, 0.0, 1.0)

    # **Additional Transformations**
    #img = tf.image.random_crop(img, size=[200, 200, 3])  # Random Cropping (optional)
    #img = tf.image.resize(img, (224, 224))  # Resize Back

    label = tf.reshape(label, [])
    return img, label

# Function to load images without augmentation for validation/testing
def load_image(image_path, label):
    """Load one image without augmentation (used for validation and testing).

    Reads the JPEG at ``image_path``, removes the bottom 12 pixel rows,
    resizes to ``img_size`` and divides by 255. Returns ``(img, label)`` with
    the label reshaped to a scalar.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    cropped = decoded[:-12, :, :]  # Remove the black line on the bottom
    img = tf.image.resize(cropped, img_size) / 255.0  # Normalize
    return img, tf.reshape(label, [])

# Convert DataFrames to TensorFlow datasets
def dataframe_to_dataset(df, batch_size=batch_size, shuffle=True, augment=False, drop_remainder=True):
    """Turn a split DataFrame into a batched ``tf.data.Dataset`` of (image, label).

    Args:
        df: DataFrame with a ``filepath`` column (image paths) and a
            ``Labels`` column (class labels).
        batch_size: Number of samples per batch.
        shuffle: Whether to shuffle the samples.
        augment: If True images are loaded with ``load_and_augment_image``,
            otherwise with ``load_image``.
        drop_remainder: Whether to drop the last, smaller batch. Defaults to
            True (the previous fixed behaviour); pass False for evaluation so
            that no samples are skipped.

    Returns:
        A batched dataset with a prefetch buffer of 2 batches.

    NOTE(review): label codes are derived from the categories present in
    ``df`` alone, so they only agree across splits when every split contains
    the same set of labels. Verify against the CSV files.
    """
    file_paths = df['filepath'].values
    labels = df['Labels'].astype('category').cat.codes.values  # Convert labels to numerical format
    dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))

    # Shuffle the (path, label) pairs *before* decoding: a full-size shuffle
    # buffer then holds short strings instead of len(df) decoded float images.
    # The length guard avoids shuffle(0), which tf.data rejects.
    if shuffle and len(df) > 0:
        dataset = dataset.shuffle(len(df))

    loader = load_and_augment_image if augment else load_image
    dataset = dataset.map(loader, num_parallel_calls=tf.data.AUTOTUNE)

    return dataset.batch(batch_size, drop_remainder=drop_remainder).prefetch(2)

# Build the three tf.data pipelines: the training split is shuffled and
# augmented, the validation and test splits keep their order and are not augmented.
train_airplane = dataframe_to_dataset(train_df, batch_size, shuffle=True, augment=True)
val_airplane = dataframe_to_dataset(val_df, batch_size, shuffle=False)
test_airplane = dataframe_to_dataset(test_df, batch_size, shuffle=False)



In [None]:
def relu6(x):
    """Apply ``tf.nn.relu6`` to ``x`` and return the result."""
    return tf.nn.relu6(x)

def se_module(inputs, reduction=4):
    """Squeeze-and-excitation: rescale the channels of ``inputs`` by learned gates.

    The feature map is globally average-pooled, passed through a bottleneck of
    ``channels // reduction`` ReLU units and a sigmoid layer with one unit per
    channel, and the resulting gates are multiplied with ``inputs``.
    """
    channels = inputs.shape[-1]
    bottleneck_units = channels // reduction

    squeezed = layers.GlobalAveragePooling2D()(inputs)
    # Reshape to (1, 1, channels) so the gates broadcast over the spatial axes
    gates = layers.Reshape((1, 1, channels))(squeezed)
    gates = layers.Dense(bottleneck_units, activation='relu')(gates)
    gates = layers.Dense(channels, activation='sigmoid')(gates)

    return layers.multiply([inputs, gates])

@tf.keras.utils.register_keras_serializable()
class StochasticDepth(tf.keras.layers.Layer):
    """Randomly zero the whole input per sample during training.

    With probability ``drop_prob`` a sample's entire tensor is replaced by
    zeros; surviving samples are divided by ``1 - drop_prob``. Outside of
    training the input is returned unchanged.

    Args:
        drop_prob: Probability of dropping a sample, in ``[0, 1)``.

    Raises:
        ValueError: If ``drop_prob`` is outside ``[0, 1)``.
    """

    def __init__(self, drop_prob=0.2, **kwargs):
        super().__init__(**kwargs)
        # drop_prob == 1 would make keep_prob 0 and divide by zero in call()
        if not 0.0 <= drop_prob < 1.0:
            raise ValueError(f"drop_prob must be in [0, 1), got {drop_prob}")
        self.drop_prob = drop_prob

    def call(self, inputs, training=None):
        if training:
            keep_prob = 1.0 - self.drop_prob
            batch_size = tf.shape(inputs)[0]
            # One random draw per sample, broadcast over the remaining three axes
            random_tensor = keep_prob + tf.random.uniform([batch_size, 1, 1, 1], dtype=inputs.dtype)
            binary_tensor = tf.floor(random_tensor)  # Convert to 0 or 1
            return inputs * binary_tensor / keep_prob  # Scale for variance correction
        return inputs

    def get_config(self):
        """Return the layer config including ``drop_prob``.

        Without this, a saved model is restored with the default drop
        probability instead of the one it was built with.
        """
        config = super().get_config()
        config.update({"drop_prob": self.drop_prob})
        return config

def inverted_residual_block(inputs, expansion_factor, output_channels, stride, use_se=True, dropout_rate=0.15, drop_prob=0.2):
    """Inverted residual block: 1x1 expansion, 3x3 depthwise conv, 1x1 projection.

    Args:
        inputs: Input feature map (channels on the last axis).
        expansion_factor: Multiplier applied to the input channel count for
            the expansion convolution.
        output_channels: Number of channels produced by the projection.
        stride: Stride of the depthwise convolution.
        use_se: Whether to apply ``se_module`` after the depthwise stage.
        dropout_rate: Rate of the dropout applied before the projection.
        drop_prob: Stochastic-depth drop probability for the residual branch.

    Returns:
        The block output; the input is added back only when ``stride == 1``
        and the input and output channel counts match.

    Note:
        ``l2`` (``tensorflow.keras.regularizers``) must be in scope when this
        function is called.
    """
    input_channels = inputs.shape[-1]
    expanded_channels = input_channels * expansion_factor

    # Expansion Phase
    x = layers.Conv2D(expanded_channels, kernel_size=1, padding='same', use_bias=False)(inputs)
    x = layers.Activation("relu6")(x)
    x = layers.BatchNormalization()(x)

    # Depthwise Convolution
    x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, padding='same', use_bias=False)(x)
    x = layers.Activation("relu6")(x)
    x = layers.BatchNormalization()(x)

    # Squeeze-and-Excitation Module (Optional)
    if use_se:
        x = se_module(x)

    # Dropout to reduce overfitting
    x = layers.Dropout(dropout_rate)(x)

    # Projection Phase
    x = layers.Conv2D(output_channels, kernel_size=1, padding='same', use_bias=False, kernel_regularizer=l2(1e-4))(x)
    x = layers.BatchNormalization()(x)

    # Residual Connection
    if stride == 1 and input_channels == output_channels:
        # Stochastic depth only makes sense when a skip path exists: dropping
        # the branch without one would zero the sample's features entirely.
        x = StochasticDepth(drop_prob=drop_prob)(x)
        x = layers.add([inputs, x])

    return x

from tensorflow.keras import layers

def residual_block(inputs, output_channels, stride=1, use_se=True, dropout_rate=0.15, drop_prob=0.2):
    """Residual block made of two 3x3 convolutions with optional SE and dropout.

    Args:
        inputs: Input feature map (channels on the last axis).
        output_channels: Number of filters of both convolutions.
        stride: Stride of the first convolution.
        use_se: Whether to apply ``se_module`` after the second convolution.
        dropout_rate: Rate of the dropout applied to the branch.
        drop_prob: Stochastic-depth drop probability for the residual branch.

    Returns:
        The block output; the input is added back only when ``stride == 1``
        and the input and output channel counts match.
    """
    input_channels = inputs.shape[-1]

    # First Convolution
    x = layers.Conv2D(output_channels, kernel_size=3, strides=stride, padding='same', use_bias=False)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # Second Convolution
    x = layers.Conv2D(output_channels, kernel_size=3, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    # Squeeze-and-Excitation Module (Optional)
    if use_se:
        x = se_module(x)

    # Dropout to reduce overfitting
    x = layers.Dropout(dropout_rate)(x)

    # Residual Connection
    if stride == 1 and input_channels == output_channels:
        # Stochastic depth only makes sense when a skip path exists: dropping
        # the branch without one would zero the sample's features entirely.
        x = StochasticDepth(drop_prob=drop_prob)(x)
        x = layers.add([inputs, x])

    return x

def scale_parameters(base_depth, base_width, phi, alpha=1.2, beta=1.1, gamma=1.15):
    """Scale depth and width by compound coefficient ``phi``.

    Depth is multiplied by ``alpha ** phi`` and width by ``beta ** phi``; both
    results are truncated to integers. ``gamma`` is accepted but not used.

    Returns:
        Tuple ``(depth, width)``.
    """
    depth_multiplier = alpha ** phi
    width_multiplier = beta ** phi
    return int(base_depth * depth_multiplier), int(base_width * width_multiplier)

from tensorflow.keras.regularizers import l2

def build_scaled_model(phi, input_shape=(448,448, 3), num_classes=6):
    """Build the classification model, scaled by compound coefficient ``phi``.

    Args:
        phi: Scaling coefficient passed to ``scale_parameters``.
        input_shape: Shape of one input image (height, width, channels).
        num_classes: Number of units of the softmax classifier; must match
            the number of classes in the training labels.

    Returns:
        An uncompiled ``tf.keras.Model``.

    NOTE(review): the filter count doubles after every block and the first
    block uses stride 2, so the skip-connection condition in
    ``residual_block`` (stride 1 and equal channel counts) is never met in
    this architecture. Confirm whether that is intended.
    """
    base_depth = 4   # Number of residual blocks before scaling
    base_width = 72  # Initial number of filters

    depth, width = scale_parameters(base_depth, base_width, phi)

    # Honour the caller-supplied input shape instead of a fixed 448x448x3
    inputs = layers.Input(shape=input_shape)

    # Initial Conv Layer
    x = layers.Conv2D(width, kernel_size=3, strides=2, padding='same', use_bias=False)(inputs)
    x = layers.Activation("relu6")(x)
    x = layers.BatchNormalization()(x)

    # Stack residual blocks; the drop probability grows with depth, capped at 0.2
    for i in range(depth):
        stride = 1 if i > 0 else 2  # Only the first block downsamples
        drop_prob = min(0.05 + (0.02 * i), 0.2)
        x = residual_block(x,
                           #expansion_factor=4,
                           output_channels=width,
                           stride=stride,
                           use_se=True,
                           drop_prob=drop_prob)
        width *= 2  # Double the filters for the next block

    # Final Global Pooling and Classifier
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, x)
    return model



In [None]:
# Size the classifier head from the data instead of relying on the default of
# 6 classes: sparse categorical cross-entropy needs every label index to be
# smaller than the number of output units, otherwise the loss becomes NaN.
num_classes = train_df['Labels'].nunique()
small_model = build_scaled_model(phi=0, input_shape=img_size + (3,), num_classes=num_classes)  # Small
small_model.summary()
print(len(small_model.layers))

54


In [None]:
from tensorflow.keras.optimizers import Nadam

# Number of epochs the learning-rate schedule is laid out over
total_epochs = 110
# Learning rate at the start of the ramp-up and at the end of the ramp-down
initial_lr = 3e-5
# Peak learning rate, reached at 40% of total_epochs
max_lr = 1e-3
# Learning rate the final decay phase heads towards
min_lr = 1e-7

# Nadam optimizer with gradient-norm clipping; its learning rate is overridden
# per epoch when a LearningRateScheduler callback is used.
optimizer = Nadam(learning_rate=initial_lr, clipnorm=1.0)

# 1cycle-style learning rate policy
def lr_schedule(epoch, lr):
    """Return the learning rate for ``epoch``; the incoming ``lr`` is ignored.

    Three linear phases, expressed as fractions of ``total_epochs``:
    0-40% ramps from ``initial_lr`` up to ``max_lr``, 40-80% ramps back down
    to ``initial_lr``, and 80-100% decays from ``initial_lr`` towards ``min_lr``.
    """
    ramp_span = total_epochs * 0.4
    decay_start = total_epochs * 0.8

    if epoch < ramp_span:  # Ramp-up
        return initial_lr + (max_lr - initial_lr) * (epoch / ramp_span)

    if epoch < decay_start:  # Ramp-down
        return max_lr - (max_lr - initial_lr) * ((epoch - ramp_span) / ramp_span)

    # Final decay
    return initial_lr - (initial_lr - min_lr) * ((epoch - decay_start) / (total_epochs * 0.2))
# Callbacks
# Keras callback that calls lr_schedule to set the learning rate.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_schedule)

In [None]:
# Automatically logs training metrics and hyperparameters
#!pip install wandb
import wandb
from wandb.integration.keras import WandbCallback
from tensorflow.keras.optimizers import Nadam

# Start a Weights & Biases run and create the Keras logging callback
# (graph and model uploads disabled).
wandb.init(project="Airplane-track")
wandb_callback = WandbCallback(save_graph=False, save_model=False)
run_name = wandb.run.name
print(run_name)

# Compile the model; labels are integer class indices, hence the sparse loss
small_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Keep only the best model (lowest validation loss), saved under the run name
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=run_name + ".keras",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m599002[0m ([33m599002-h-gskulen-p-vestlandet[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin




likely-wind-52


In [None]:

# Train for exactly the number of epochs the learning-rate schedule was laid
# out for: lr_schedule derives its phases from total_epochs, so a different
# epoch count here would cut the schedule short or run past its end.
history = small_model.fit(
    train_airplane,
    epochs=total_epochs,
    validation_data=val_airplane,
    callbacks=[
        #early_stopping,
        lr_scheduler,
        checkpoint,
        wandb_callback],
)

import matplotlib.pyplot as plt

# Pull the per-epoch curves out of the training history
history_data = history.history
accuracy = history_data["accuracy"]
val_accuracy = history_data["val_accuracy"]
loss = history_data["loss"]
val_loss = history_data["val_loss"]
learning_rate = history_data["learning_rate"]
epochs = range(1, len(accuracy) + 1)

# Side-by-side panels: accuracy on the left, loss on the right
panels = (
    (accuracy, val_accuracy, "Accuracy"),
    (loss, val_loss, "Loss"),
)
plt.figure(figsize=(12, 5))
for position, (train_curve, val_curve, metric) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(epochs, train_curve, "bo-", label=f"Training {metric}")
    plt.plot(epochs, val_curve, "ro-", label=f"Validation {metric}")
    plt.title(f"Training & Validation {metric}")
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend()
    plt.grid()

plt.show()

# Separate figure with the learning rate used in each epoch
plt.figure(figsize=(12, 5))
plt.plot(epochs, learning_rate, "bo-", label="Learning rate")
plt.title("Learning rate")
plt.xlabel("Epochs")
plt.ylabel("Lr")
plt.legend()
plt.grid()

plt.show()

# Log the last-epoch training/validation metrics to Weights & Biases
final_metric_keys = (
    ("Final Training Accuracy", "accuracy"),
    ("Final Validation Accuracy", "val_accuracy"),
    ("Final Training Loss", "loss"),
    ("Final Validation Loss", "val_loss"),
)
wandb.log({title: history.history[key][-1] for title, key in final_metric_keys})

# Evaluate on the held-out test split and log the result
test_loss, test_accuracy = small_model.evaluate(test_airplane)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")
wandb.log({"Test Accuracy": test_accuracy, "Test Loss": test_loss})

# Upload the checkpoint file named after the run, then close the run
checkpoint_file = wandb.run.name + ".keras"
wandb.save(checkpoint_file)
wandb.finish()


Epoch 1/110
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - accuracy: 0.0078 - loss: nan
Epoch 1: val_loss did not improve from inf
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 359ms/step - accuracy: 0.0079 - loss: nan - val_accuracy: 0.0102 - val_loss: nan - learning_rate: 3.0000e-05
Epoch 2/110
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step - accuracy: 0.0121 - loss: nan
Epoch 2: val_loss did not improve from inf
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 284ms/step - accuracy: 0.0120 - loss: nan - val_accuracy: 0.0102 - val_loss: nan - learning_rate: 5.2045e-05
Epoch 3/110
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step - accuracy: 0.0109 - loss: nan
Epoch 3: val_loss did not improve from inf
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 284ms/step - accuracy: 0.0108 - loss: nan - val_accuracy: 0.0102 - val_loss: nan - learning