In [2]:
# GPU and TF setup
import os
import tensorflow as tf
print('TensorFlow version:', tf.__version__)

# Detect GPUs and configure memory growth safely
gpus = tf.config.list_physical_devices('GPU')
print('GPUs detected:', gpus)
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print('Enabled memory growth for GPUs')
    except RuntimeError as e:
        print('Error setting memory growth:', e)
else:
    print('No GPU detected â€” training will run on CPU (much slower)')

# Enable mixed precision only when a GPU is present and TF supports it
try:
    if gpus and tf.test.is_built_with_cuda():
        from tensorflow.keras import mixed_precision
        policy = mixed_precision.Policy('mixed_float16')
        mixed_precision.set_global_policy(policy)
        print('Mixed precision enabled:', mixed_precision.global_policy())
    else:
        print('Mixed precision not enabled: no compatible GPU or CUDA support')
except Exception as e:
    print('Mixed precision not enabled:', e)

# Quick GPU computation test (sanity check)
if gpus:
    try:
        with tf.device('/GPU:0'):
            a = tf.random.uniform((512, 512))
            b = tf.random.uniform((512, 512))
            c = tf.matmul(a, b)
            print('Matrix multiply device:', c.device)
    except Exception as e:
        print('GPU computation test failed:', e)
else:
    print('Skipping GPU matmul test (no GPU)')

An error occurred: module 'importlib.metadata' has no attribute 'packages_distributions'




TensorFlow version: 2.10.0
GPUs detected: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Enabled memory growth for GPUs
Your GPU may run slowly with dtype policy mixed_float16 because it does not have compute capability of at least 7.0. Your GPU:
  NVIDIA GeForce MX150, compute capability 6.1
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.
Mixed precision enabled: <Policy "mixed_float16">
Matrix multiply device: /job:localhost/replica:0/task:0/device:GPU:0


In [None]:
# Paths: change as needed
BASE_DIR = r'M:\Term 9\Grad\Main\Sign-Language-Recognition-System-main\Sign-Language-Recognition-System-main\Sign_to_Sentence Project Main\Datasets\Dataset (ArASL)\ArASL Database'
DATASET_DIR = os.path.join(BASE_DIR, 'ArASL_Database')
TEST_DIR = os.path.join(BASE_DIR, 'ArASL_35')
print('Dataset:', DATASET_DIR)
print('Test folder:', TEST_DIR)

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os

# Define the dataset directory
dataset_dir = DATASET_DIR  # Use the same DATASET_DIR as above

# Parameters
batch_size = 32
img_height = 128
img_width = 128
validation_split = 0.2
seed = 123

print("Loading Training Dataset...")
train_ds = tf.keras.utils.image_dataset_from_directory(
  dataset_dir,
  validation_split=validation_split,
  subset="training",
  seed=seed,
  image_size=(img_height, img_width),
  batch_size=batch_size)

print("\nLoading Validation Dataset...")
val_ds = tf.keras.utils.image_dataset_from_directory(
  dataset_dir,
  validation_split=validation_split,
  subset="validation",
  seed=seed,
  image_size=(img_height, img_width),
  batch_size=batch_size)

# Get class names
class_names = train_ds.class_names
print(f"\nClass names ({len(class_names)}): {class_names}")

# Visualize 9 images from the training set
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")
plt.show()

# Optimize for performance (Autotuning)
# This keeps data in memory (cache) and prepares the next batch while the GPU is working (prefetch)
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

print("\nDatasets are optimized for training (Cached & Prefetched).")

## Parameters and data pipeline
We use `tf.keras.utils.image_dataset_from_directory` with caching and prefetching for best throughput to the GPU.

In [None]:
import math
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.data import AUTOTUNE

IMG_SIZE = 128  # MobileNetV2 works well at 128 or 160
BATCH_SIZE = 64
SEED = 123
VAL_SPLIT = 0.2

print('Image size:', IMG_SIZE)
print('Batch size:', BATCH_SIZE)

# Create train and validation datasets
train_ds = image_dataset_from_directory(
    DATASET_DIR,
    labels='inferred',
    label_mode='categorical',
    validation_split=VAL_SPLIT,
    subset='training',
    seed=SEED,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
)
val_ds = image_dataset_from_directory(
    DATASET_DIR,
    labels='inferred',
    label_mode='categorical',
    validation_split=VAL_SPLIT,
    subset='validation',
    seed=SEED,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE
)

class_names = train_ds.class_names
NUM_CLASSES = len(class_names)
print('Classes ({}):'.format(NUM_CLASSES), class_names)

# Performance: cache + prefetch. Important changes:
# - Do not divide by 255 in the dataset mapping when using `preprocess_input` inside the model.
# - Keep augmentation inside the model graph (data_augmentation layer) so augmentation uses vectorized ops
#   and does not duplicate work in the tf.data pipeline.
def preprocess(image, label):
    # Keep image in [0,255] range as `tf.keras.applications.mobilenet_v2.preprocess_input`
    # expects inputs in that range. We only cast to float32 here.
    image = tf.cast(image, tf.float32)
    return image, label

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.map(preprocess, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(preprocess, num_parallel_calls=AUTOTUNE)

# Optional augmentation layer (applied in the model graph to avoid double work in pipeline)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.05),
    tf.keras.layers.RandomTranslation(0.05, 0.05),
], name='data_augmentation')

# Keep dataset pipelines lightweight: cache then prefetch with AUTOTUNE
train_ds = train_ds.cache().prefetch(AUTOTUNE)
val_ds = val_ds.cache().prefetch(AUTOTUNE)

# Use dataset cardinality (number of batches) as steps per epoch
STEPS_PER_EPOCH = int(tf.data.experimental.cardinality(train_ds).numpy())
VALIDATION_STEPS = int(tf.data.experimental.cardinality(val_ds).numpy())
print('Steps per epoch (approx):', STEPS_PER_EPOCH)
print('Validation steps (approx):', VALIDATION_STEPS)

## Build MobileNetV2 model (transfer learning)
We freeze the base model for initial training, use BatchNorm and Dropout in the head, and ensure final Dense uses `dtype='float32'` to avoid loss scaling issues when using mixed precision.

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.models import Model
import tensorflow as tf

print('Building MobileNetV2 base...')
with tf.device('/GPU:0' if tf.config.list_physical_devices('GPU') else '/CPU:0'):
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model.trainable = False

    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = data_augmentation(inputs)  # small augmentation in model graph as well (optional)
    x = tf.keras.applications.mobilenet_v2.preprocess_input(x)
    x = base_model(x, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dense(512, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)
    outputs = Dense(NUM_CLASSES, activation='softmax', dtype='float32', name='predictions')(x)

    model = Model(inputs, outputs)

model.summary()

# Compile model with a conservative learning rate for transfer learning
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
print('Model compiled.')

## Callbacks and initial training
We save the best model by `val_accuracy`, reduce LR on plateau, and log to CSV.

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping

checkpoint_cb = ModelCheckpoint('mobilenet_arabic_best_initial.h5', monitor='val_accuracy', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-7, verbose=1)
csv_logger = CSVLogger('training_initial.csv')
earlystop = EarlyStopping(monitor='val_loss', patience=6, restore_best_weights=True, verbose=1)

INITIAL_EPOCHS = 8
print('Starting initial training for', INITIAL_EPOCHS, 'epochs...')
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=INITIAL_EPOCHS,
    callbacks=[checkpoint_cb, reduce_lr, csv_logger],
    verbose=1
)
print('Initial training completed.')

## Fine-tuning: unfreeze last N layers and continue training with smaller LR
Unfreeze the top of the base model and train with a smaller LR to refine pre-trained weights for your dataset.

In [None]:
# Unfreeze some layers for fine-tuning
NUM_UNFREEZE = 30  # adjust depending on dataset & GPU memory

# Make base_trainable from some layer onwards
for layer in base_model.layers[-NUM_UNFREEZE:]:
    layer.trainable = True

# Recompile with lower LR
model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

FINE_TUNE_EPOCHS = 10
total_epochs = INITIAL_EPOCHS + FINE_TUNE_EPOCHS
print('Starting fine-tuning for', FINE_TUNE_EPOCHS, 'epochs (total epochs:', total_epochs, ')')

checkpoint_cb2 = ModelCheckpoint('mobilenet_arabic_best_finetuned.h5', monitor='val_accuracy', save_best_only=True, verbose=1)
csv_logger2 = CSVLogger('training_finetune.csv')

history_finetune = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=total_epochs,
    initial_epoch=INITIAL_EPOCHS,
    callbacks=[checkpoint_cb2, reduce_lr, csv_logger2],
    verbose=1
)
print('Fine-tuning completed.')

# Save final model (HDF5 or SavedModel)
model.save('mobilenet_arabic_final.h5')
print('Saved final model to mobilenet_arabic_final.h5')

## Evaluate and visualize training history
Plot accuracy and loss curves, and optionally compute confusion matrix on a held-out test set.

In [None]:
import matplotlib.pyplot as plt

def plot_history(h, title_suffix=''):
    hist = h.history
    plt.figure(figsize=(12,4))
    plt.subplot(1,2,1)
    plt.plot(hist.get('accuracy', []), label='train_acc')
    plt.plot(hist.get('val_accuracy', []), label='val_acc')
    plt.title('Accuracy '+title_suffix)
    plt.legend()
    plt.grid(True)

    plt.subplot(1,2,2)
    plt.plot(hist.get('loss', []), label='train_loss')
    plt.plot(hist.get('val_loss', []), label='val_loss')
    plt.title('Loss '+title_suffix)
    plt.legend()
    plt.grid(True)
    plt.show()

plot_history(history, '(initial)')
plot_history(history_finetune, '(finetune)')

## Quick inference example (single image)
Load a single image, run prediction, and show the top result.

In [None]:
import numpy as np
import cv2
from tensorflow.keras.models import load_model

def predict_image(img_path, model_path='mobilenet_arabic_best_finetuned.h5'):
    model = load_model(model_path)
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    img = img.astype('float32') / 255.0
    x = np.expand_dims(img, axis=0)
    preds = model.predict(x)
    idx = np.argmax(preds[0])
    return class_names[idx], float(np.max(preds[0]))

# Example (change path to a real file on your system)
#label, score = predict_image(r'M:\\path\\to\\example.jpg')
#print('Predicted:', label, 'score:', score)

---
Notebook created: `Mobilenet-arabic-optimized.ipynb`.
Next steps: run the cells in order. If you want, I can update your original notebook in-place, or run a smaller smoke test here (I cannot execute code in your environment without you running it).