In [36]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
from PIL import Image
import shutil  # For copying files
import pandas as pd  # For reading the CSV files
from google.colab import files # For triggering the download

Downloading the dataset

In [38]:
# Clone the dataset repository (skipped when a previous run already fetched it).
import subprocess  # imported here because only this cell shells out to git

DATASET_REPO_URL = "https://github.com/prajnasb/observations.git"
DATASET_REPO_DIR = "observations"

try:
    if os.path.exists(DATASET_REPO_DIR):
        print("Repository already exists. Skipping clone.")
    else:
        # A `!git clone` shell escape never raises, so a failed clone used to
        # fall through to the success message. check=True turns a non-zero
        # exit status into subprocess.CalledProcessError instead.
        completed = subprocess.run(
            ["git", "clone", DATASET_REPO_URL, DATASET_REPO_DIR],
            check=True,
            capture_output=True,
            text=True,
        )
        # git reports its progress on stderr; echo it so it shows in the cell output.
        print(completed.stderr, end="")
        print("Repository cloned successfully.")
except subprocess.CalledProcessError as e:
    # Include git's own diagnostics, which str(e) does not contain.
    print(f"Error in dataset cloning: {e}\n{e.stderr}")
    # Stop execution if cloning fails
    raise
except Exception as e:
    # e.g. FileNotFoundError when the git executable is not installed
    print(f"Error in dataset cloning: {e}")
    # Stop execution if cloning fails
    raise

Repository already exists. Skipping clone.


In [39]:
# Define file paths
# Folder the split CSV files are read from; the train/ and val/ folders are
# created underneath it as well.
# NOTE(review): "experiements" is the spelling used by the path that the
# recorded run resolved successfully — confirm against the repo before "fixing" it.
experiments_path = 'observations/experiements/dest_folder/'
# Root of the source images; files are looked up as <data_path>/<class>/<filename>.
data_path = 'observations/experiements/data/'

def copy_images_to_path(file_path, file_class, destination_dir):
    """Copy one image into the sub-folder of ``destination_dir`` named after its class.

    Args:
        file_path: Path of the existing file to copy.
        file_class: Class label; used as the name of the sub-folder.
        destination_dir: Root directory holding one sub-folder per class.

    Raises:
        OSError: If the class folder cannot be created or the file cannot be
            copied (for example, ``file_path`` does not exist).
    """
    class_dir = os.path.join(destination_dir, file_class)
    # exist_ok=True replaces the separate "exists?" check, so a folder that
    # appears between the check and the creation can no longer raise.
    os.makedirs(class_dir, exist_ok=True)

    # Copied silently: callers report totals instead of one line per file.
    shutil.copy(file_path, class_dir)

def process_csv(csv_name, destination_dir):
    """Copy every image listed in a split CSV into per-class folders.

    The CSV is read from ``experiments_path`` and must provide ``class`` and
    ``filename`` columns. Each listed image is looked up at
    ``<data_path>/<class>/<filename>`` and copied to
    ``<destination_dir>/<class>/``. Source files that do not exist are
    reported with a warning and skipped.

    Args:
        csv_name: File name of the CSV inside ``experiments_path``.
        destination_dir: Root directory to populate; created if missing.

    Raises:
        KeyError: If the CSV lacks a ``class`` or ``filename`` column.
    """
    csv_path = os.path.join(experiments_path, csv_name)
    df = pd.read_csv(csv_path, delimiter=',')

    print(f"Processing {csv_name}: Found {len(df)} images to copy to {destination_dir}...")

    # Create the destination directory if it doesn't exist
    os.makedirs(destination_dir, exist_ok=True)

    copied_count = 0
    # Plain tuples over just the two needed columns: no per-row Series is
    # built and no positional row[1][...] indexing is required.
    rows = df[['class', 'filename']].itertuples(index=False, name=None)
    for file_class, filename in rows:
        source_file_path = os.path.join(data_path, file_class, filename)

        if not os.path.exists(source_file_path):
            print(f"Warning: Source file not found: {source_file_path}")
            continue

        copy_images_to_path(source_file_path, file_class, destination_dir)
        copied_count += 1

    print(f"Finished copying {copied_count} images for {csv_name}.")

# --- Populate the train / validation folders ---
# Both split folders are created next to the CSV files, under experiments_path.
TRAIN_DIR = os.path.join(experiments_path, 'train')
VAL_DIR = os.path.join(experiments_path, 'val')

# train.csv fills the training folder; test.csv is used as the validation split.
for split_csv, split_dir in (('train.csv', TRAIN_DIR), ('test.csv', VAL_DIR)):
    process_csv(split_csv, split_dir)

# Now that the folders are populated, we must clean them of bad files
# that tensorflow cannot read.

def clean_directory(directory_path):
    """Delete every file under ``directory_path`` that PIL cannot verify.

    Walks the directory tree, opens each file with PIL and runs ``verify()``
    on it. Any file for which opening or verification raises is reported
    and removed from disk. A summary with the number of removed files is
    printed at the end.

    Args:
        directory_path: Root directory to scan (sub-folders included).
    """
    print(f"Cleaning directory: {directory_path}...")
    removed_count = 0
    # os.walk goes through all subfolders (one per class)
    for root, _dirs, filenames in os.walk(directory_path):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            try:
                # The context manager closes the file even when verify()
                # raises, so no handle is left open on a file that is about
                # to be deleted (a trailing img.close() was skipped on error).
                with Image.open(file_path) as img:
                    img.verify()  # This checks for file integrity.
            except Exception as e:
                # If PIL fails, the file is treated as bad.
                print(f"Removing corrupt file: {file_path} (Error: {e})")
                os.remove(file_path)
                removed_count += 1
    print(f"Cleaning complete. Removed {removed_count} corrupt file(s) from {directory_path}.")


# Run the integrity check over the training folder, then the validation folder
for populated_dir in (TRAIN_DIR, VAL_DIR):
    clean_directory(populated_dir)

Processing train.csv: Found 1178 images to copy to observations/experiements/dest_folder/train...
Finished copying 1178 images for train.csv.
Processing test.csv: Found 194 images to copy to observations/experiements/dest_folder/val...
Finished copying 194 images for test.csv.
Cleaning directory: observations/experiements/dest_folder/train...
Cleaning complete. Removed 0 corrupt file(s) from observations/experiements/dest_folder/train.
Cleaning directory: observations/experiements/dest_folder/val...
Cleaning complete. Removed 0 corrupt file(s) from observations/experiements/dest_folder/val.


Loading data and building the model

In [40]:
# Define constants
IMAGE_SIZE = (128, 128)
BATCH_SIZE = 32
SEED = 1337  # shared by both loaders and by the per-epoch shuffle below

try:
    # Load the training dataset (from the folder populated earlier)
    print(f"Loading training data from {TRAIN_DIR}...")
    train_ds = tf.keras.utils.image_dataset_from_directory(
        TRAIN_DIR,
        # One 0/1 label per image. The recorded run lists the classes as
        # ['with_mask', 'without_mask'], i.e. with_mask -> 0, without_mask -> 1.
        label_mode='binary',
        seed=SEED,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

    # Load the validation dataset (from the folder populated earlier)
    print(f"Loading validation data from {VAL_DIR}...")
    val_ds = tf.keras.utils.image_dataset_from_directory(
        VAL_DIR,
        label_mode='binary',
        seed=SEED,
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
    )

    class_names = train_ds.class_names
    print(f"Classes found: {class_names}")

    # Configure dataset for high performance
    AUTOTUNE = tf.data.AUTOTUNE
    # cache() replays whatever order the first epoch produced, so without a
    # shuffle placed *after* it every epoch would see the batches in the same
    # order. The buffer is larger than the number of batches, so the cached
    # batches are fully reordered each epoch.
    train_ds = (
        train_ds
        .cache()
        .shuffle(buffer_size=1000, seed=SEED)
        .prefetch(buffer_size=AUTOTUNE)
    )
    val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

    # --- Model Architecture ---
    model = tf.keras.Sequential([
        # Derived from IMAGE_SIZE so that changing the constant cannot leave
        # the model expecting a different shape than the loaders produce.
        layers.Input(shape=IMAGE_SIZE + (3,)),
        layers.Rescaling(1./255),

        # Data augmentation
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),

        # CNN Body
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),

        # Head
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),

        # Output layer: a single sigmoid unit to match label_mode='binary'
        layers.Dense(1, activation='sigmoid')
    ])

    # Compile the model
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    print("Model built and compiled successfully.")
    model.summary()

except Exception as e:
    print(f"Error loading data or building model: {e}")
    # Bare raise keeps the original traceback intact.
    raise

Loading training data from observations/experiements/dest_folder/train...
Found 1178 files belonging to 2 classes.
Loading validation data from observations/experiements/dest_folder/val...
Found 194 files belonging to 2 classes.
Classes found: ['with_mask', 'without_mask']
Model built and compiled successfully.


Training the model

In [41]:
# Train the model, save it to disk, and hand the saved file to the browser.
try:
    epochs = 10
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs
    )
    print("Model training complete.")

    model_filename = 'face_mask_model.keras'
    model.save(model_filename)
    print(f"Model saved as '{model_filename}'")

    # Use Colab's 'files' utility to trigger the browser download
    files.download(model_filename)
    print(f"Triggering download for '{model_filename}'. Please check your browser.")

except Exception as e:
    print(f"An error occurred during training or saving: {e}")
    # Re-raise, as the other cells do: swallowing the error made a failed
    # run look successful and let "Run All" continue past it.
    raise

Epoch 1/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 4s 64ms/step - accuracy: 0.6958 - loss: 0.5801 - val_accuracy: 0.9227 - val_loss: 0.1933
Epoch 2/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 1s 20ms/step - accuracy: 0.9648 - loss: 0.1225 - val_accuracy: 0.9536 - val_loss: 0.0989
Epoch 3/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 1s 22ms/step - accuracy: 0.9732 - loss: 0.0746 - val_accuracy: 0.9742 - val_loss: 0.1020
Epoch 4/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 1s 23ms/step - accuracy: 0.9735 - loss: 0.0708 - val_accuracy: 0.9691 - val_loss: 0.0869
Epoch 5/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 1s 22ms/step - accuracy: 0.9878 - loss: 0.0544 - val_accuracy: 0.9691 - val_loss: 0.1274
Epoch 6/10
37/37 ━━━━━━━━━━━━━━━━━━━━ 1s 20ms/step - accuracy: 0.9763 - loss: 0.0581 - val_accuracy: 0.9742 - val_loss: 0.1446
Epoch 7/10
37/37 ━━━━

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Triggering download for 'face_mask_model.keras'. Please check your browser.
