In [1]:
import os

PROJECT = !gcloud config list --format 'value(core.project)'
PROJECT = PROJECT[0]
BUCKET = PROJECT+"-capstone"
REGION = "us-central1"

os.environ["BUCKET"] = BUCKET
os.environ["REGION"] = REGION

In [2]:
# get train 

from google.cloud import storage
from collections import defaultdict
import os
import re
import random

# Training images are expected under '<bucket>/train/<label>/<file>'.
image_folder = "train"

# Client first, then the bucket handle named by the BUCKET environment variable.
storage_client = storage.Client()
bucket_name = os.environ["BUCKET"]
bucket = storage_client.bucket(bucket_name)

# Iterator over every object whose name starts with the folder prefix.
blobs = bucket.list_blobs(prefix=image_folder)

# Accumulators filled by the collection loop below (three separate lists).
image_urls, labels, images = [], [], []

# Function to extract label from the blob name
def extract_label(blob_name, folder="train"):
    """Return the label directory of a blob named '<folder>/<label>/<file>'.

    Args:
        blob_name: Object name inside the bucket, e.g.
            'train/Tomato___healthy/img.jpg'.
        folder: Top-level split directory the name must start with. Defaults
            to 'train' so existing single-argument calls keep working.

    Returns:
        The '<label>' path component, or 'unknown' when the name does not
        follow the '<folder>/<label>/...' layout.
    """
    # re.match anchors at the start of the name, so a 'train/' segment deeper
    # in the path (e.g. 'backup/train/x/y.jpg') is not mistaken for the split
    # folder. re.escape keeps a folder containing regex metacharacters literal.
    match = re.match(rf'{re.escape(folder)}/([^/]+)/', blob_name)
    return match.group(1) if match else 'unknown'

# Running tally of how many images were kept for each label.
label_counts = defaultdict(int)

# A random per-label cap was once applied here; it is disabled, so every
# matching image is kept.
# label_limits = defaultdict(lambda: random.randint(500, 700))

# Keep only image files that live under a 'train/tomato*' directory
# (both checks are case-insensitive) and record their gs:// URL and label.
for blob in blobs:
    if not blob.name.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    if not blob.name.lower().startswith('train/tomato'):
        continue
    label = extract_label(blob.name)
    image_urls.append(f"gs://{bucket_name}/{blob.name}")
    labels.append(label)
    label_counts[label] += 1

print(f"Found {len(image_urls)} images.")

# Per-label breakdown of what was collected.
for label, count in label_counts.items():
    print(f"Label: {label}, Number of Images: {count}")

Found 18345 images.
Label: Tomato___Bacterial_spot, Number of Images: 1702
Label: Tomato___Early_blight, Number of Images: 1920
Label: Tomato___Late_blight, Number of Images: 1851
Label: Tomato___Leaf_Mold, Number of Images: 1882
Label: Tomato___Septoria_leaf_spot, Number of Images: 1745
Label: Tomato___Spider_mites Two-spotted_spider_mite, Number of Images: 1741
Label: Tomato___Target_Spot, Number of Images: 1827
Label: Tomato___Tomato_Yellow_Leaf_Curl_Virus, Number of Images: 1961
Label: Tomato___Tomato_mosaic_virus, Number of Images: 1790
Label: Tomato___healthy, Number of Images: 1926


In [3]:
# get validation 

from google.cloud import storage
from collections import defaultdict
import os
import re
import random

# Validation images are expected under '<bucket>/valid/<label>/<file>'.
image_folder = "valid"

# Client first, then the bucket handle named by the BUCKET environment variable.
storage_client = storage.Client()
bucket_name = os.environ["BUCKET"]
bucket = storage_client.bucket(bucket_name)

# Iterator over every object whose name starts with the folder prefix.
blobs = bucket.list_blobs(prefix=image_folder)

# Accumulators filled by the collection loop below (three separate lists).
val_image_urls, val_labels, val_images = [], [], []

# Function to extract label from the blob name
def extract_label(blob_name, folder="valid"):
    """Return the label directory of a blob named '<folder>/<label>/<file>'.

    Args:
        blob_name: Object name inside the bucket, e.g.
            'valid/Tomato___healthy/img.jpg'.
        folder: Top-level split directory the name must start with. Defaults
            to 'valid' so existing single-argument calls in this cell keep
            working.

    Returns:
        The '<label>' path component, or 'unknown' when the name does not
        follow the '<folder>/<label>/...' layout.
    """
    # re.match anchors at the start of the name, so a 'valid/' segment deeper
    # in the path is not mistaken for the split folder. re.escape keeps a
    # folder containing regex metacharacters literal.
    match = re.match(rf'{re.escape(folder)}/([^/]+)/', blob_name)
    return match.group(1) if match else 'unknown'

# Running tally of how many validation images were kept for each label.
label_counts = defaultdict(int)

# A random per-label cap was once applied here; it is disabled, so every
# matching image is kept.
# label_limits = defaultdict(lambda: random.randint(50, 90))

# Keep only image files that live under a 'valid/tomato*' directory
# (both checks are case-insensitive) and record their gs:// URL and label.
for blob in blobs:
    if not blob.name.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue
    if not blob.name.lower().startswith('valid/tomato'):
        continue
    label = extract_label(blob.name)
    val_image_urls.append(f"gs://{bucket_name}/{blob.name}")
    val_labels.append(label)
    label_counts[label] += 1

print(f"Found {len(val_image_urls)} images.")

# Per-label breakdown of what was collected.
for label, count in label_counts.items():
    print(f"Label: {label}, Number of Images: {count}")


Found 4585 images.
Label: Tomato___Bacterial_spot, Number of Images: 425
Label: Tomato___Early_blight, Number of Images: 480
Label: Tomato___Late_blight, Number of Images: 463
Label: Tomato___Leaf_Mold, Number of Images: 470
Label: Tomato___Septoria_leaf_spot, Number of Images: 436
Label: Tomato___Spider_mites Two-spotted_spider_mite, Number of Images: 435
Label: Tomato___Target_Spot, Number of Images: 457
Label: Tomato___Tomato_Yellow_Leaf_Curl_Virus, Number of Images: 490
Label: Tomato___Tomato_mosaic_virus, Number of Images: 448
Label: Tomato___healthy, Number of Images: 481


In [4]:
import numpy as np

# The ten tomato leaf classes. Position in this array is the class index:
# the preprocessing functions build one-hot labels by comparing a label string
# against CLASS_NAMES element-wise.
CLASS_NAMES = np.array((
    "Tomato___Bacterial_spot",
    "Tomato___Early_blight",
    "Tomato___Late_blight",
    "Tomato___Leaf_Mold",
    "Tomato___Septoria_leaf_spot",
    "Tomato___Spider_mites Two-spotted_spider_mite",
    "Tomato___Target_Spot",
    "Tomato___Tomato_Yellow_Leaf_Curl_Virus",
    "Tomato___Tomato_mosaic_virus",
    "Tomato___healthy",
))

print(f"These are {len(CLASS_NAMES)} available classes:", CLASS_NAMES)

These are 10 available classes: ['Tomato___Bacterial_spot' 'Tomato___Early_blight' 'Tomato___Late_blight'
 'Tomato___Leaf_Mold' 'Tomato___Septoria_leaf_spot'
 'Tomato___Spider_mites Two-spotted_spider_mite' 'Tomato___Target_Spot'
 'Tomato___Tomato_Yellow_Leaf_Curl_Virus' 'Tomato___Tomato_mosaic_virus'
 'Tomato___healthy']


In [18]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Assuming image_urls, labels, val_image_urls, and val_labels are defined
# CLASS_NAMES should also be defined as a list of class names

# Function to load and preprocess an image
def preprocess(file_path, label_str):
    """Read one image file and pair it with a one-hot label vector.

    Args:
        file_path: Path/URL of the image file, read with tf.io.read_file.
        label_str: Class name; compared element-wise against CLASS_NAMES.

    Returns:
        (image, label): the image decoded to 3 channels, resized to 224x224
        and divided by 255, plus a float32 vector with one entry per
        CLASS_NAMES element (1.0 where the name equals `label_str`).
    """
    raw_bytes = tf.io.read_file(file_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.image.resize(pixels, [224, 224])
    pixels = tf.cast(pixels, tf.float32) / 255.0
    one_hot = tf.cast(tf.math.equal(CLASS_NAMES, label_str), tf.float32)
    return pixels, one_hot

def load_dataset(img_urls, lbls, batch_size, training=True, shuffle_buffer=None):
    """Build a batched, prefetched tf.data pipeline of (image, one-hot label).

    Args:
        img_urls: Sequence of image paths/URLs.
        lbls: Sequence of label strings, aligned with `img_urls`.
        batch_size: Number of examples per batch.
        training: When True the dataset is shuffled and repeated indefinitely
            (so `steps_per_epoch` must be given to `fit`); when False it is a
            single finite pass in input order.
        shuffle_buffer: Shuffle buffer size for training. Defaults to the
            module-level SHUFFLE_BUFFER, as before.

    Returns:
        A tf.data.Dataset yielding (image_batch, label_batch).
    """
    dataset = tf.data.Dataset.from_tensor_slices((img_urls, lbls))
    dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    # Cache BEFORE shuffling. A cache placed after shuffle() stores the first
    # epoch's order and replays exactly that order on every later epoch.
    dataset = dataset.cache()
    if training:
        if shuffle_buffer is None:
            shuffle_buffer = SHUFFLE_BUFFER
        # NOTE(review): the URL list appears to be grouped by label, so a buffer
        # smaller than one class's image count mixes classes poorly; consider
        # passing shuffle_buffer=len(img_urls) if memory allows.
        dataset = dataset.shuffle(shuffle_buffer)
        dataset = dataset.repeat()

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset

# Pipeline settings, then one dataset per split.
SHUFFLE_BUFFER = 1024
batch_size = 128

train_dataset = load_dataset(
    img_urls=image_urls, lbls=labels, batch_size=batch_size, training=True
)
val_dataset = load_dataset(
    img_urls=val_image_urls, lbls=val_labels, batch_size=batch_size, training=False
)




In [16]:
import tensorflow as tf
# Dataset pipeline settings.
batch_size = 128
SHUFFLE_BUFFER = 1000

# Augmentation ranges. In this notebook they are referenced only by the
# commented-out brightness/contrast ops in read_and_preprocess.
MAX_DELTA = 63.0 / 255.0  # Max brightness shift: 63 of 255 levels, about 24.7%
CONTRAST_LOWER, CONTRAST_UPPER = 0.2, 1.8


# Function to load and preprocess an image
def read_and_preprocess(file_path, label_str, random_augment=False):
    """Read one image file and pair it with a one-hot label vector.

    Args:
        file_path: Path/URL of the image file, read with tf.io.read_file.
        label_str: Class name; compared element-wise against CLASS_NAMES.
        random_augment: Selects the augmenting variant. The brightness and
            contrast ops are currently commented out, so both settings
            produce the same image.

    Returns:
        (image, label): the image decoded to 3 channels, resized to 224x224
        and divided by 255, plus a float32 vector with one entry per
        CLASS_NAMES element (1.0 where the name equals `label_str`).
    """
    encoded = tf.io.read_file(file_path)
    image = tf.image.decode_jpeg(encoded, channels=3)
    # Disabled augmentation hook: these ops were applied to the decoded image,
    # before resizing, when random_augment is True.
    # if random_augment:
    #     image = tf.image.random_brightness(image, MAX_DELTA)
    #     image = tf.image.random_contrast(image, CONTRAST_LOWER, CONTRAST_UPPER)
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    one_hot = tf.cast(tf.math.equal(CLASS_NAMES, label_str), tf.float32)
    return image, one_hot

def read_and_preprocess_with_augment(file_path, label_str):
    """Two-argument adapter that calls read_and_preprocess with augmentation on.

    Dataset.map passes only (file_path, label_str), so the flag is fixed here.
    """
    augmented = read_and_preprocess(file_path, label_str, True)
    return augmented

def load_dataset(img_urls, lbls, batch_size, training=True):
    """Build a batched, prefetched tf.data pipeline of (image, one-hot label).

    Args:
        img_urls: Sequence of image paths/URLs.
        lbls: Sequence of label strings, aligned with `img_urls`.
        batch_size: Number of examples per batch.
        training: When True the (path, label) pairs are fully shuffled, decoded
            through the augmenting reader and repeated indefinitely (so
            `steps_per_epoch` must be given to `fit`); when False the dataset
            is a single finite pass in input order.

    Returns:
        A tf.data.Dataset yielding (image_batch, label_batch).
    """
    # Only the (path, label) string pairs are cached; images are decoded after
    # this point on every pass.
    dataset = tf.data.Dataset.from_tensor_slices((img_urls, lbls)).cache()
    if training:
        # Size the buffer from the list actually being loaded. The previous
        # code used the global `image_urls`, which is only right when this
        # function happens to be called with that exact list. max(..., 1)
        # keeps shuffle() valid for an empty list.
        dataset = dataset.shuffle(max(len(img_urls), 1))
        # Training goes through the augmenting wrapper so that enabling the
        # augmentation ops in read_and_preprocess affects training data only.
        dataset = dataset.map(read_and_preprocess_with_augment,
                              num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.repeat()
    else:
        dataset = dataset.map(read_and_preprocess,
                              num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset
    

# One dataset per split, built from the URL/label lists collected earlier.
train_dataset = load_dataset(
    img_urls=image_urls, lbls=labels, batch_size=batch_size, training=True
)
val_dataset = load_dataset(
    img_urls=val_image_urls, lbls=val_labels, batch_size=batch_size, training=False
)

# 

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Transfer-learning model: ImageNet-pretrained ResNet50 trunk + new classifier head.
# NOTE(review): Keras documents that the ImageNet ResNet50 weights expect inputs
# run through tf.keras.applications.resnet50.preprocess_input, whereas the
# dataset cells scale pixels to [0, 1] - confirm this mismatch is intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every layer of the ResNet50 trunk first.
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(len(CLASS_NAMES), activation='softmax')(x)  # One output per class

model = Model(inputs=base_model.input, outputs=predictions)

# Optional warm-up phase (disabled): train only the new head with the trunk frozen.
# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.0001, weight_decay = 0.0001), 
#               loss='categorical_crossentropy', 
#               metrics=['accuracy'])

# # Train the model
# history = model.fit(train_dataset,
#                     epochs=10,  # Set the number of epochs as needed
#                     steps_per_epoch=len(image_urls) // batch_size,
#                     validation_data=val_dataset,
#                     validation_steps=len(val_image_urls) // batch_size)

# Unfreeze the last 32 trunk layers for fine-tuning.
for layer in base_model.layers[-32:]:
    layer.trainable = True

# (Re)compile after changing `trainable` so the change takes effect.
model.compile(optimizer=Adam(learning_rate=0.0001, weight_decay=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model_checkpoint = ModelCheckpoint(filepath='../model/resnet_50_layers_cp.hdf5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
# min_lr must be below the starting learning rate (0.0001). With
# min_lr=0.0001 the callback could never lower the rate.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.7, patience=2, verbose=1, min_lr=1e-6)

# Fine-tune. train_dataset repeats forever, so steps_per_epoch is required.
# val_dataset is a single finite pass, so validation_steps is omitted and every
# validation image is used (floor division would skip the last partial batch).
history_fine = model.fit(train_dataset,
                         epochs=20,  # Continue for more epochs if necessary
                         steps_per_epoch=len(image_urls) // batch_size,
                         validation_data=val_dataset,
                         callbacks=[early_stopping, reduce_lr, model_checkpoint]
                        )


# model.save('../model/resnet_50_layers_model.hdf5')

Epoch 1/20

In [23]:
import logging
import os
import re

from google.cloud import storage

from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, Add, Input, GlobalAveragePooling2D, Dense, MaxPooling2D
from tensorflow.keras import callbacks, models
from tensorflow.keras.models import Model




def identity_block(input_tensor, filters, kernel_size):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Args:
        input_tensor: Input feature map; it is added to the branch output, so
            its channel count must equal the third entry of `filters`.
        filters: Three filter counts for the 1x1, `kernel_size` and 1x1 convs.
        kernel_size: Kernel size of the middle convolution.

    Returns:
        relu(branch(input_tensor) + input_tensor).
    """
    squeeze_ch, middle_ch, expand_ch = filters

    # 1x1 convolution
    branch = Conv2D(squeeze_ch, (1, 1))(input_tensor)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    # kernel_size convolution, spatial size preserved
    branch = Conv2D(middle_ch, kernel_size, padding='same')(branch)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    # 1x1 convolution; no activation until after the residual sum
    branch = Conv2D(expand_ch, (1, 1))(branch)
    branch = BatchNormalization()(branch)

    merged = Add()([branch, input_tensor])
    return Activation('relu')(merged)

def conv_block(input_tensor, filters, kernel_size, strides=(2, 2)):
    """Bottleneck residual block with a convolutional (projection) shortcut.

    Args:
        input_tensor: Input feature map.
        filters: Three filter counts for the 1x1, `kernel_size` and 1x1 convs.
        kernel_size: Kernel size of the middle convolution.
        strides: Stride of the first 1x1 convolution and of the shortcut
            convolution.

    Returns:
        relu(branch(input_tensor) + projection(input_tensor)).
    """
    squeeze_ch, middle_ch, expand_ch = filters

    # Main branch: strided 1x1 -> kernel_size -> 1x1
    branch = Conv2D(squeeze_ch, (1, 1), strides=strides)(input_tensor)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(middle_ch, kernel_size, padding='same')(branch)
    branch = BatchNormalization()(branch)
    branch = Activation('relu')(branch)

    branch = Conv2D(expand_ch, (1, 1))(branch)
    branch = BatchNormalization()(branch)

    # Shortcut: 1x1 conv with the same stride and output channels as the branch,
    # so the two tensors can be added.
    projection = Conv2D(expand_ch, (1, 1), strides=strides)(input_tensor)
    projection = BatchNormalization()(projection)

    merged = Add()([branch, projection])
    return Activation('relu')(merged)

def resnet50(input_shape, classes):
    """Assemble a ResNet50-style classifier from conv_block / identity_block.

    Args:
        input_shape: Shape of one input image, passed to Input(shape=...).
        classes: Number of units in the final softmax Dense layer.

    Returns:
        An uncompiled Model mapping the image input to class probabilities.
    """
    img_input = Input(shape=input_shape)

    # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
    features = Conv2D(64, (7, 7), strides=(2, 2), padding='same')(img_input)
    features = BatchNormalization()(features)
    features = Activation('relu')(features)
    features = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(features)

    # One row per stage: (bottleneck filters, identity blocks after the leading
    # conv_block, stride of that conv_block). Only the first stage uses stride 1.
    stage_table = (
        ([64, 64, 256], 2, (1, 1)),
        ([128, 128, 512], 3, (2, 2)),
        ([256, 256, 1024], 5, (2, 2)),
        ([512, 512, 2048], 2, (2, 2)),
    )
    for stage_filters, identity_count, lead_strides in stage_table:
        features = conv_block(features, stage_filters, (3, 3), strides=lead_strides)
        for _ in range(identity_count):
            features = identity_block(features, stage_filters, (3, 3))

    # Classification head: global average pooling, then softmax over classes.
    features = GlobalAveragePooling2D()(features)
    outputs = Dense(classes, activation='softmax')(features)

    return Model(img_input, outputs)


In [24]:
# resnet 

input_shape = (224, 224, 3)  # Adjust according to your image dimensions
num_classes = len(CLASS_NAMES)

# Build the hand-written ResNet50 (randomly initialised, no pretrained weights).
model = resnet50(input_shape, num_classes)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define training parameters
epochs = 32
# NOTE: train_dataset / val_dataset were batched and shuffled when they were
# built, so assigning batch_size / SHUFFLE_BUFFER here does NOT change them.
# batch_size is only used below to derive steps_per_epoch and must equal the
# value the datasets were built with, otherwise an "epoch" no longer
# corresponds to one pass over the training images.
batch_size = 128
SHUFFLE_BUFFER = 1024

# Train the model. train_dataset repeats forever, so steps_per_epoch is
# required. val_dataset is a single finite pass, so validation_steps is omitted
# and the whole validation set is used.
history = model.fit(train_dataset,
                    epochs=epochs,
                    steps_per_epoch=len(image_urls) // batch_size,
                    validation_data=val_dataset)

# Evaluate on the full validation set. The previous
# steps=len(val_image_urls) // batch_size dropped the last partial batch, so
# the reported accuracy ignored those images.
loss, accuracy = model.evaluate(val_dataset)
print(f'Validation accuracy: {accuracy * 100:.2f}%')

Epoch 1/32


2024-06-26 10:15:26.689845: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.78GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-06-26 10:15:26.689917: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 4.78GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-06-26 10:15:33.783536: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 5.10GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-06-26 10:15:33.783603: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU

 55/573 [=>............................] - ETA: 10:21 - loss: 2.4696 - accuracy: 0.2859

KeyboardInterrupt: 

In [None]:
from tensorflow.keras.applications import ResNet50

# Instantiate the headless ImageNet ResNet50 to inspect how deep it is.
# Note: this rebinds the notebook-level name `base_model`.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Last expression of the cell: number of layers in the trunk.
len(base_model.layers)