In [1]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import DepthwiseConv2D, Conv2D, BatchNormalization, AveragePooling2D, Dense, Activation, Flatten, Reshape, Add, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import numpy as np


def relu6(inputs):
    '''
        Performs the ReLU6 activation function for the bottleneck stage of the MobileNet V2
        Inputs:
            inputs: the layer with the inputs for the activation function
        Return:
            Min value between the value of the regular ReLU function and 6
    '''
    
    return K.relu(inputs,max_value=6)

def bottleneck(inputs, t, alpha, num_filters, kernel_sz=(3,3),stride=(1,1),pad='same',residual=False,dropout=False,dropout_perc=0.1):    
    '''
        Performs the bottleneck stage of the MobileNet V2
        Inputs:
            inputs: the layer with the inputs
            t: the value used to increase the number of filters of the expansion stage
            alpha: width multiplier that controls the number of filters of the output tensor
            num_filters: number of filters of the output tensor
            kernel_sz = kernel size of the filter
            stride: stride of the kernel
            pad: padding of the filter
            residual: parameter that determine the sum of the input and output of the bottleneck stage
            dropout: determine if dropout will be performed 
            dropout_perc: percentage of neurons that will be set to zero
        Return:
            x: the result of the bottleneck stage
    '''    
    
    # Get the index of the input 4D tensor that represents the number of channels of the image
    # -1 can also represent the last element of the tensor
    channel_idx = 1 if K.image_data_format == 'channels_first' else -1
    
    # Number of filters for the expansion convolution
    num_filters_exp = K.int_shape(inputs)[channel_idx] * t    
    
    # Number of filters of the projection convolution
    num_filters_proj = int(num_filters * alpha)
    
    # Expansion layer
    x = Conv2D(filters=num_filters_exp,kernel_size=(1,1),strides=(1,1),padding=pad)(inputs)
    x = BatchNormalization()(x)
    x = Activation(relu6)(x)
    
    # Depthwise convolution
    x = DepthwiseConv2D(kernel_size=kernel_sz,strides=stride,depth_multiplier=1,padding=pad)(x)
    x = BatchNormalization()(x)
    x = Activation(relu6)(x)
    
    # Projection convolution
    x = Conv2D(filters=num_filters_proj,kernel_size=(1,1),strides=(1,1),padding=pad)(x)
    x = BatchNormalization()(x)
    
    if (residual == True):
        x = Add()([x,inputs])
        
    if (dropout == True):
      x = Dropout(dropout_perc)(x)
    
    return x

def depthwise_block(inputs,stride,kernel_sz=(3,3),pad='same'):
    '''
        Function that performs the depthwise convolution
        Inputs:
            inputs:    the input shape of the depthwise convolution
            kernel_sz: a tuple that indicates the size of the filtering kernel
            stride:    a tuple that indicates the strides of the kernel
        Return:
            x: the result of the depthwise convolution
    '''
        
    x = DepthwiseConv2D(kernel_size=kernel_sz,strides=stride,depth_multiplier=1,padding=pad)(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
        
    return x

def pointwise_block(inputs,num_filters,alpha,kernel_sz=(1,1),stride=(1,1),pad='same',dropout=False,dropout_perc=0.1):
    '''
        Function that performs the pointwise convolution
        Inputs:
            inputs:      the input shape of the depthwise convolution
            num_filters: number of filters to be used in the convolution
            kernel_sz:   a tuple that indicates the size of the filtering kernel
            stride:      a tuple that indicates the strides of the kernel
            dropout: determine if dropout will be performed 
            dropout_perc: percentage of neurons that will be set to zero            
        Return:
            x: the result of the pointwise convolution
    '''    
    
    # Number of filters based on width multiplier reported in the original paper
    n_fil = int(num_filters * alpha)    
    
    x = Conv2D(filters=n_fil,kernel_size=kernel_sz,padding=pad)(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation='relu')(x)
    
    if (dropout == True):
      x = Dropout(dropout_perc)(x)
    
    return x


def MobileNetV2_ex(input_shape, num_units, filters=32, kernel_sz=(3,3),stride=(2,2),alp=1,ro=1,dropout_perc=0.1):
    input_shape = (int(input_shape[0] * ro), int(input_shape[1] * ro), input_shape[2])
    
    inputs = Input(shape=input_shape)
    
    # Regular convolution
    x = Conv2D(filters=filters,kernel_size=kernel_sz,strides=stride)(inputs)
    x = BatchNormalization()(x)
    x = Activation(relu6)(x)
    x = Dropout(dropout_perc)(x)

    # First bottleneck convolution
    x = bottleneck(x,t=1,alpha=alp,num_filters=16,kernel_sz=(3,3),stride=(1,1),dropout=True,dropout_perc=dropout_perc)

    # Second bottleneck convolution (peformed 2 times)
    x = bottleneck(x,t=6,alpha=alp,num_filters=24,kernel_sz=(3,3),stride=(2,2),dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=24,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    
    # Third bottleneck convolution (peformed 3 times)
    x = bottleneck(x,t=6,alpha=alp,num_filters=32,kernel_sz=(3,3),stride=(2,2))
    x = bottleneck(x,t=6,alpha=alp,num_filters=32,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=32,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    
    # Fourth bottleneck convolution (performed 4 times)
    x = bottleneck(x,t=6,alpha=alp,num_filters=64,kernel_sz=(3,3),stride=(2,2))
    x = bottleneck(x,t=6,alpha=alp,num_filters=64,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=64,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=64,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    
    # Fifth bottleneck convolution (performed 3 times)
    x = bottleneck(x,t=6,alpha=alp,num_filters=96,kernel_sz=(3,3),stride=(1,1))
    x = bottleneck(x,t=6,alpha=alp,num_filters=96,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=96,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    
    # Sixth bottleneck convolution (performed 3 times)
    x = bottleneck(x,t=6,alpha=alp,num_filters=160,kernel_sz=(3,3),stride=(2,2))
    x = bottleneck(x,t=6,alpha=alp,num_filters=160,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    x = bottleneck(x,t=6,alpha=alp,num_filters=160,kernel_sz=(3,3),stride=(1,1), residual=True,dropout=True,dropout_perc=dropout_perc)
    
    # Seventh bottleneck convolution (performed 1 time)
    x = bottleneck(x,t=6,alpha=alp,num_filters=320,kernel_sz=(3,3),stride=(1,1),dropout=True,dropout_perc=dropout_perc)
    
    # Eigth layer (regular convolution)
    x = Conv2D(filters=1280, kernel_size=(1,1), strides=(1,1), padding='same')(x)
    
    # Pooling layer
    # Pooling size correction due to the resolution multiplier parameter
    pool_size = int(np.round(7*ro))
    x = AveragePooling2D(padding='valid',pool_size=(pool_size,pool_size),strides=(1,1))(x)    
    
    x = Conv2D(filters=num_units,kernel_size=(1,1),strides=(1,1), padding='same')(x)
    
    output = Reshape((num_units,))(Activation(activation='softmax')(x))
    
    return Model(inputs,output)

2024-06-25 08:16:52.238591: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
alpha = 1
ro = 1
img_size = 224
target_sz = int(img_size * ro)
batch_sz = 32
epo = 100

# image_generator = ImageDataGenerator(rotation_range=15,
#                                       rescale=1./255,
#                                       shear_range=0.2,
#                                       zoom_range=0.1,
#                                       horizontal_flip=True,
#                                       fill_mode='nearest')
# train_generator = image_generator.flow_from_directory('/content/gdrive/My Drive/Imagens/Airplane models/Treinamento',
#                                                      target_size=(target_sz,target_sz),
#                                                      color_mode='rgb',
#                                                      batch_size=batch_sz,
#                                                      class_mode='categorical',
#                                                      shuffle=True)

# image_generator = ImageDataGenerator(rescale=1./255)
# test_generator = image_generator.flow_from_directory('/content/gdrive/My Drive/Imagens/Airplane models/Teste',
#                                                      target_size=(target_sz,target_sz),
#                                                      color_mode='rgb',
#                                                      batch_size=batch_sz,
#                                                      class_mode='categorical',
#                                                      shuffle=True)

In [3]:
import os

PROJECT = !gcloud config list --format 'value(core.project)'
PROJECT = PROJECT[0]
BUCKET = PROJECT+"-capstone"
REGION = "us-central1"

os.environ["BUCKET"] = BUCKET
os.environ["REGION"] = REGION

In [4]:
from google.cloud import storage
from collections import defaultdict
import os
import re
import random

# Initialize the storage client
storage_client = storage.Client()

# Set bucket name from environment variable
bucket_name = os.environ["BUCKET"]
bucket = storage_client.bucket(bucket_name)

image_folder = "train"

# List all image files in the specified folder
blobs = bucket.list_blobs(prefix=image_folder)

image_urls = []
labels = []
images = []

# Function to extract label from the blob name
def extract_label(blob_name):
    # Example regex to extract label: 'train/category/image.jpg'
    match = re.search(r'train/([^/]+)/.*', blob_name)
    return match.group(1) if match else 'unknown'

# Dictionary to keep track of image counts per label
label_counts = defaultdict(int)

# Dictionary to set a random limit for each label
# label_limits = defaultdict(lambda: random.randint(600, 700))

# Collect image URLs and their labels, limit to a random number between 600 and 700 per label
for blob in blobs:
    if blob.name.lower().endswith(('.png', '.jpg', '.jpeg')) and blob.name.lower().startswith('train/tomato'):
        label = extract_label(blob.name)
        # if label_counts[label] < label_limits[label]:
        image_urls.append(f"gs://{bucket_name}/{blob.name}")
        labels.append(label)
        label_counts[label] += 1

print(f"Found {len(image_urls)} images.")

# Print the count of images for each label
for label, count in label_counts.items():
    print(f"Label: {label}, Number of Images: {count}")


Found 18345 images.
Label: Tomato___Bacterial_spot, Number of Images: 1702
Label: Tomato___Early_blight, Number of Images: 1920
Label: Tomato___Late_blight, Number of Images: 1851
Label: Tomato___Leaf_Mold, Number of Images: 1882
Label: Tomato___Septoria_leaf_spot, Number of Images: 1745
Label: Tomato___Spider_mites Two-spotted_spider_mite, Number of Images: 1741
Label: Tomato___Target_Spot, Number of Images: 1827
Label: Tomato___Tomato_Yellow_Leaf_Curl_Virus, Number of Images: 1961
Label: Tomato___Tomato_mosaic_virus, Number of Images: 1790
Label: Tomato___healthy, Number of Images: 1926


In [5]:

# def load_dataset(csv_of_filenames, batch_size, training=True):
#     dataset = (
#         tf.data.TextLineDataset(filenames=csv_of_filenames)
#         .map(decode_csv)
#         .cache()
#     )

#     if training:
#         dataset = (
#             dataset.map(read_and_preprocess_with_augment)
#             .shuffle(SHUFFLE_BUFFER)
#             .repeat(count=None)
#         )  # Indefinately.
#     else:
#         dataset = dataset.map(read_and_preprocess).repeat(
#             count=1
#         )  # Each photo used once.

#     # Prefetch prepares the next set of batches while current batch is in use.
#     return dataset.batch(batch_size=batch_size).prefetch(buffer_size=AUTOTUNE)


# def load_data_from_gcs(image_urls):
#     for url in image_urls[:10]:
#         images.append(preprocess_image(url))
#     return images

In [6]:
# images = load_data_from_gcs(image_urls)

In [7]:
from google.cloud import storage
from collections import defaultdict
import os
import re
import random

# Initialize the storage client
storage_client = storage.Client()

# Set bucket name from environment variable
bucket_name = os.environ["BUCKET"]
bucket = storage_client.bucket(bucket_name)

image_folder = "valid"

# List all image files in the specified folder
blobs = bucket.list_blobs(prefix=image_folder)

val_image_urls = []
val_labels = []
val_images = []

# Function to extract label from the blob name
def extract_label(blob_name):
    # Example regex to extract label: 'train/category/image.jpg'
    match = re.search(r'valid/([^/]+)/.*', blob_name)
    return match.group(1) if match else 'unknown'

# Dictionary to keep track of image counts per label
label_counts = defaultdict(int)

# Dictionary to set a random limit for each label
# label_limits = defaultdict(lambda: random.randint(90, 105))

# Collect image URLs and their labels, limit to a random number between 600 and 700 per label
for blob in blobs:
    if blob.name.lower().endswith(('.png', '.jpg', '.jpeg')) and blob.name.lower().startswith('valid/tomato'):
        label = extract_label(blob.name)
        # if label_counts[label] < label_limits[label]:
        val_image_urls.append(f"gs://{bucket_name}/{blob.name}")
        val_labels.append(label)
        label_counts[label] += 1

print(f"Found {len(val_image_urls)} images.")

# Print the count of images for each label
for label, count in label_counts.items():
    print(f"Label: {label}, Number of Images: {count}")


Found 4585 images.
Label: Tomato___Bacterial_spot, Number of Images: 425
Label: Tomato___Early_blight, Number of Images: 480
Label: Tomato___Late_blight, Number of Images: 463
Label: Tomato___Leaf_Mold, Number of Images: 470
Label: Tomato___Septoria_leaf_spot, Number of Images: 436
Label: Tomato___Spider_mites Two-spotted_spider_mite, Number of Images: 435
Label: Tomato___Target_Spot, Number of Images: 457
Label: Tomato___Tomato_Yellow_Leaf_Curl_Virus, Number of Images: 490
Label: Tomato___Tomato_mosaic_virus, Number of Images: 448
Label: Tomato___healthy, Number of Images: 481


In [8]:
CLASS_NAMES = np.array(
    ["Tomato___Bacterial_spot", 
"Tomato___Early_blight", 
"Tomato___Late_blight",
"Tomato___Leaf_Mold",
"Tomato___Septoria_leaf_spot",
"Tomato___Spider_mites Two-spotted_spider_mite",
"Tomato___Target_Spot",
"Tomato___Tomato_Yellow_Leaf_Curl_Virus",
"Tomato___Tomato_mosaic_virus",
"Tomato___healthy"]
)

print(f"These are {len(CLASS_NAMES)} available classes:", CLASS_NAMES)

These are 10 available classes: ['Tomato___Bacterial_spot' 'Tomato___Early_blight' 'Tomato___Late_blight'
 'Tomato___Leaf_Mold' 'Tomato___Septoria_leaf_spot'
 'Tomato___Spider_mites Two-spotted_spider_mite' 'Tomato___Target_Spot'
 'Tomato___Tomato_Yellow_Leaf_Curl_Virus' 'Tomato___Tomato_mosaic_virus'
 'Tomato___healthy']


In [14]:
import tensorflow as tf
SHUFFLE_BUFFER = 1000 
batch_size = 32
MAX_DELTA = 63.0 / 255.0  # Change brightness by at most 17.7%
CONTRAST_LOWER = 0.2
CONTRAST_UPPER = 1.8


# Function to load and preprocess an image
def read_and_preprocess(file_path, label_str, random_augment=False):
    if random_augment:
        image = tf.io.read_file(file_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.random_brightness(image, MAX_DELTA)
        image = tf.image.random_contrast(image, CONTRAST_LOWER, CONTRAST_UPPER)
        image = tf.image.resize(image, [224, 224])  # Adjust to your target size
        image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
    else: 
        image = tf.io.read_file(file_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, [224, 224])  # Adjust to your target size
        image = tf.cast(image, tf.float32) / 255.0  # Normalize to [0, 1]
    label = tf.math.equal(CLASS_NAMES, label_str)
    return image, label

def read_and_preprocess_with_augment(file_path, label_str):
    return read_and_preprocess(file_path, label_str, random_augment=True)

def load_dataset(img_urls, lbls, batch_size, training=True):
    dataset = tf.data.Dataset.from_tensor_slices((img_urls, lbls))
    if training:
        dataset = dataset.map(read_and_preprocess_with_augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.shuffle(SHUFFLE_BUFFER)
        dataset = dataset.repeat()
    else:
        dataset = dataset.map(read_and_preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.repeat(1)
        
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset
    

# Load training and validation datasets
train_dataset = load_dataset(image_urls, labels, batch_size, training=True)
val_dataset = load_dataset(val_image_urls, val_labels, batch_size, training=False)



In [15]:
for image, label in train_dataset.take(1):
    print(image.shape)
    print(image)

2024-06-25 08:20:27.678924: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [18345]
	 [[{{node Placeholder/_1}}]]
2024-06-25 08:20:27.679434: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [18345]
	 [[{{node Placeholder/_1}}]]
2024-06-25 08:20:37.718047: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 726 of 1000
2024-06-25 08:20:42.745609: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.


(32, 224, 224, 3)
tf.Tensor(
[[[[0.4254902  0.4053221  0.37420967]
   [0.45852342 0.43549418 0.40334132]
   [0.4782313  0.45686275 0.42314926]
   ...
   [0.53353345 0.5142057  0.4867547 ]
   [0.53333336 0.5137855  0.48633453]
   [0.53333336 0.5137255  0.4862745 ]]

  [[0.42358944 0.39983994 0.37210885]
   [0.45188075 0.4252701  0.39407766]
   [0.44903964 0.4232293  0.3926971 ]
   ...
   [0.5397358  0.5235093  0.49605834]
   [0.537535   0.51924765 0.4917967 ]
   [0.53909564 0.5195478  0.49209684]]

  [[0.43437374 0.4096439  0.38191277]
   [0.46158463 0.43885556 0.40832335]
   [0.43435374 0.4090236  0.38107243]
   ...
   [0.5506402  0.5328331  0.5053821 ]
   [0.5534414  0.53721493 0.50866354]
   [0.56292534 0.5435975  0.5160466 ]]

  ...

  [[0.467427   0.45192075 0.42839134]
   [0.47791108 0.46474585 0.44121644]
   [0.4764104  0.46234477 0.43881536]
   ...
   [0.5487994  0.54257697 0.5160263 ]
   [0.5448178  0.5375149  0.51314515]
   [0.542937   0.53509384 0.510264  ]]

  [[0.47911173 0

In [16]:
for image, label in val_dataset.take(1):
    print(image.shape)
    print(image)

2024-06-25 08:20:44.014907: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [4585]
	 [[{{node Placeholder/_1}}]]
2024-06-25 08:20:44.017471: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [4585]
	 [[{{node Placeholder/_1}}]]


(32, 224, 224, 3)
tf.Tensor(
[[[[0.38007203 0.34085634 0.33693478]
   [0.56656665 0.5273509  0.52342933]
   [0.5322329  0.4930172  0.48909563]
   ...
   [0.6185876  0.5832935  0.5950582 ]
   [0.6331133  0.5978192  0.6095839 ]
   [0.63921577 0.60392165 0.61568636]]

  [[0.44783917 0.4086235  0.40470192]
   [0.5597639  0.5205482  0.51662666]
   [0.53635454 0.49713886 0.4932173 ]
   ...
   [0.62386966 0.58857554 0.60034025]
   [0.63267314 0.597379   0.60914373]
   [0.6370349  0.6017408  0.6135055 ]]

  [[0.5323529  0.49313724 0.48921567]
   [0.5345338  0.4953181  0.49139655]
   [0.50680274 0.46758705 0.4636655 ]
   ...
   [0.6080632  0.5727691  0.5845338 ]
   [0.6084434  0.57314926 0.58491397]
   [0.60868347 0.57338935 0.58515406]]

  ...

  [[0.5467988  0.5154263  0.50758314]
   [0.5560823  0.52470976 0.5168666 ]
   [0.56246483 0.5310923  0.52324915]
   ...
   [0.6278512  0.6043218  0.6121649 ]
   [0.63193274 0.6084033  0.61624646]
   [0.6340736  0.6105442  0.61838734]]

  [[0.56382567 0

In [10]:
# import tensorflow as tf

# # Assuming `dataset` is your tf.data.Dataset object
# for img_url, lbl in val_dataset.take(2):
#     print(f"Image shape: {img_url.shape}, Label: {lbl}")
# len(val_dataset)

In [11]:
# from tensorflow.keras import Input, Model
# from tensorflow.keras.layers import DepthwiseConv2D, Conv2D, BatchNormalization, AveragePooling2D, Dense, Activation, Flatten, Reshape, Add, Dropout
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras import backend as K
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
# import numpy as np

# VALIDATION_STEPS = 980 // 32

# model_v2 = MobileNetV2_ex(input_shape=(224,224,3), num_units=10, alp=1, ro=1, dropout_perc=0.2)

# optimizer = Adam(lr=0.001)

# model_v2.compile(optimizer=optimizer,loss='categorical_crossentropy', metrics=['accuracy'])

# model_check_point_v2 = ModelCheckpoint(filepath='../model/model_mb_v2.hdf5',monitor='val_loss',verbose=1,save_best_only=False,save_weights_only=False)

# reduce_lr = ReduceLROnPlateau(monitor='val_loss',factor=0.7,patience=10,verbose=1,min_lr=0.0001)


# history_train_v2 = model_v2.fit(
#                     train_dataset,
#                     epochs=10,
#                     steps_per_epoch=5,
#                     validation_data=val_dataset,
#                     validation_steps=VALIDATION_STEPS
# )

# # workers=8,
# # use_multiprocessing=False,
# # callbacks=[reduce_lr, model_check_point_v2])
# model_v2.save('../model/model_mb_v2.hdf5')

In [12]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam

base_model = MobileNetV2(input_shape=(224,224,3), include_top=False, weights='imagenet')

# Add a global average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully connected layer
x = Dense(512, activation='relu')(x)

# Add a classification layer with softmax activation
predictions = Dense(10, activation='softmax')(x)  # 10 classes 

# Combine the base model with the custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])
optimizer = Adam(lr=0.0001)

model.compile(optimizer=optimizer,loss='categorical_crossentropy', metrics=['accuracy'])


early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss',factor=0.7,patience=1,verbose=1,min_lr=0.000001)
model_check_point_v2 = ModelCheckpoint(filepath='../model/model_mb_v2_3.hdf5',monitor='val_loss',verbose=1,save_best_only=True,save_weights_only=False)




In [None]:
epochs = 10
batch_size = 32
epo_step = len(labels) // 32
VALIDATION_STEPS = len(val_labels) // 32

model.fit(train_dataset,
          epochs=epochs,
          steps_per_epoch= epo_step,
          batch_size=batch_size,
          validation_data=val_dataset,
          validation_steps = VALIDATION_STEPS,
          callbacks=[reduce_lr, model_check_point_v2])

model.save('../model/model_mb_v2_3.hdf5')




Epoch 1/10


2024-06-25 07:10:53.467857: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [18345]
	 [[{{node Placeholder/_1}}]]
2024-06-25 07:10:53.468275: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [18345]
	 [[{{node Placeholder/_0}}]]
2024-06-25 07:11:13.252281: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 990 of 1000
2024-06-25 07:11:13.359427: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:417] Shuffle buffer filled.
2024-06-25 07:11:14.179335: I tensorflow/compiler/xl



2024-06-25 07:14:54.998910: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [4585]
	 [[{{node Placeholder/_0}}]]
2024-06-25 07:14:54.999267: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [4585]
	 [[{{node Placeholder/_1}}]]



Epoch 1: val_loss improved from inf to 14.49984, saving model to ../model/model_mb_v2_3.hdf5
Epoch 2/10
Epoch 2: ReduceLROnPlateau reducing learning rate to 0.0007000000332482159.

Epoch 2: val_loss did not improve from 14.49984
Epoch 3/10
Epoch 3: val_loss improved from 14.49984 to 13.05360, saving model to ../model/model_mb_v2_3.hdf5
Epoch 4/10
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0004900000232737511.

Epoch 4: val_loss did not improve from 13.05360
Epoch 5/10
Epoch 5: val_loss improved from 13.05360 to 9.97078, saving model to ../model/model_mb_v2_3.hdf5
Epoch 6/10
Epoch 6: val_loss improved from 9.97078 to 8.08434, saving model to ../model/model_mb_v2_3.hdf5
Epoch 7/10

In [None]:
# from tensorflow.keras.models import load_model
# model = load_model('../model/model_mb_v2.hdf5')

In [None]:
# from google.cloud import storage
# from collections import defaultdict
# import os
# import re
# import random


# # Initialize the storage client
# storage_client = storage.Client()

# # Set bucket name from environment variable
# bucket_name = os.environ["BUCKET"]
# bucket = storage_client.bucket(bucket_name)

# image_folder = "test"

# # List all image files in the specified folder
# blobs = bucket.list_blobs(prefix=image_folder)

# image_urls = []
# labels = []
# images = []


# # Dictionary to keep track of image counts per label
# label_counts = defaultdict(int)

# # Dictionary to set a random limit for each label
# label_limits = defaultdict(lambda: random.randint(600, 700))

# # Collect image URLs and their labels, limit to a random number between 600 and 700 per label
# for blob in blobs:
#     if blob.name.lower().endswith(('.png', '.jpg', '.jpeg')) and blob.name.lower().startswith('test/tomato'):
#         label = blob.name.replace('test/', '')
#         if label_counts[label] < label_limits[label]:
#             image_urls.append(f"gs://{bucket_name}/{blob.name}")
#             labels.append(label)
#             label_counts[label] += 1

# print(f"Found {len(image_urls)} images.")

# # Print the count of images for each label
# for label, count in label_counts.items():
#     print(f"Label: {label}, Number of Images: {count}")


In [None]:
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.preprocessing import image
# from io import BytesIO

# # Helper function to download and preprocess an image
# def download_and_preprocess_image(url):
#     # Download image
#     image_blob = bucket.blob(url.replace(f"gs://{bucket_name}/", ""))
#     image_content = image_blob.download_as_bytes()
    
#     # Load image
#     img = image.load_img(BytesIO(image_content), target_size=(224, 224))
#     img_array = image.img_to_array(img)
#     img_array = np.expand_dims(img_array, axis=0)
#     img_array /= 255.0
#     return img_array

# # Download and preprocess test images
# test_images = np.vstack([download_and_preprocess_image(url) for url in image_urls])

# # Load your model (assuming it's a Keras model)
# model = tf.keras.models.load_model('../model/model_mb_v2.hdf5')

# # Make predictions
# predictions = model.predict(test_images)

# # Print predictions
# for url, prediction in zip(image_urls, predictions):
#     print(f"Image URL: {url}, Prediction: {prediction}")