# Imports


In [18]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import os
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.utils.class_weight import compute_class_weight

from sklearn.preprocessing import OneHotEncoder
import pickle
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import regularizers

# <h2 id="settings"> Settings </h2>

In [19]:

# Number of samples per batch; used when batching the train/val/test tf.data pipelines.
batch_size = 128
# Target height and width (in pixels) that preprocess_image resizes every image to.
img_height = 128
img_width = 128
# Directory of the downloaded product images.
# NOTE(review): not referenced by the cells visible in this review — confirm it is still needed.
data_dir = 'imagenes/product_images'

## Data Load

We load the cleaned data, which excludes records that have no image or whose image could not be downloaded (the image URL returned a 404 error).


In [20]:
# Prefix prepended to the CSV file names when the splits are loaded;
# the empty string means the files are read relative to the working directory.
path_csv= ''

In [21]:
# Read the three pre-filtered splits (train, validation, test) in one pass.
train_data, val_data, test_data = (
    pd.read_csv(path_csv + csv_name)
    for csv_name in ('train_filtered.csv', 'val_filtered.csv', 'test_filtered.csv')
)

# LABEL ENCODERS FUNCTION


In [22]:
# Snapshot of the file names available in ./artifacts (e.g. pickled label encoders).
# Fall back to an empty list when the directory does not exist so that a fresh
# checkout reaches the "encoder not found" path instead of crashing here.
listdis = os.listdir('./artifacts') if os.path.isdir('./artifacts') else []

In [29]:

def encode_labels(train_data, val_data, test_data, column, category_level):
    """Integer- and one-hot-encode one label column for the three data splits.

    A pickled encoder is loaded from
    ``artifacts/label_encoder_level<category_level>.pkl`` when that file exists;
    otherwise a new ``LabelEncoder`` is fitted on the training labels.

    Args:
        train_data: Training split, indexable by ``column``.
        val_data: Validation split, indexable by ``column``.
        test_data: Test split, indexable by ``column``.
        column: Name of the label column to encode.
        category_level: Suffix that identifies the encoder file (e.g. ``'1'``).

    Returns:
        A tuple ``(train_onehot, val_onehot, test_onehot, train_codes, num_classes)``
        where the one-hot arrays all have ``num_classes`` columns, ``train_codes``
        holds the integer-encoded training labels and ``num_classes`` is the
        number of classes known to the encoder.

    Raises:
        ValueError: Propagated from ``LabelEncoder.transform`` when a split
            contains a label the encoder has not seen.
    """
    train_labels = train_data[column]
    val_labels = val_data[column]
    test_labels = test_data[column]

    encoder_path = f'artifacts/label_encoder_level{category_level}.pkl'

    # Check the file system directly instead of relying on a directory listing
    # captured earlier in the notebook, which may be stale or undefined.
    if os.path.isfile(encoder_path):
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # encoder files that were produced by this project.
        with open(encoder_path, 'rb') as f:
            le = pickle.load(f)
        print('found file')
    else:
        # An unfitted encoder has no classes_ and cannot transform, so fit it
        # on the training labels before it is used.
        le = LabelEncoder().fit(train_labels)
        print("don't found file")

    print(f"num clases {le.classes_} " )

    # Derive the class count from the encoder rather than hard-coding it.
    num_classes = len(le.classes_)
    train_label_le = le.transform(train_labels)
    val_label_le = le.transform(val_labels)
    test_label_le = le.transform(test_labels)

    # Pass num_classes explicitly so every split gets the same one-hot width,
    # even when the highest class id does not occur in that split.
    train_label_cate = to_categorical(train_label_le, num_classes=num_classes)
    val_label_cate = to_categorical(val_label_le, num_classes=num_classes)
    test_label_cate = to_categorical(test_label_le, num_classes=num_classes)

    return train_label_cate, val_label_cate, test_label_cate, train_label_le, num_classes



# Image preprocessing and `create_dataset` functions

In [None]:
def preprocess_image(image_path):
    """Load one image file and return it resized and rescaled.

    The file is decoded as a 3-channel JPEG, resized to
    ``(img_height, img_width)`` and divided by 255 so that pixel values
    fall in the [0, 1] range.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [img_height, img_width])
    # Normalize the pixels to [0, 1]
    return resized / 255.0

def create_dataset(data, image_column, labels):
    """Build an unbatched ``tf.data.Dataset`` of ``(image, label)`` pairs.

    Image paths are taken from ``data[image_column]`` and paired with
    ``labels``; each path is turned into an image tensor by
    ``preprocess_image`` with parallelism chosen by ``tf.data.AUTOTUNE``.
    """
    def _load_pair(path, label):
        # Only the path is transformed; the label is passed through untouched.
        return (preprocess_image(path), label)

    paths = data[image_column].values
    path_label_ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    return path_label_ds.map(_load_pair, num_parallel_calls=tf.data.AUTOTUNE)



# -------------------    L E V E L   1   ----------------------------

# LABEL ENCODERS LEVEL 1

In [33]:
# Encode the level-1 category column for all three splits.
train_labels, val_labels, test_labels, train_label_le, num_classes = encode_labels(
    train_data, val_data, test_data, 'subcat1_name', '1'
)

# Compute "balanced" class weights from the integer-encoded training labels.
present_classes = np.unique(train_label_le)
class_weights = compute_class_weight('balanced', classes=present_classes, y=train_label_le)

# Key every weight by its actual class id. Enumerating the weights would only
# be correct when the ids present in the training split are exactly 0..K-1;
# if a class is missing from training, positions and ids no longer line up.
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}

carge
num clases ['Appliances' 'Audio' 'Cameras & Camcorders' 'Car Electronics & GPS'
 'Cell Phones' 'Computers & Tablets' 'Connected Home & Housewares'
 'Health, Fitness & Beauty' 'Musical Instruments' 'Other'
 'TV & Home Theater' 'Toys, Games & Drones' 'Video Games'] 


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


# Create dataset

In [34]:
# Build one (image, label) pipeline per split from the 'image' path column.
train_ds, val_ds, test_ds = (
    create_dataset(frame, 'image', onehot)
    for frame, onehot in (
        (train_data, train_labels),
        (val_data, val_labels),
        (test_data, test_labels),
    )
)

# Only the training pipeline is shuffled; all three are batched and prefetched.
train_ds = (
    train_ds
    .shuffle(1000)
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
val_ds = (
    val_ds
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
test_ds = (
    test_ds
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [43]:
# Preview the first training record to check the column layout.
train_data.head(1)

Unnamed: 0,name,type,description,image,subcat1_name,subcat2_name,subcat3_name,subcat4_name,subcat5_name
0,mod. - Victoria Camera Accessory Kit - Hot Pink,HardGood,MOD. Victoria Camera Accessory Kit: Compatible...,imagenes/product_images/1285055.jpg,Cameras & Camcorders,Digital Camera Accessories,Other,Other,Universal Camera Bags & Cases


# Model CNN

## CNN Imports 

In [48]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import json

## Model architecture build_basic_cnn

In [49]:
def build_basic_cnn(input_shape, num_classes):
    """Assemble the baseline image-classification CNN (uncompiled).

    Architecture:
        * four convolutional stages with 256, 128, 64 and 32 filters; each
          stage is a 3x3 ReLU convolution with L2 regularization (0.001),
          followed by 2x2 max pooling and 20% dropout;
        * flatten, then a 128-unit ReLU dense layer with L2 regularization
          (0.001), batch normalization and 50% dropout;
        * a softmax output layer with ``num_classes`` units.

    Args:
        input_shape: Shape of a single input image, passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A Keras ``Model`` mapping the input layer to the softmax output.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor: same stage layout, shrinking filter count.
    features = inputs
    for n_filters in (256, 128, 64, 32):
        features = Conv2D(
            n_filters, (3, 3), activation='relu', kernel_regularizer=l2(0.001)
        )(features)
        features = MaxPooling2D((2, 2))(features)
        features = Dropout(0.2)(features)

    # Classification head on top of the flattened feature maps.
    head = Flatten()(features)
    head = Dense(128, activation='relu', kernel_regularizer=l2(0.001))(head)
    head = BatchNormalization()(head)
    head = Dropout(0.5)(head)
    outputs = Dense(num_classes, activation='softmax')(head)

    return Model(inputs=inputs, outputs=outputs)


### Initialize and Compile the CNN Model with Callbacks

In [52]:
# Clear any previous Keras session to ensure that the model is built from scratch
tf.keras.backend.clear_session()

# Derive the model dimensions from the notebook settings and the encoded labels
# instead of repeating literals: the input shape must match the size that
# preprocess_image resizes to, and the output width must match the one-hot labels.
input_shape = (img_height, img_width, 3)
num_classes = train_labels.shape[1]

# Build a basic CNN model using the input shape and number of classes defined above
model = build_basic_cnn(input_shape, num_classes)

# Compile the model with the Adam optimizer, categorical cross-entropy loss function, and accuracy as the metric
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define a callback to stop training early if the validation loss does not improve for 10 epochs, restoring the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Define a callback to reduce the learning rate when the validation loss plateaus, with a minimum learning rate threshold
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-7)


In [53]:
# Report the shape of the one-hot label matrix of every split
for caption, onehot in (
    ("Shape of train labels:", train_labels),
    ("Shape of validation labels:", val_labels),
    ("Shape of test labels:", test_labels),
):
    print(caption, onehot.shape)

# Pull a single batch from the training and validation pipelines to confirm their structure
for split_ds in (train_ds, val_ds):
    for images, labels in split_ds.take(1):
        print("Shape of batch images:", images.shape)
        print("Shape of batch labels:", labels.shape)


Shape of train labels: (35018, 13)
Shape of validation labels: (7515, 13)
Shape of test labels: (7491, 13)
Shape of batch images: (128, 128, 128, 3)
Shape of batch labels: (128, 13)
Shape of batch images: (128, 128, 128, 3)
Shape of batch labels: (128, 13)


## Fit Model Level 1

In [None]:
# Train for at most 100 epochs, validating on val_ds after each epoch.
# Class weights are applied to the loss, and the callbacks handle early
# stopping and learning-rate reduction.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=100,
    class_weight=class_weight_dict,
    callbacks=[
        early_stopping,
        reduce_lr,
    ],
)

In [None]:
def save_model(level_input, model, history):
    """Persist a trained model, its config, its weights and its training history.

    Four files are written under ``models/`` (created if missing):
    ``level<level_input>_model_image.keras`` (via ``model.save``) and three
    pickles holding ``model.get_config()``, ``model.get_weights()`` and
    ``history.history``.

    Args:
        level_input: Category level used to build the file names.
        model: Object exposing ``save(path)``, ``get_config()`` and
            ``get_weights()`` (the notebook passes a Keras model).
        history: Object exposing a ``history`` attribute (the notebook passes
            the value returned by ``model.fit``).
    """
    # Make sure the output directory exists before any file is written.
    os.makedirs('models', exist_ok=True)

    # Construct filenames using the provided level input
    model_filename = f'models/level{level_input}_model_image.keras'
    config_filename = f'models/level{level_input}_config_images.pkl'
    weights_filename = f'models/level{level_input}_weights_images.pkl'
    # Use a dedicated name for the path: reusing ``history`` here would replace
    # the history object with a string and break ``history.history`` below.
    history_filename = f'models/level{level_input}_history_images.pkl'

    # Save the model to a file
    model.save(model_filename)

    # Get the configuration and weights of the model
    config = model.get_config()
    weights = model.get_weights()

    # Save the model configuration to a file
    with open(config_filename, "wb") as f:
        pickle.dump(config, f)

    # Save the model weights to a file
    with open(weights_filename, "wb") as f:
        pickle.dump(weights, f)

    # Save the training history
    with open(history_filename, 'wb') as f:
        pickle.dump(history.history, f)

    print(f"The model architecture and weights for level {level_input} have been saved.")

In [None]:
# Persist the trained level-1 model together with its training history.
level_input = 1
save_model(level_input, model, history)

## Test

In [None]:
import numpy as np

# Run the trained model over the whole test pipeline
test_predictions = model.predict(test_ds)

# Collapse predicted probabilities and one-hot labels to integer class ids
test_pred_classes = test_predictions.argmax(axis=1)
test_true_classes = test_labels.argmax(axis=1)


### Evaluating Model Performance: F1-Score, Precision, Recall, and Classification Report

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

# Support-weighted F1, precision and recall over the test predictions
f1, precision, recall = (
    scorer(test_true_classes, test_pred_classes, average='weighted')
    for scorer in (f1_score, precision_score, recall_score)
)

# Print the metrics
print(f'F1-score: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')

# Per-class breakdown
per_class_report = classification_report(test_true_classes, test_pred_classes)
print(per_class_report)


### Visualizing Model Training: Loss and Accuracy Curves

In [None]:
import matplotlib.pyplot as plt

# Draw one figure per tracked quantity (loss, then accuracy), each comparing
# the training curve with the validation curve across epochs.
for train_key, val_key, train_label, val_label, y_label, title in (
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss'),
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy'),
):
    plt.figure(figsize=(8, 4))
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.show()
