# ResNet

## Installation and import of the libraries

In [2]:
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
import numpy as np

## Preprocess

In [3]:
# Cargar el archivo CSV con las características
df = pd.read_csv('wide_data.csv')

# Definir la carpeta donde se encuentran las imágenes
image_folder = '../archive/images/images'

df_subset = df.sample(n=5000, random_state=42)

def preprocess_image(img_path):
    try:
        img = load_img(img_path, target_size=(224, 224))  # Redimensionar la imagen
        img_array = img_to_array(img)  # Convertir la imagen a array
        img_array = preprocess_input(img_array)  # Preprocesar la imagen para ResNet50
        return img_array
    except Exception as e:
        print(f"Error al cargar la imagen {img_path}: {e}")
        return None  # Retornar None si la imagen no se carga correctamente

# Función para cargar las etiquetas con valores nulos reemplazados por 0
def load_labels(row):
    label = [
        row['silhouette_type'] if pd.notna(row['silhouette_type']) else 0,
        row['waist_type'] if pd.notna(row['waist_type']) else 0,
        row['neck_lapel_type'] if pd.notna(row['neck_lapel_type']) else 0,
        row['sleeve_length_type'] if pd.notna(row['sleeve_length_type']) else 0,
        row['toecap_type'] if pd.notna(row['toecap_type']) else 0,
        row['closure_placement'] if pd.notna(row['closure_placement']) else 0,
        row['cane_height_type'] if pd.notna(row['cane_height_type']) else 0,
        row['heel_shape_type'] if pd.notna(row['heel_shape_type']) else 0,
        row['knit_structure'] if pd.notna(row['knit_structure']) else 0,
        row['length_type'] if pd.notna(row['length_type']) else 0,
        row['woven_structure'] if pd.notna(row['woven_structure']) else 0
    ]
    return label

# Función que convierte las imágenes y etiquetas en tensores adecuados para el entrenamiento
def image_data_generator(df, image_folder, batch_size=32):
    images = []
    labels = []

    for index, row in df.iterrows():
        img_id = row['des_filename']
        img_path = os.path.join(image_folder, f"{img_id}")

        try:
            img_array = preprocess_image(img_path)
            images.append(img_array)
        except Exception as e:
            print(f"Error al cargar la imagen {img_path}: {e}")
            continue

        label = load_labels(row)
        labels.append(label)

        # Cuando se alcanza el tamaño del lote, se devuelve el lote
        if len(images) == batch_size:
            labels_one_hot = [to_categorical(np.array(labels)[:, i], num_classes=10000) for i in range(len(labels[0]))]
            yield np.array(images), tuple(labels_one_hot)  # Devolvemos una tupla
            images = []
            labels = []

    # Devolver el último lote si hay imágenes restantes
    if len(images) > 0:
        labels_one_hot = [to_categorical(np.array(labels)[:, i], num_classes=10000) for i in range(len(labels[0]))]
        yield np.array(images), tuple(labels_one_hot)  # Devolvemos una tupla


## Build

In [4]:
# Crear el modelo base de ResNet50 sin la capa superior
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Agregar las capas para la predicción de las características
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Añadir una capa de salida para cada característica
output_silhouette = Dense(10000, activation='softmax', name='silhouette_type')(x)
output_waist = Dense(10000, activation='softmax', name='waist_type')(x)
output_neck_lapel = Dense(10000, activation='softmax', name='neck_lapel_type')(x)
output_sleeve_length = Dense(10000, activation='softmax', name='sleeve_length_type')(x)
output_toecap = Dense(10000, activation='softmax', name='toecap_type')(x)
output_closure = Dense(10000, activation='softmax', name='closure_placement')(x)
output_cane_height = Dense(10000, activation='softmax', name='cane_height_type')(x)
output_heel_shape = Dense(10000, activation='softmax', name='heel_shape_type')(x)
output_knit = Dense(10000, activation='softmax', name='knit_structure')(x)
output_length = Dense(10000, activation='softmax', name='length_type')(x)
output_woven = Dense(10000, activation='softmax', name='woven_structure')(x)

# Crear el modelo con múltiples salidas
model = Model(inputs=base_model.input, outputs=[output_silhouette, output_waist, output_neck_lapel,
                                                output_sleeve_length, output_toecap, output_closure,
                                                output_cane_height, output_heel_shape, output_knit,
                                                output_length, output_woven])

# Congelar las capas del modelo base
for layer in base_model.layers:
    layer.trainable = False

# Compilar el modelo
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'] * 11)


## Train

In [5]:
# Uso de tf.data para generar los lotes de imágenes y etiquetas
train_dataset = tf.data.Dataset.from_generator(
    lambda: image_data_generator(df_subset, image_folder),
    output_signature=(
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        (tf.TensorSpec(shape=(None, 10000), dtype=tf.float32),) * 11  # Definir las salidas como una tupla de tensores
    )
)

# Ajustar el número de pasos por época y el tamaño de los lotes
steps_per_epoch = len(df_subset) // 32

# Entrenamiento del modelo
model.fit(train_dataset, steps_per_epoch=steps_per_epoch, epochs=5)

Epoch 1/5
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 2s/step - cane_height_type_accuracy: 0.9213 - cane_height_type_loss: 1.1015 - closure_placement_accuracy: 0.5152 - closure_placement_loss: 1.9447 - heel_shape_type_accuracy: 0.8980 - heel_shape_type_loss: 1.2652 - knit_structure_accuracy: 0.7748 - knit_structure_loss: 1.4823 - length_type_accuracy: 0.4357 - length_type_loss: 2.3849 - loss: 20.3014 - neck_lapel_type_accuracy: 0.3825 - neck_lapel_type_loss: 2.9032 - silhouette_type_accuracy: 0.3776 - silhouette_type_loss: 2.7604 - sleeve_length_type_accuracy: 0.5495 - sleeve_length_type_loss: 1.8657 - toecap_type_accuracy: 0.8965 - toecap_type_loss: 1.1355 - waist_type_accuracy: 0.7484 - waist_type_loss: 1.5196 - woven_structure_accuracy: 0.4385 - woven_structure_loss: 1.9383
Epoch 2/5
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 243us/step - cane_height_type_accuracy: 1.0000 - cane_height_type_loss: 7.7494e-04 - closure_placement_accuracy:

2024-11-17 06:02:13.253216: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(typ, value, traceback)


[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 2s/step - cane_height_type_accuracy: 0.9959 - cane_height_type_loss: 0.0154 - closure_placement_accuracy: 0.6836 - closure_placement_loss: 0.8526 - heel_shape_type_accuracy: 0.9869 - heel_shape_type_loss: 0.0550 - knit_structure_accuracy: 0.8530 - knit_structure_loss: 0.4435 - length_type_accuracy: 0.6041 - length_type_loss: 1.1374 - loss: 7.5121 - neck_lapel_type_accuracy: 0.5455 - neck_lapel_type_loss: 1.4359 - silhouette_type_accuracy: 0.4673 - silhouette_type_loss: 1.5191 - sleeve_length_type_accuracy: 0.7811 - sleeve_length_type_loss: 0.6403 - toecap_type_accuracy: 0.9887 - toecap_type_loss: 0.0344 - waist_type_accuracy: 0.8393 - waist_type_loss: 0.4299 - woven_structure_accuracy: 0.5988 - woven_structure_loss: 0.9485
Epoch 4/5
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 108us/step - cane_height_type_accuracy: 1.0000 - cane_height_type_loss: 2.9969e-04 - closure_placement_accuracy: 1.0000 - c

2024-11-17 06:06:57.265510: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m283s[0m 2s/step - cane_height_type_accuracy: 0.9974 - cane_height_type_loss: 0.0067 - closure_placement_accuracy: 0.7076 - closure_placement_loss: 0.7697 - heel_shape_type_accuracy: 0.9925 - heel_shape_type_loss: 0.0248 - knit_structure_accuracy: 0.8622 - knit_structure_loss: 0.3956 - length_type_accuracy: 0.6281 - length_type_loss: 1.0206 - loss: 6.6779 - neck_lapel_type_accuracy: 0.6005 - neck_lapel_type_loss: 1.2389 - silhouette_type_accuracy: 0.5084 - silhouette_type_loss: 1.3633 - sleeve_length_type_accuracy: 0.8068 - sleeve_length_type_loss: 0.5692 - toecap_type_accuracy: 0.9932 - toecap_type_loss: 0.0219 - waist_type_accuracy: 0.8537 - waist_type_loss: 0.3793 - woven_structure_accuracy: 0.6243 - woven_structure_loss: 0.8879


<keras.src.callbacks.history.History at 0x7f7b737c7760>

## Test

### Imports

In [6]:
import os
import csv
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import preprocess_input
import numpy as np
from PIL import UnidentifiedImageError

### Test dataset

In [7]:
# Ruta de la carpeta de imágenes
test_image_folder = '../archive/images/images'

# Obtener la lista de archivos en la carpeta
all_images = sorted(
    [f for f in os.listdir(test_image_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
)[-5000:]  # Seleccionar las últimas 3500 imágenes

# Función para cargar y preprocesar una imagen
def preprocess_image(image_path):
    try:
        img = load_img(image_path, target_size=(224, 224))  # Redimensionar
        img_array = img_to_array(img)  # Convertir a array
        img_array = preprocess_input(img_array)  # Preprocesar
        return img_array
    except UnidentifiedImageError:
        print(f"Advertencia: No se pudo cargar la imagen {image_path}")
        return None

# Filtrar imágenes válidas
test_images_arrays = []
valid_image_paths = []

for img in all_images:
    img_path = os.path.join(test_image_folder, img)
    processed_img = preprocess_image(img_path)
    if processed_img is not None:
        test_images_arrays.append(processed_img)
        valid_image_paths.append(img)  # Solo guardar rutas de imágenes válidas

test_images_arrays = np.array(test_images_arrays)

# Crear el archivo CSV para guardar los resultados
output_csv = 'predictions.csv'


Advertencia: No se pudo cargar la imagen ../archive/images/images/88_49720742_67044470-56_.jpg


### Predictions

In [8]:
# Realizar predicciones
predictions = model.predict(test_images_arrays)

# Crear el archivo CSV para guardar los resultados
output_csv = 'predictions.csv'

# Escribir los resultados en el archivo CSV
with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['test_id', 'des_value'])  # Escribir cabecera del archivo CSV

    for img_idx, img_id in enumerate(valid_image_paths):
        img_name = os.path.splitext(img_id)[0]  # Obtener el ID de la imagen sin extensión
        for output_idx, output_name in enumerate(model.output_names):
            class_id = np.argmax(predictions[output_idx][img_idx])  # Clase más probable
            writer.writerow([f"{img_name}_{output_name}", f"{class_id:04d}"])  # Escribir la fila con formato numérico


: 

## Rewrite

In [12]:
import pandas as pd

# Cargar los datos, omitiendo la primera fila
predictions = pd.read_csv('predictions.csv', header=None, names=['test_id', 'des_value'], skiprows=1)

# Cargar los datos de atributos
attributes_data = pd.read_csv('../archive/attribute_data.csv', header=None, 
                              names=['cod_modelo_color', 'attribute_name', 'cod_value', 'des_value'], skiprows=1)

# Crear un diccionario de mapeo {cod_value: des_value} desde attribute_data
value_to_description = attributes_data.set_index('cod_value')['des_value'].to_dict()

# Función para limpiar 'test_id'
def clean_test_id(test_id):
    parts = test_id.split('__')
    if len(parts) > 1 and '-' in parts[0]:
        prefix, suffix = parts[0].split('-')[0].split('_')[:2]
        attribute = parts[1]
        return f"{prefix}_{suffix}_{attribute}"
    return None

# Aplicar la función para limpiar IDs
predictions['cleaned_test_id'] = predictions['test_id'].apply(clean_test_id)

# Filtrar filas con IDs válidos
predictions = predictions[predictions['cleaned_test_id'].notnull()]

# Mapear los valores de la columna 'des_value' a sus descripciones
predictions['des_value'] = predictions['des_value'].map(value_to_description)

# Eliminar filas duplicadas basadas en 'cleaned_test_id'
unique_predictions = predictions.drop_duplicates(subset='cleaned_test_id', keep='first')

# Guardar el archivo resultante en submission.csv
unique_predictions[['cleaned_test_id', 'des_value']].to_csv('submission.csv', index=False)
