In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm


In [57]:
# Assuming train_data is a pandas DataFrame with columns 'des_filename' and 'label'
img_dir = './images/images/'
train_data = pd.read_csv('./data/train_data.csv')

In [50]:
def load_images_and_labels(images_path, train_data):
    images = []
    labels = []

    # Crear rutas de las imágenes
    image_paths = train_data['des_filename'].map(lambda x: os.path.join(images_path, x))
    
    # Iterar sobre imágenes y etiquetas
    for image_path, label in tqdm(zip(image_paths, train_data[['cane_height_type', 'heel_shape_type', 'toecap_type']].values), 
                                  total=len(train_data), desc="Loading Images"):
        if os.path.exists(image_path):
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (64, 64))  # Redimensionar las imágenes
            images.append(img.flatten())
            labels.append(label)

    return np.array(images), np.array(labels)

In [51]:
train_data_new = train_data[train_data['type']=='Shoes']
train_data = train_data_new[['des_filename','cane_height_type','heel_shape_type','toecap_type']]

X, y = load_images_and_labels(img_dir, train_data)

encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y).toarray()

y_cane_height = y[:, 0]
y_heel_shape = y[:, 1]
y_toecap = y[:, 2]

X_train, X_val, y_train_cane, y_val_cane = train_test_split(X, y_cane_height, test_size=0.2, random_state=42)
_, _, y_train_heel, y_val_heel = train_test_split(X, y_heel_shape, test_size=0.2, random_state=42)
_, _, y_train_toecap, y_val_toecap = train_test_split(X, y_toecap, test_size=0.2, random_state=42)

rf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'bootstrap': [True],
    'max_features': ['auto'],
    'criterion': ['gini', 'entropy'],
}

Loading Images: 100%|██████████| 2399/2399 [00:00<00:00, 4673.44it/s]


In [52]:
def train_and_evaluate(X_train, y_train, X_val, y_val, category_name):

    print(f"Entrenando modelo para {category_name}...")
    grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, 
                               cv=5, 
                               n_jobs=-1, 
                               verbose=2, 
                               scoring='accuracy')
    
    print("Fitting model with GridSearchCV...")
    grid_search.fit(X_train, y_train)

    # Mejor modelo
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    print(f"Mejores hiperparámetros para {category_name}: {best_params}")

    # Evaluar en conjunto de validación
    y_pred = best_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    print(f"Exactitud en {category_name}: {accuracy * 100:.2f}%")
    print(classification_report(y_val, y_pred))

    return best_model

In [None]:
model_cane_height = train_and_evaluate(X_train, y_train_cane, X_val, y_val_cane, "Cane Height")
model_heel_shape = train_and_evaluate(X_train, y_train_heel, X_val, y_val_heel, "Heel Shape")
model_toecap = train_and_evaluate(X_train, y_train_toecap, X_val, y_val_toecap, "Toecap")

# Hacer predicciones con los tres modelos
pred_cane_height = model_cane_height.predict(X_val)
pred_heel_shape = model_heel_shape.predict(X_val)
pred_toecap = model_toecap.predict(X_val)

# Combinar las predicciones en un DataFrame
predictions = pd.DataFrame({
    'cane_height_type': pred_cane_height,
    'heel_shape_type': pred_heel_shape,
    'toecap_type': pred_toecap
})

print("Predicciones combinadas:")
print(predictions.head())

Entrenando modelo para Cane Height...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 32 candidates, totalling 160 fits


  warn(


Mejores hiperparámetros para Cane Height: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'auto', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Exactitud en Cane Height: 88.75%
                precision    recall  f1-score   support

            -1       0.90      0.99      0.94       380
          Alta       0.83      1.00      0.91        10
          Baja       1.00      0.14      0.25         7
        Bloque       0.91      0.83      0.87        24
          Cuña       0.00      0.00      0.00        20
Cuña abotinada       0.70      0.50      0.58        28
         Media       0.83      0.45      0.59        11

      accuracy                           0.89       480
     macro avg       0.74      0.56      0.59       480
  weighted avg       0.85      0.89      0.86       480

Entrenando modelo para Heel Shape...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 32 candidates, totalling 160 fits


  warn(


Mejores hiperparámetros para Heel Shape: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': None, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Exactitud en Heel Shape: 69.17%
                                  precision    recall  f1-score   support

                              -1       0.92      0.52      0.67        21
                          Bloque       0.74      0.33      0.46        42
                            Cuña       0.00      0.00      0.00         9
                        De aguja       0.69      0.51      0.59        43
                          Embudo       0.83      0.24      0.37        21
                          Kitten       0.00      0.00      0.00         8
                           Plano       0.68      0.99      0.81       279
                      Plataforma       0.00      0.00      0.00        22
Plataforma en la parte delantera       0.00      0.00      0.00         5
                Plataforma plan

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(


Mejores hiperparámetros para Toecap: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 20, 'max_features': 'auto', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Exactitud en Toecap: 70.62%
              precision    recall  f1-score   support

          -1       0.76      0.65      0.70        57
     Abierta       0.63      0.35      0.45        34
   Con punta       0.71      0.47      0.56        58
    Cuadrada       0.94      0.25      0.40        64
     Redonda       0.69      0.93      0.79       267

    accuracy                           0.71       480
   macro avg       0.75      0.53      0.58       480
weighted avg       0.73      0.71      0.68       480

Predicciones combinadas:
  cane_height_type heel_shape_type toecap_type
0               -1           Plano     Abierta
1               -1           Plano     Redonda
2               -1           Plano     Redonda
3               -1           Plano     Redonda
4               -1           Plan

ValueError: could not convert string to float: 'Cuña abotinada'

In [55]:
#save the three models
import joblib
joblib.dump(model_cane_height, './models/shoes/model_cane_height.joblib')
joblib.dump(model_heel_shape, './models/shoes/model_heel_shape.joblib')
joblib.dump(model_toecap, './models/shoes/model_toecap.joblib')

['./models/shoes/model_toecap.joblib']

In [None]:
predictions

Unnamed: 0,cane_height_type,heel_shape_type,toecap_type
0,-1,Plano,Abierta
1,-1,Plano,Redonda
2,-1,Plano,Redonda
3,-1,Plano,Redonda
4,-1,Plano,Redonda
...,...,...,...
475,-1,Plano,Redonda
476,-1,Plano,Redonda
477,Cuña,Plano,Redonda
478,-1,Plano,Abierta
