In [11]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm


In [12]:
# Assuming train_data is a pandas DataFrame with columns 'des_filename' and 'label'
img_dir = './images/images/'
train_data = pd.read_csv('./data/train_data.csv')

In [13]:
def load_images_and_labels(images_path, train_data):
    images = []
    labels = []

    # Crear rutas de las imágenes
    image_paths = train_data['des_filename'].map(lambda x: os.path.join(images_path, x))
    
    # Iterar sobre imágenes y etiquetas
    for image_path, label in tqdm(zip(image_paths, train_data[['silhouette_type', 'closure_placement', 'knit_structure','length_type','neck_lapel_type', 'sleeve_length_type', 'woven_structure']].values), 
                                  total=len(train_data), desc="Loading Images"):
        if os.path.exists(image_path):
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (64, 64))  # Redimensionar las imágenes
            images.append(img.flatten())
            labels.append(label)

    return np.array(images), np.array(labels)

In [14]:
train_data_new = train_data[train_data['type']=='Top']
train_data = train_data_new[['des_filename','silhouette_type', 'closure_placement', 'knit_structure','length_type','neck_lapel_type', 'sleeve_length_type', 'woven_structure']]
train_data = train_data[train_data['des_filename'] != '86_1208032_47001267-15_.jpg']

X, y = load_images_and_labels(img_dir, train_data)

encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y).toarray()

y_silhouette_type = y[:, 0]
y_closure_placement = y[:, 1]
y_knit = y[:, 2]
y_length = y[:, 3]
y_neck = y[:, 4]
y_sleeve = y[:, 5]
y_woven = y[:, 6]

X_train, X_val, y_train_silhouette, y_val_silhouette = train_test_split(X, y_silhouette_type, test_size=0.2, random_state=42)
_, _, y_train_closure, y_val_closure = train_test_split(X, y_closure_placement, test_size=0.2, random_state=42)
_, _, y_train_knit, y_val_knit = train_test_split(X, y_knit, test_size=0.2, random_state=42)
_, _, y_train_length, y_val_length = train_test_split(X, y_length, test_size=0.2, random_state=42)
_, _, y_train_neck, y_val_neck = train_test_split(X, y_neck, test_size=0.2, random_state=42)
_, _, y_train_sleeve, y_val_sleeve = train_test_split(X, y_sleeve, test_size=0.2, random_state=42)
_, _, y_train_woven, y_val_woven = train_test_split(X, y_woven, test_size=0.2, random_state=42)

rf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [100,],
    'max_depth': [15],
    'min_samples_split': [5],
    'min_samples_leaf': [2],
    'bootstrap': [True],
    'criterion': ['entropy'],
}

Loading Images: 100%|██████████| 44244/44244 [00:28<00:00, 1552.58it/s]


In [15]:
def train_and_evaluate(X_train, y_train, X_val, y_val, category_name):

    print(f"Entrenando modelo para {category_name}...")
    grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, 
                               cv=5, 
                               n_jobs=-1, 
                               verbose=2, 
                               scoring='accuracy')
    
    print("Fitting model with GridSearchCV...")
    grid_search.fit(X_train, y_train)

    # Mejor modelo
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    print(f"Mejores hiperparámetros para {category_name}: {best_params}")

    # Evaluar en conjunto de validación
    y_pred = best_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    print(f"Exactitud en {category_name}: {accuracy * 100:.2f}%")
    print(classification_report(y_val, y_pred))

    return best_model

In [16]:
model_silhouette = train_and_evaluate(X_train, y_train_silhouette, X_val, y_val_silhouette, "Silhouette")
model_closure_placement = train_and_evaluate(X_train, y_train_closure, X_val, y_val_closure, "Closure Placement")
model_knit = train_and_evaluate(X_train, y_train_knit, X_val, y_val_knit, "Knit")
model_length = train_and_evaluate(X_train, y_train_length, X_val, y_val_length, "Length")
model_neck = train_and_evaluate(X_train, y_train_neck, X_val, y_val_neck, "Neck")
model_sleeve = train_and_evaluate(X_train, y_train_sleeve, X_val, y_val_sleeve, "Sleeve")
model_woven = train_and_evaluate(X_train, y_train_woven, X_val, y_val_woven, "Woven")

# Hacer predicciones con los tres modelos
pred_silhouette = model_silhouette.predict(X_val)
pred_closure_placement = model_closure_placement.predict(X_val)
pred_knit = model_knit.predict(X_val)
pred_length = model_length.predict(X_val)
pred_neck = model_neck.predict(X_val)
pred_sleeve = model_sleeve.predict(X_val)
pred_woven = model_woven.predict(X_val)

# Combinar las predicciones en un DataFrame
predictions = pd.DataFrame({
    'silhouette_type': pred_silhouette,
    'closure_placement': pred_closure_placement,
    'knit_structure': pred_knit,
    'length_type': pred_length,
    'neck_lapel_length': pred_neck,
    'sleeve_length_type': pred_sleeve,
    'woven_structure': pred_woven
})

Entrenando modelo para Silhouette...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 1 candidates, totalling 5 fits




Mejores hiperparámetros para Silhouette: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Silhouette: 52.29%
              precision    recall  f1-score   support

          -1       0.73      0.33      0.45       919
       Ancho       0.00      0.00      0.00         1
       Evase       0.57      0.24      0.34       594
      Halter       0.00      0.00      0.00         8
 Modern slim       1.00      0.30      0.46        10
    Oversize       0.75      0.05      0.10       777
       Recto       0.50      0.93      0.65      3801
     Regular       0.56      0.21      0.30      1037
     Relaxed       0.61      0.09      0.16       153
        Slim       0.64      0.24      0.35      1546
   Superslim       0.00      0.00      0.00         3

    accuracy                           0.52      8849
   macro avg       0.49      0.22      0.26      8849
weighted avg       0.58      0.52      0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mejores hiperparámetros para Closure Placement: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Closure Placement: 59.91%
                  precision    recall  f1-score   support

              -1       0.62      0.82      0.71      3457
Cierre Delantero       0.60      0.68      0.64      2633
   Cierre Hombro       0.00      0.00      0.00        12
  Cierre Trasero       0.52      0.08      0.15       402
          Cuello       0.73      0.14      0.23       417
         Lateral       0.80      0.03      0.05       157
      Sin cierre       0.50      0.33      0.40      1771

        accuracy                           0.60      8849
       macro avg       0.54      0.30      0.31      8849
    weighted avg       0.59      0.60      0.56      8849

Entrenando modelo para Knit...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 1 candidates, totalling 5 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mejores hiperparámetros para Knit: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Knit: 80.39%
              precision    recall  f1-score   support

          -1       0.81      1.00      0.89      7042
Hecho a mano       0.00      0.00      0.00         6
  Punto Fino       0.68      0.10      0.17       613
Punto Grueso       0.00      0.00      0.00       221
 Punto Medio       0.43      0.01      0.02       318
  Punto fino       0.51      0.07      0.12       335
Punto grueso       0.67      0.02      0.03       120
 Punto medio       0.60      0.05      0.09       191
     UNKNOWN       0.00      0.00      0.00         3

    accuracy                           0.80      8849
   macro avg       0.41      0.14      0.15      8849
weighted avg       0.75      0.80      0.73      8849

Entrenando modelo para Length...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 1 candida

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mejores hiperparámetros para Length: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Length: 66.56%
              precision    recall  f1-score   support

          -1       0.65      0.49      0.56      1171
  Asimétrico       0.00      0.00      0.00        10
       Corto       0.75      0.44      0.56       908
        Crop       0.63      0.17      0.27       494
       Largo       0.71      0.44      0.55      1364
        Maxi       0.00      0.00      0.00         6
       Medio       0.91      0.06      0.12       162
        Midi       0.88      0.03      0.05       255
  Mini/Micro       0.00      0.00      0.00        14
    Standard       0.65      0.95      0.77      4448
   Tobillero       0.00      0.00      0.00         5
Tres Cuartos       0.00      0.00      0.00        12

    accuracy                           0.67      8849
   macro avg       0.43      0.22      0.24     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mejores hiperparámetros para Neck: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Neck: 44.16%
                    precision    recall  f1-score   support

                -1       0.53      0.31      0.39       844
   Alto/Envolvente       0.53      0.16      0.25       336
        Asimétrico       0.88      0.15      0.26        46
Babydoll/Peter Pan       0.00      0.00      0.00        21
             Barca       0.00      0.00      0.00        63
       Button Down       0.00      0.00      0.00         9
              Caja       0.39      0.27      0.32       894
          Camisero       0.44      0.42      0.43       867
           Capucha       0.76      0.49      0.59       403
          Chimenea       0.73      0.27      0.39        89
             Cisne       0.60      0.05      0.08        66
           Cruzado       0.50      0.03      0.06       130
           Cutaway       0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Mejores hiperparámetros para Sleeve: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 100}
Exactitud en Sleeve: 71.24%
               precision    recall  f1-score   support

           -1       0.72      0.14      0.24       961
        Corta       0.85      0.60      0.70      1787
        Larga       0.70      0.99      0.82      4774
    Sin Manga       0.58      0.34      0.43       605
Tirante Ancho       0.68      0.08      0.14       244
 Tirante Fino       0.59      0.38      0.47       372
 Tres Cuartos       0.67      0.04      0.07       106

     accuracy                           0.71      8849
    macro avg       0.68      0.37      0.41      8849
 weighted avg       0.72      0.71      0.66      8849

Entrenando modelo para Woven...
Fitting model with GridSearchCV...
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Mejores hiperparámetros para Woven: {'bootstrap': True, 'criterion': 'en

In [17]:
#save all models to ./models/tops using joblib
from joblib import dump
dump(model_silhouette, './models/tops/top_silhouette.joblib')
dump(model_closure_placement, './models/tops/top_closure_placement.joblib')
dump(model_knit, './models/tops/top_knit.joblib')
dump(model_length, './models/tops/top_length.joblib')
dump(model_neck, './models/tops/top_neck.joblib')
dump(model_sleeve, './models/tops/top_sleeve.joblib')
dump(model_woven, './models/tops/top_woven.joblib')



['./models/tops/top_woven.joblib']