In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder
from tqdm import tqdm

In [2]:
img_dir = './images/images/'  
train_data = pd.read_csv('./data/train_data.csv')

train_data = train_data[train_data['type'] == 'Bottom']  

def load_images_and_labels(images_path, train_data):
    images = []
    labels = []

    image_paths = train_data['des_filename'].map(lambda x: os.path.join(images_path, x))

    for image_path, label in tqdm(zip(image_paths, train_data[['silhouette_type','length_type','closure_placement', 'knit_structure', 'waist_type', 'woven_structure']].values), 
                                  total=len(train_data), desc="Loading Images"):
        if os.path.exists(image_path):
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (64, 64)) 
            images.append(img.flatten())  
            labels.append(label)

    return np.array(images), np.array(labels)

train_data = train_data[['des_filename','silhouette_type','length_type','closure_placement', 'knit_structure', 'waist_type', 'woven_structure']]

X, y = load_images_and_labels(img_dir, train_data)

encoder = OneHotEncoder()
y_encoded = encoder.fit_transform(y).toarray()

y_silhouette = y[:, 0]
y_length = y[:, 1]
y_closure = y[:, 2]
y_knit = y[:, 3]
y_waist = y[:, 4]
y_woven = y[:, 5]



Loading Images: 100%|██████████| 14678/14678 [00:03<00:00, 4362.96it/s]


In [3]:
X_train, X_val, y_train_silhouette, y_val_silhouette = train_test_split(X, y_silhouette, test_size=0.2, random_state=42)
_, _, y_train_length, y_val_length = train_test_split(X, y_length, test_size=0.2, random_state=42)
_, _, y_train_closure, y_val_closure = train_test_split(X, y_closure, test_size=0.2, random_state=42)
_, _, y_train_knit, y_val_knit = train_test_split(X, y_knit, test_size=0.2, random_state=42)
_, _, y_train_waist, y_val_waist = train_test_split(X, y_waist, test_size=0.2, random_state=42)
_, _, y_train_woven, y_val_woven = train_test_split(X, y_woven, test_size=0.2, random_state=42)


rf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [100],
    'max_depth': [15],
    'min_samples_split': [5],
    'bootstrap': [True],
    'max_features': ['auto'],
    'criterion': ['gini', 'entropy'],
}

In [4]:
def train_and_evaluate(X_train, y_train, X_val, y_val, attribute_name):
    print(f"Training model for {attribute_name}...")
    grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, 
                                cv=3, n_jobs=-1, verbose=1, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_
    print(f"Best hyperparameters for {attribute_name}: {best_params}")

    y_pred = best_model.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    print(f"Accuracy for {attribute_name}: {accuracy * 100:.2f}%")
    print(classification_report(y_val, y_pred))

    return best_model

In [5]:
model_silhouette = train_and_evaluate(X_train, y_train_silhouette, X_val, y_val_silhouette, "Silhouette type")
model_length = train_and_evaluate(X_train, y_train_length, X_val, y_val_length, "Length type")
model_closure = train_and_evaluate(X_train, y_train_closure, X_val, y_val_closure, "Closure placement")
model_knit = train_and_evaluate(X_train, y_train_knit, X_val, y_val_knit, "Knit structure")
model_waist = train_and_evaluate(X_train, y_train_waist, X_val, y_val_waist, "Waist type")
model_woven = train_and_evaluate(X_train, y_train_woven, X_val, y_val_woven, "Woven structure")


pred_silhouette = model_silhouette.predict(X_val)
pred_length = model_length.predict(X_val)
pred_closure = model_closure.predict(X_val)
pred_knit = model_knit.predict(X_val)
pred_waist = model_waist.predict(X_val)
pred_woven = model_woven.predict(X_val)

predictions = pd.DataFrame({
    'silhouette_type': pred_silhouette,
    'length_type': pred_length,
    'closure_placement': pred_closure,
    'knit_structure': pred_knit,
    'waist_type': pred_waist,
    'woven_structure': pred_woven
})


print("Combined Predictions:")
print(predictions.head())

Training model for Silhouette type...
Fitting 3 folds for each of 2 candidates, totalling 6 fits


  warn(


Best hyperparameters for Silhouette type: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Silhouette type: 42.00%
                    precision    recall  f1-score   support

                -1       0.47      0.30      0.37       352
       5 Bolsillos       0.75      0.10      0.17        63
Acampanado/Bootcut       1.00      0.26      0.41        43
  Acampanado/Flare       0.50      0.20      0.29       114
         Boyfriend       0.00      0.00      0.00         3
             Cargo       0.00      0.00      0.00        52
             Chino       1.00      0.19      0.32        16
           Culotte       0.53      0.38      0.44       136
             Evase       0.55      0.18      0.28       130
            Jogger       0.63      0.31      0.42       142
             Loose       0.00      0.00      0.00         6
             Lápiz       0.00      0.00      0.00        27
          

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(


Best hyperparameters for Length type: {'bootstrap': True, 'criterion': 'gini', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Length type: 59.60%


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

          -1       0.78      0.35      0.49       237
       Capri       1.00      0.13      0.24        15
       Corto       0.67      0.83      0.74       391
        Crop       0.52      0.21      0.30       412
       Largo       0.54      0.82      0.65       953
        Maxi       0.00      0.00      0.00        19
        Midi       0.60      0.41      0.49       137
  Mini/Micro       0.00      0.00      0.00        42
    Standard       0.65      0.62      0.63       669
   Tobillero       1.00      0.02      0.03        61

    accuracy                           0.60      2936
   macro avg       0.58      0.34      0.36      2936
weighted avg       0.60      0.60      0.56      2936

Training model for Closure placement...
Fitting 3 folds for each of 2 candidates, totalling 6 fits


  warn(


Best hyperparameters for Closure placement: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Closure placement: 90.12%
                  precision    recall  f1-score   support

              -1       0.91      0.99      0.95      2635
Cierre Delantero       0.00      0.00      0.00        53
  Cierre Trasero       0.57      0.07      0.13        56
         Lateral       0.17      0.01      0.02        77
      Sin cierre       0.43      0.20      0.27       115

        accuracy                           0.90      2936
       macro avg       0.42      0.26      0.28      2936
    weighted avg       0.85      0.90      0.87      2936

Training model for Knit structure...
Fitting 3 folds for each of 2 candidates, totalling 6 fits


  warn(


Best hyperparameters for Knit structure: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Knit structure: 96.32%
              precision    recall  f1-score   support

          -1       0.96      1.00      0.98      2828
Hecho a mano       0.00      0.00      0.00         1
  Punto Fino       0.00      0.00      0.00        45
Punto Grueso       0.00      0.00      0.00         9
 Punto Medio       0.00      0.00      0.00        20
  Punto fino       0.00      0.00      0.00        19
Punto grueso       0.00      0.00      0.00         4
 Punto medio       0.00      0.00      0.00        10

    accuracy                           0.96      2936
   macro avg       0.12      0.12      0.12      2936
weighted avg       0.93      0.96      0.95      2936

Training model for Waist type...
Fitting 3 folds for each of 2 candidates, totalling 6 fits


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(


Best hyperparameters for Waist type: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Waist type: 50.51%
                precision    recall  f1-score   support

            -1       0.78      0.44      0.56       397
Ajustable/Goma       0.54      0.49      0.51       847
    High Waist       0.47      0.38      0.42       619
     Low Waist       1.00      0.03      0.07        88
 Regular Waist       0.46      0.67      0.54       985

      accuracy                           0.51      2936
     macro avg       0.65      0.40      0.42      2936
  weighted avg       0.54      0.51      0.50      2936

Training model for Woven structure...
Fitting 3 folds for each of 2 candidates, totalling 6 fits


  warn(


Best hyperparameters for Woven structure: {'bootstrap': True, 'criterion': 'entropy', 'max_depth': 15, 'max_features': 'auto', 'min_samples_split': 5, 'n_estimators': 100}
Accuracy for Woven structure: 61.72%
              precision    recall  f1-score   support

          -1       0.73      0.59      0.66       767
    Elástico       0.00      0.00      0.00         2
      Ligero       0.54      0.29      0.38       779
       Medio       0.60      0.86      0.71      1309
      Pesado       0.00      0.00      0.00        79

    accuracy                           0.62      2936
   macro avg       0.37      0.35      0.35      2936
weighted avg       0.60      0.62      0.59      2936



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Combined Predictions:
  silhouette_type length_type closure_placement knit_structure   
0          Jogger    Standard                -1             -1  \
1              -1    Standard                -1             -1   
2           Recto       Corto                -1             -1   
3           Recto       Corto                -1             -1   
4           Recto       Corto                -1             -1   

       waist_type woven_structure  
0   Regular Waist           Medio  
1  Ajustable/Goma           Medio  
2   Regular Waist           Medio  
3  Ajustable/Goma           Medio  
4   Regular Waist           Medio  


In [6]:
#save all models to ./models/bottom using joblib
from joblib import dump
dump(model_silhouette, './models/bottom/bottom_silhouette.joblib')
dump(model_length, './models/bottom/bottom_length.joblib')
dump(model_closure, './models/bottom/bottom_closure.joblib')
dump(model_knit, './models/bottom/bottom_knit.joblib')
dump(model_waist, './models/bottom/bottom_waist.joblib')
dump(model_woven, './models/bottom/bottom_woven.joblib')


['./models/bottom/bottom_woven.joblib']