In [1]:
import torch
from torch.utils.data import DataLoader, Dataset
import pickle
import pandas as pd

from src.custom_inference_dataset import CustomInferenceDataset
from src.handler import Handler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
with open('./data/x_num_categories_list.pkl', 'rb') as f:
	x_num_categories_list = pickle.load(f)

with open('./data/y_num_categories_list.pkl', 'rb') as f:
	y_num_categories_list = pickle.load(f)

with open('./data/label_encoders.pkl', 'rb') as f:
	label_encoders = pickle.load(f)

with open('./data/onehot_encoders.pkl', 'rb') as f:
	onehot_encoders = pickle.load(f)

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = Handler(x_num_categories_list=x_num_categories_list, y_num_categories_list=y_num_categories_list)

model.load_state_dict(torch.load('./models/best_model.pth', map_location=device))

model.eval()

  model.load_state_dict(torch.load('./models/best_model.pth', map_location=device))


Handler(
  (image_encoder): ImageEncoder(
    (model): CLIPModel(
      (text_model): CLIPTextTransformer(
        (embeddings): CLIPTextEmbeddings(
          (token_embedding): Embedding(49408, 512)
          (position_embedding): Embedding(77, 512)
        )
        (encoder): CLIPEncoder(
          (layers): ModuleList(
            (0-11): 12 x CLIPEncoderLayer(
              (self_attn): CLIPSdpaAttention(
                (k_proj): Linear(in_features=512, out_features=512, bias=True)
                (v_proj): Linear(in_features=512, out_features=512, bias=True)
                (q_proj): Linear(in_features=512, out_features=512, bias=True)
                (out_proj): Linear(in_features=512, out_features=512, bias=True)
              )
              (layer_norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
              (mlp): CLIPMLP(
                (activation_fn): QuickGELUActivation()
                (fc1): Linear(in_features=512, out_features=2048, bias=True)
       

In [4]:
image_folder_path = './data/archive/images/images'
x_test_file_path = './data/x_test.csv'
y_data_file_path = './data/y_full.csv'

In [5]:
x_test = pd.read_csv(x_test_file_path)

In [6]:
y_labels = pd.read_csv(y_data_file_path).columns.tolist()

In [7]:
tmp = x_test.drop_duplicates(subset='des_filename', keep='first')

In [8]:
dataset = CustomInferenceDataset(data=tmp, image_folder_path=image_folder_path, label_encoders=label_encoders, onehot_encoders=onehot_encoders)
test_loader = DataLoader(dataset, batch_size=1, shuffle=False)

In [9]:
# Inicializa una lista para almacenar los resultados
results = []

# Itera sobre el test_loader para obtener imágenes, datos tabulares y prefijos de imagen
for i, (image, tabular_data, image_prefix) in enumerate(test_loader):
    image = image.to(device)
    tabular_data = tabular_data.to(device)
    image_prefix = image_prefix[0]

    with torch.no_grad():
        try:
            # Realiza las predicciones
            predictions = model(image, tabular_data)
            print(f"Prediction {i + 1}/{len(test_loader)}")

            # Itera sobre cada predicción y etiqueta para calcular los vectores one-hot
            for pred, atr in zip(predictions, y_labels):

                # Elimina dimensiones innecesarias si `pred` tiene dimensiones adicionales
                if pred.dim() > 1:
                    pred = pred.squeeze(0)  # Redimensiona si es necesario

                # Crear un vector one-hot para el índice predicho
                onehot_vector = torch.zeros_like(pred)

                # Encuentra el índice de la predicción más alta
                pred_index = torch.argmax(pred).item()  # Convierte a un número entero

                # Establece el índice predicho en 1
                onehot_vector[pred_index] = 1

                # Decodifica el valor de la clase predicha
                pred_value = dataset.value_from_onehot_encoder(atr, onehot_vector)

                # Agrega el resultado a la lista
                results.append({'test_id': f"{image_prefix}_{atr}", 'des_value': pred_value})

        except Exception as e:
            print(f'Error predicting for test_id {image_prefix}. Error message: {e}')
            # En caso de error, agrega 'INVALID' para cada atributo
            for atr in y_labels:
                results.append({'test_id': f"{image_prefix}_{atr}", 'des_value': 'INVALID'})
            continue

# Convierte la lista de resultados en un DataFrame
resulting_df = pd.DataFrame(results)

# Lee los test_ids únicos desde el archivo de ejemplo
example_results = pd.read_csv('./data/archive/sample_submission.csv')
unique_test_ids = example_results['test_id'].unique()

# Filtra el DataFrame para mantener solo los IDs de prueba únicos
resulting_df = resulting_df[resulting_df['test_id'].isin(unique_test_ids)]

# Guarda el DataFrame resultante en un archivo CSV
resulting_df.to_csv('./data/test_predictions.csv', index=False)

Prediction 1/6529
Prediction 2/6529
Prediction 3/6529
Prediction 4/6529
Prediction 5/6529
Prediction 6/6529
Prediction 7/6529
Prediction 8/6529
Prediction 9/6529
Prediction 10/6529
Prediction 11/6529
Prediction 12/6529
Prediction 13/6529
Prediction 14/6529
Prediction 15/6529
Prediction 16/6529
Prediction 17/6529
Prediction 18/6529
Prediction 19/6529
Prediction 20/6529
Prediction 21/6529
Prediction 22/6529
Prediction 23/6529
Prediction 24/6529
Prediction 25/6529
Prediction 26/6529
Prediction 27/6529
Prediction 28/6529
Prediction 29/6529
Prediction 30/6529
Prediction 31/6529
Prediction 32/6529
Prediction 33/6529
Prediction 34/6529
Prediction 35/6529
Prediction 36/6529
Prediction 37/6529
Prediction 38/6529
Prediction 39/6529
Prediction 40/6529
Prediction 41/6529
Prediction 42/6529
Prediction 43/6529
Prediction 44/6529
Prediction 45/6529
Prediction 46/6529
Prediction 47/6529
Prediction 48/6529
Prediction 49/6529
Prediction 50/6529
Prediction 51/6529
Prediction 52/6529
Prediction 53/6529
Pr

In [10]:
print(resulting_df.head())

Empty DataFrame
Columns: [test_id, des_value]
Index: []


In [18]:
resulting_df = pd.DataFrame(results)
# Convertir el valor de des_value a string (ahora es un array)
resulting_df['des_value'] = resulting_df['des_value'].apply(lambda x: x[0])
# añade '_' despues de los dos primero valores de test_id
resulting_df['test_id'] = resulting_df['test_id'].apply(lambda x: x[:2] + '_' + x[2:])
print(resulting_df.head())
resulting_df.to_csv('./data/test_predictions.csv', index=False)

                         test_id des_value
0   88_49711373_cane_height_type   INVALID
1  88_49711373_closure_placement   INVALID
2    88_49711373_heel_shape_type     Plano
3     88_49711373_knit_structure   INVALID
4        88_49711373_length_type   INVALID
