# First approach

## Imports

In [53]:
import torch
import albumentations as A
import os 
import numpy as np
import time
import re
import json

from albumentations.pytorch import ToTensorV2
from model import UNET
from mlp import MLP,read_predicted_labels
from PIL import Image
from utils import load_checkpoint,save_predictions_as_imgs
from torchvision.utils import save_image
from mask2label import preprocess_image,normalise_json2,reconstruct_output2

## Hyperparameters

In [54]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
#MODEL WEIGHTS
UNET_WEIGHTS_PATH = "./my_checkpoint.pth.tar"

MLP_1 =        "./pretrained/lr_1e-3_h_200-200_best_model.tar"
MLP_2 =        "./pretrained/lr_1e-3_h_250-250_best_model.tar"
MLP_3 =        "./pretrained/lr_1e-4_h_200-200_best_model.tar"
MLP_4 =        "./pretrained/lr_1e-4_h_250-250_best_model.tar"
MLP_5 =        "./pretrained/lr_1e-5_h_200-200_best_model.tar"
MLP_6 =        "./pretrained/lr_1e-5_h_250-250_best_model.tar"

#IMAGE_DIRECTORIES
PHOTO = "./data_1a/photo"
PHOTO_MASK = "./data_1a/photo_mask"
PHOTO_MASK_LABEL = "./data_1a/photo_mask_label"
PRED_SCALE_FACTORS = "./scale_factors_pred.txt"
GT_SCALE_FACTORS = "./scale_factors_gt.txt"
#IMAGE PROPERTIES (U_NET)
IMAGE_HEIGHT = 270 #135,270,540,1080
IMAGE_WIDTH  = 480  #240,480,960,1920

#MODEL_PROPERTIES (MLP)
INPUT_SIZE= 8
OUTPUT_SIZE = 3
HIDDEN_SIZE1 = [200,200]
HIDDEN_SIZE2 = [250,250]

## Image2Mask (U-Net)

### U_NET DEFINITION AND WEIGHT DOWNLOAD

In [55]:
model = UNET(in_channels=3, out_channels=1).to(DEVICE);
load_checkpoint(torch.load(UNET_WEIGHTS_PATH, map_location=DEVICE), model);
model.eval();

=> Loading checkpoint


### Image Transformations

In [56]:
# TEST TRANSFORMATIONS
test_transforms = A.Compose(
    [
        A.Resize(height=IMAGE_HEIGHT, width=IMAGE_WIDTH),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
)

### IMAGE2MASK

In [57]:
start_time = time.time()  # Start time for performance measurement

# Get the single image file from the PHOTO directory
image_file = os.listdir(PHOTO)[1]
# Extract the index from the filename
match = re.search(r'image_(\d+).jpg', image_file)
if match:
    index = int(match.group(1))
    image_path = os.path.join(PHOTO, image_file)
    image = np.array(Image.open(image_path))
    
    # Transform and predict
    transformed = test_transforms(image=image)
    image = transformed["image"].unsqueeze(0).to(DEVICE)
    
    with torch.no_grad():
        predictions = model(image)
    predictions = torch.sigmoid(predictions)
    predictions = (predictions > 0.5).float()
    
    # Save the predicted mask
    filepath = os.path.join(PHOTO_MASK, f"pred_mask_{index}.jpg")
    save_image(predictions, filepath)

### MASK2LABEL (Camera_Frame)

In [58]:
def predict_labels(PRED_MASK_DIR, SAVING_FOLDER):
    """
    Loads the single data mask file, predicts the labels, and saves it in a specified folder as label_i.txt.
    """
    # Assume there's only one mask file in the directory
    mask_file = os.listdir(PRED_MASK_DIR)[1]
    match = re.match(r'pred_mask_(\d+)\.jpg', mask_file)
    if match:
        i = int(match.group(1))
        mask_path = os.path.join(PRED_MASK_DIR, mask_file)
        pieces_features = preprocess_image(mask_path)  # Assuming preprocess_image is defined elsewhere

        # Construct the saving path for the label file
        save_path = os.path.join(SAVING_FOLDER, f'label_{i}.txt')

        # Save the predicted features to a file
        with open(save_path, 'w') as file:
            json.dump(pieces_features, file)

In [59]:
predict_labels(PHOTO_MASK,PHOTO_MASK_LABEL)

### Label2Normalise (Camera_Frame)

In [60]:
def normalise_json(input_dir, scale_factors_filename):
    # Buscar el primer archivo .txt en el directorio
    json_file = next((f for f in os.listdir(input_dir) if f.endswith('.txt')), None)
    if not json_file:
        raise FileNotFoundError("No .txt file found in the directory.")
    json_path = os.path.join(input_dir, json_file)
    with open(json_path, 'r') as file:
        data = json.load(file)
    with open(scale_factors_filename, 'r') as file:
        scale_factors = json.load(file)
    
    for entry in data:
        for key, value in entry.items():
            if key == 'centroid':
                x_key, y_key = 'centroid_x', 'centroid_y'
                print(entry[key][0])
                entry[key][0] = normalize_value(entry[key][0], scale_factors.get(x_key, {}))
                print(entry[key][0])
                entry[key][1] = normalize_value(entry[key][1], scale_factors.get(y_key, {}))
            else:
                if key in scale_factors and isinstance(value, (int, float)):
                    entry[key] = normalize_value(value, scale_factors[key])
    with open(json_path, 'w') as file:
        json.dump(data, file, indent=4)
def normalize_value(value, factor):
    if not factor:  
        return value
    norm_value = (value - factor['mean']) / factor['std'] if factor['std'] else value - factor['mean']
    return 2 * (norm_value - factor['min_after_std']) / (factor['max_after_std'] - factor['min_after_std']) - 1 if factor['max_after_std'] - factor['min_after_std'] else norm_value

In [61]:
normalise_json(PHOTO_MASK_LABEL, PRED_SCALE_FACTORS);

432
1.0


### CameraFrame2RobotFrame

In [62]:
file_label = os.listdir(PHOTO_MASK_LABEL)[1]
label_path = os.path.join(PHOTO_MASK_LABEL, file_label)
label= [read_predicted_labels(label_path)]
pred_labels_train = torch.tensor(label, dtype=torch.float).squeeze()
print(pred_labels_train)

tensor([-0.3102, -0.0290,  1.0000,  1.0000,  1.0000, -1.0000, -1.0000,  1.0000])


In [63]:
def reconstruct_output2(scale_factors_filename, tensor_values):
    """
    Correctly reconstructs the original scale of values given the path to a file containing scale factors,
    mean, std, min, max, and a tensor of normalized values.
    
    Args:
        scale_factors_filename (str): The path to the file containing the scale factors.
        tensor_values (torch.Tensor): A tensor containing the normalized values for each dimension (x, y, z or x, y).
    
    Returns:
        torch.Tensor: A tensor containing the values in their original scale for each dimension.
    """
    # Load scale factors from the file
    with open(scale_factors_filename, 'r') as file:
        scale_factors = json.load(file)

    # Prepare a tensor to hold the reconstructed values
    original_values = torch.zeros_like(tensor_values)

    keys = ['x', 'y', 'z'][:len(tensor_values)]
    for i, key in enumerate(keys):
        if key in scale_factors:
            factor = scale_factors[key]
            # Invert range normalization: x = (norm_val * (x_max - x_min)) + x_min
            value_range = (tensor_values[i] * (factor['max'] - factor['min'])) + factor['min']
            # Invert centered scaling: x = (x * std) + mean
            original_values[i] = (value_range * factor['std']) + factor['mean']
        else:
            original_values[i] = tensor_values[i]

    return original_values

#### Weight download

In [65]:
model1 = MLP(INPUT_SIZE, HIDDEN_SIZE1, OUTPUT_SIZE);
model2 = MLP(INPUT_SIZE, HIDDEN_SIZE2, OUTPUT_SIZE);
model3 = MLP(INPUT_SIZE, HIDDEN_SIZE1, OUTPUT_SIZE);
model4 = MLP(INPUT_SIZE, HIDDEN_SIZE2, OUTPUT_SIZE);
model5 = MLP(INPUT_SIZE, HIDDEN_SIZE1, OUTPUT_SIZE);
model6 = MLP(INPUT_SIZE, HIDDEN_SIZE2, OUTPUT_SIZE);

model1.load_state_dict(torch.load(MLP_1));
model2.load_state_dict(torch.load(MLP_2));
model3.load_state_dict(torch.load(MLP_3));
model4.load_state_dict(torch.load(MLP_4));
model5.load_state_dict(torch.load(MLP_5));
model6.load_state_dict(torch.load(MLP_6));

model1.eval() ;
model2.eval() ;
model3.eval() ;
model4.eval() ;
model5.eval() ;
model6.eval() ;

output1= model1(pred_labels_train)
print(output1)
output1=reconstruct_output2(GT_SCALE_FACTORS,output1)
print(output1)
print("\n")

output2= model2(pred_labels_train)
print(output2)
output2=reconstruct_output2(GT_SCALE_FACTORS,output2)
print(output2)
print("\n")

output3= model3(pred_labels_train)
print(output3)
output3=reconstruct_output2(GT_SCALE_FACTORS,output3)
print(output3)
print("\n")

output4= model4(pred_labels_train)
print(output4)
output4=reconstruct_output2(GT_SCALE_FACTORS,output4)
print(output4)
print("\n")

output5= model5(pred_labels_train)
print(output5)
output5=reconstruct_output2(GT_SCALE_FACTORS,output5)
print(output5)
print("\n")

output6= model6(pred_labels_train)
print(output6)
output6=reconstruct_output2(GT_SCALE_FACTORS,output6)
print(output6)

tensor([ 0.4266,  0.3281, -0.0019], grad_fn=<TanhBackward0>)
tensor([427.9802, -85.9397,   5.0000], grad_fn=<CopySlices>)


tensor([ 0.4756,  0.5020, -0.0015], grad_fn=<TanhBackward0>)
tensor([442.6817,   0.9943,   5.0000], grad_fn=<CopySlices>)


tensor([ 9.9798e-01,  9.3194e-01, -4.6194e-04], grad_fn=<TanhBackward0>)
tensor([599.3950, 215.9720,   5.0000], grad_fn=<CopySlices>)


tensor([ 0.9919,  0.9254, -0.0024], grad_fn=<TanhBackward0>)
tensor([597.5636, 212.7009,   5.0000], grad_fn=<CopySlices>)


tensor([0.7252, 0.7489, 0.0290], grad_fn=<TanhBackward0>)
tensor([517.5580, 124.4740,   5.0000], grad_fn=<CopySlices>)


tensor([ 0.8013,  0.8202, -0.0038], grad_fn=<TanhBackward0>)
tensor([540.3989, 160.0830,   5.0000], grad_fn=<CopySlices>)


#### Resuts

In [66]:
l=[output1,output2,output3,output4,output5,output6]
concatenated_tensor = torch.stack(l)
mean_tensor = torch.mean(concatenated_tensor, dim=0)
std_tensor = torch.std(concatenated_tensor, dim=0)
print(f"Media por dimensión: {mean_tensor}, Desviación estándar por dimensión: {std_tensor}")

Media por dimensión: tensor([520.9296, 104.7141,   5.0000], grad_fn=<MeanBackward1>), Desviación estándar por dimensión: tensor([ 73.7324, 122.1493,   0.0000], grad_fn=<StdBackward0>)
