In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from superpoint.models.superpoint_pytorch import SuperPoint
from accelerated_features.modules.xfeat import XFeat
from accelerated_features.modules.lighterglue import LighterGlue
import cv2
import numpy as np

# Shared XFeat instance; later cells call its methods (detectAndCompute,
# computeAtKeypoints2, computeGlobalDescriptor, match_lighterglue).
xfeat = XFeat()

In [None]:
#img_path = "aerial-image.jpeg"
#img_path = "UFRGS-01-2017.png"
#weights_path = "SuperPoint/weights/superpoint_v6_from_tf.pth"
#img = Image.open(img_path).convert("L")
#input = {"image": transforms.ToTensor()(img).unsqueeze(0)} # from (1, H, W) to (1, 1, H, W)

In [None]:
def generate_random_keypoints(
    image_shape,
    num_keypoints,
    device="cpu",
    dtype=torch.float32,
    operation="random"
):
    """
    Sample keypoint coordinates inside an image, uniformly or around a point.

    Args:
        image_shape: (H, W) or (B, C, H, W); a 2-element shape implies B = 1.
        num_keypoints: number of keypoints drawn per batch element.
        device: device the samples are created on, e.g. 'cpu' or 'cuda'.
        dtype: torch dtype of the returned coordinates.
        operation: sampling strategy, one of
            - "random" (default): uniform samples scaled by (W - 1, H - 1);
            - ("localized", (x, y), dispersion): Gaussian samples around (x, y)
              with standard deviation ``dispersion``, clamped to the image.

    Returns:
        torch.Tensor of shape (B, num_keypoints, 2) holding (x, y) coordinates.

    Raises:
        ValueError: if ``image_shape`` has neither 2 nor 4 entries, or if
            ``operation`` matches none of the supported forms.
    """
    rank = len(image_shape)
    if rank == 2:
        B = 1
        H, W = image_shape
    elif rank == 4:
        B, _, H, W = image_shape
    else:
        raise ValueError("image_shape deve ser (H, W) ou (B, C, H, W).")

    # One column per coordinate; x is always drawn before y so that seeded
    # runs consume the random stream in a fixed order.
    column_shape = (B, num_keypoints, 1)
    tensor_opts = {"device": device, "dtype": dtype}

    if operation == "random":
        xs, ys = (
            torch.rand(column_shape, **tensor_opts) * extent
            for extent in (W - 1, H - 1)
        )
    elif (
        isinstance(operation, tuple)
        and len(operation) == 3
        and operation[0] == "localized"
    ):
        (cx, cy), sigma = operation[1], operation[2]
        # Gaussian cloud around the centre, clamped so every point stays
        # inside the image.
        xs, ys = (
            torch.normal(mean=mu, std=sigma, size=column_shape, **tensor_opts).clamp(0, upper)
            for mu, upper in ((cx, W - 1), (cy, H - 1))
        )
    else:
        raise ValueError(
            "operation deve ser 'random' ou ('localized', (x, y), dispersion)"
        )

    return torch.cat([xs, ys], dim=-1)


In [None]:
# Overlay two descriptors on one plot: the first descriptor in feats0 (labelled
# "from user") and descriptor #1880 from feats00 (labelled "from xfeat").
# NOTE(review): plot_n_vectors, feats0 and feats00 are only defined in a cell
# further down the notebook, so this cell fails on a fresh Restart & Run All.
plot_n_vectors(
    [feats0["descriptors"][0], feats00['descriptors'][1880]],
    labels=["from user", "from xfeat"]
)

In [None]:
# Cosine similarity between descriptor #1880 of feats00 and the first
# descriptor returned by compute_xfeat_at_keypoints (stored in `descs`).
# NOTE(review): feats00, descs and cosine_sim are defined in later cells.
target = feats00['descriptors'][1880]
reference = descs[0]
sim = cosine_sim(target, reference)
sim


In [None]:
def compute_xfeat_at_keypoints(
    model,        # XFeat instance
    x,            # (B, C, H, W)
    keypoints     # (B, N, 2) in original-image pixels
):
    """
    Sample L2-normalised descriptors from the model's dense feature map at
    caller-supplied keypoints.

    Args:
        model: object exposing ``preprocess_tensor``, ``net`` and
            ``interpolator`` (an XFeat instance in this notebook).
        x: image batch; must still be 4-D after ``preprocess_tensor``.
        keypoints: (x, y) positions expressed in the original image scale.

    Returns:
        Whatever ``model.interpolator`` returns, L2-normalised along its last
        dimension.
    """
    x, rh1, rw1 = model.preprocess_tensor(x)
    _, _, H1, W1 = x.shape

    # Only the first of the three network outputs (the dense map) is used.
    dense_map, _, _ = model.net(x)
    dense_map = F.normalize(dense_map, dim=1)

    # Divide by the (width, height) ratios from preprocessing to move the
    # keypoints from the original image scale to the preprocessed one.
    ratios = torch.tensor([rw1, rh1], device=keypoints.device)[None, None]
    scaled_kpts = keypoints / ratios

    sampled = model.interpolator(dense_map, scaled_kpts, H=H1, W=W1)
    return F.normalize(sampled, dim=-1)

In [None]:
# Descriptors sampled at the user-chosen keypoints p_kpts on the scene image;
# [0] keeps the first batch element.
# NOTE(review): scene_input and p_kpts are defined in a later cell.
descs = compute_xfeat_at_keypoints(model=xfeat, x=scene_input["image"], keypoints=p_kpts)[0]


In [None]:
# Inspect the shape of the descriptors sampled at p_kpts.
descs.shape

In [None]:
# Compare the keypoint picked by the nearest-neighbour search (last entry of
# `pos`) with the first keypoint stored in feats0.
# NOTE(review): `pos` is filled by the search cell below and feats0/feats00 by
# a later cell, so this cell depends on out-of-order execution.
print(f"ponto detectado pelo xfeat:  {feats00['keypoints'][pos[-1]]}")
print(f"ponto inserido pelo usuario: {feats0['keypoints'][0]}")

In [None]:
# Display the user-chosen query keypoint tensor.
p_kpts

In [None]:
# Brute-force search for the keypoint in feats00 closest to the query point
# p_kpts. `pos` collects every index that improved on the best distance so
# far, so pos[-1] is the nearest keypoint.
# NOTE(review): feats00 and p_kpts are defined in a later cell.
# Start from +inf rather than a fixed sentinel: with the old value of 100000
# a query farther than that from every keypoint left `pos` empty and
# pos[-1] raised IndexError.
dist_final = float("inf")
pos = []
for e, k in enumerate(feats00['keypoints']):
    # p_kpts is built as a (1, 1, 2) tensor, so the difference is indexed
    # as [0][0][axis].
    dist = k - p_kpts
    # Half the squared Euclidean distance; it is only used for ranking.
    dist = 0.5*(dist[0][0][0]**2 + dist[0][0][1]**2)
    if dist < dist_final:
        dist_final = dist
        pos.append(e)
if pos:
    print(pos[-1], dist_final)
else:
    # Reached when there are no keypoints, or when no distance compared as smaller than inf.
    print("nenhum keypoint disponível em feats00['keypoints']")

In [None]:
def cropassion(img, target_size=421):
    """
    Load an image as grayscale, resize it proportionally and centre-crop a square.

    The shorter side is scaled to exactly ``target_size`` and the longer side
    is centre-cropped. For landscape inputs this matches the previous
    behaviour (height scaled to 421, width cropped). Portrait inputs, and
    inputs whose scaled width was truncated below the target, used to produce
    a negative crop offset and a wrongly sized result.

    Args:
        img: path or file object accepted by ``PIL.Image.open``.
        target_size: side length in pixels of the returned square (default 421).

    Returns:
        np.ndarray of shape (target_size, target_size) with the cropped image.
    """
    # Release the file handle as soon as the grayscale copy exists.
    with Image.open(img) as source:
        reference_img = source.convert("L")

    w, h = reference_img.size

    # Scale factor that maps the shorter side onto target_size.
    scale = target_size / min(h, w)
    if h <= w:
        new_h = target_size
        # max() guards against int() truncating the long side below the
        # target, which would make the crop offset negative.
        new_w = max(target_size, int(w * scale))
    else:
        new_w = target_size
        new_h = max(target_size, int(h * scale))

    resized_np = np.array(reference_img.resize((new_w, new_h), Image.BICUBIC))

    # Central target_size x target_size window.
    top = (new_h - target_size) // 2
    left = (new_w - target_size) // 2
    return resized_np[top:top + target_size, left:left + target_size]

# --- Load the scene (mission map) and the reference (nadir) image ---
# ToTensor() on an "L" image gives (1, H, W); unsqueeze(0) makes it (1, 1, H, W).
scene_img = Image.open('UFRGS-01-2017.png').convert("L")
scene_input = {"image": transforms.ToTensor()(scene_img).unsqueeze(0)} 

reference_img = Image.open('009-align.jpg').convert("L")
reference_input = {"image": transforms.ToTensor()(reference_img).unsqueeze(0)}
# Resized, centre-cropped view of the reference image. It is plotted at the
# end of this cell; that plot used to raise NameError because this line was
# commented out.
crop_np = cropassion('009-align.jpg')
#reference_input = {"image": transforms.ToTensor()(crop_np).unsqueeze(0)} 

# --- Candidate ("particle") keypoints on the scene ---
# One Gaussian sample around (2860, 1700), with the ground-truth point
# prepended so that index 0 is always the true location.
particles_kpts = generate_random_keypoints((4800, 4800), num_keypoints=1, operation=("localized", (2860, 1700), 1000), device="cuda") #operation='random'
gt_kpt = torch.tensor([[[2860, 1700]]], device='cuda:0')
particles_kpts = torch.cat((gt_kpt, particles_kpts), dim=1)
# User-chosen query point on the scene image.
p_kpts = torch.tensor([[[1272, 1715]]], device='cuda:0')
print("Mapa da missão:")
# feats0: descriptors computed at the user point; feats00: XFeat's own detections.
feats0 = xfeat.computeAtKeypoints2(scene_input["image"], p_kpts)[0] # particles_kpts)[0]
feats00= xfeat.detectAndCompute(scene_input["image"], top_k = 2000)[0]
print("Imagem da nadir redimensionada at keypoints:")
feats1 = xfeat.detectAndCompute(reference_input["image"], top_k = 1)[0]
# Replace the detected keypoint with the ground-truth scene location.
feats1['keypoints'] = gt_kpt[0] #1275 1722
#feats1['descriptors'] = feats1['descriptors'].repeat(particles_kpts.shape[1], 1)
print("Imagem da nadir redimensionada global")
desc = xfeat.computeGlobalDescriptor(reference_input["image"], resize_to_receptive=False)[0]
# Give every particle the same global descriptor of the reference image.
desc['descriptors'] = desc['descriptors'].repeat(particles_kpts.shape[1], 1)
desc['keypoints'] = particles_kpts[0]

# All three feature sets are tagged with the scene image size as (W, H).
# NOTE(review): feats1 and desc come from the reference image but their
# keypoints were overwritten with scene coordinates above — presumably why the
# scene size is used; confirm.
scene_plane = scene_input['image'][0][0]
scene_size_wh = (scene_plane.shape[1], scene_plane.shape[0])
feats0.update({'image_size': scene_size_wh})
feats1.update({'image_size': scene_size_wh})
desc.update({'image_size': scene_size_wh})

# CROPPED VERSIONS
#d33 = xfeat.detectAndCompute(transforms.ToTensor()(crop).unsqueeze(0), top_k = 1)[0]
#d44 = xfeat.computeGlobalDescriptor(transforms.ToTensor()(crop).unsqueeze(0), resize_to_receptive=False)[0]
mkpts_0, mkpts_1, _, output = xfeat.match_lighterglue(feats0, desc)

print(mkpts_0, mkpts_1)

# Plot the user keypoint together with detection #1880 on the scene image.
# NOTE(review): index 1880 is a hard-coded detection index; it raises
# IndexError if fewer than 1881 keypoints were detected.
keypoints = torch.cat((feats0['keypoints'][0].unsqueeze(0), feats00['keypoints'][1880].unsqueeze(0)), dim=0)
#p_kpts = torch.tensor([[[1272, 1715]]], device='cuda:0')
if keypoints is not None:
    keypoints = keypoints.detach().cpu().numpy()  # [N,2]
    plt.figure(figsize=(12,12))
    plt.imshow(scene_img, cmap="gray")
    plt.scatter(keypoints[:,0], keypoints[:,1], c='r', s=10)  # x,y
    plt.title("Keypoints detectados pelo SuperPoint")
    plt.axis("off")
    plt.show()
else:
    print("A saída não contém 'keypoints'")

# Reference-image keypoint plot; currently disabled by setting keypoints to
# None, so only the else branch runs.
#keypoints = keypoints_desc
keypoints = None
if keypoints is not None:
    keypoints = keypoints.detach().cpu().numpy()  # [N,2]
    plt.figure(figsize=(12,12))
    plt.imshow(reference_img, cmap="gray")
    plt.scatter(keypoints[:,0], keypoints[:,1], c='r', s=10)  # x,y
    plt.title("Keypoints detectados pelo SuperPoint")
    plt.axis("off")
    plt.show()
else:
    print("A saída não contém 'keypoints'")

# Show the centre-cropped reference image.
plt.figure(figsize=(6, 6))
plt.imshow(crop_np, cmap="gray")
plt.title(f"Crop final: {crop_np.shape[1]}x{crop_np.shape[0]}")
plt.axis("off")
plt.show()


def plot_n_vectors(vectors, labels=None, title="Plot de múltiplos vetores 1×64"):
    """
    Draw several PyTorch vectors as line plots on a single figure.

    Args:
        vectors (list): tensors to plot; each is squeezed before plotting
            (e.g. shape (1, 64) becomes (64,)).
        labels (list): optional legend entries, one per vector. When None,
            "vetor_0", "vetor_1", ... are generated.
        title (str): figure title.
    """
    count = len(vectors)
    legend_names = labels if labels is not None else [f"vetor_{i}" for i in range(count)]

    plt.figure(figsize=(10, 5))

    for idx, vec in enumerate(vectors):
        # Move to CPU and drop singleton dimensions so matplotlib gets a 1-D array.
        series = vec.detach().cpu().numpy().squeeze()
        plt.plot(series, label=legend_names[idx], alpha=0.8)

    plt.title(title)
    plt.xlabel("Dimensão (0–63)")
    plt.ylabel("Valor")
    plt.grid(True, alpha=0.25)
    plt.legend()
    plt.tight_layout()
    plt.show()

#plot_n_vectors(
#    [feats0["descriptors"][0], feats0["descriptors"][1], feats1["descriptors"]],
#    labels=["particle_wrong", "particle_true", "uav"]
#)

def cosine_sim(feat0, feat1):
    """
    Cosine similarity between two descriptor vectors.

    Args:
        feat0 (torch.Tensor): first vector, e.g. shape (1, D).
        feat1 (torch.Tensor): second vector, e.g. shape (1, D).

    Returns:
        float: cosine similarity of the two squeezed vectors.
    """
    # Drop singleton dimensions so both inputs are compared along dim 0.
    flat0, flat1 = (t.squeeze() for t in (feat0, feat1))
    return F.cosine_similarity(flat0, flat1, dim=0).item()

# Disabled experiment kept as a bare string literal: pairwise cosine
# similarities between particle, camera and cropped-image descriptors.
# NOTE(review): it references d33/d44, whose definitions are commented out
# above, so it cannot be re-enabled as written.
"""
d0 = feats0["descriptors"][1]   # shape (1, 64)
d1 = feats0["descriptors"][0]
d2 = feats1["descriptors"]   # shape (1, 64)
d3 = d33["descriptors"]
d4 = d44["descriptors"]
d5 = desc["descriptors"][0]

sim1 = cosine_sim(d0, d2)
sim2 = cosine_sim(d1, d2)
sim3 = cosine_sim(d3, d2)
sim4 = cosine_sim(d4, d2)
sim8 = cosine_sim(d3, d5)
sim9 = cosine_sim(d4, d5)
sim10 = cosine_sim(d1, d5)
sim11 = cosine_sim(d0, d5)

print("Similaridade usando D&C para camera:")
print("com a particula errada: ", sim1)
print("com particula certa:    ", sim2)
print("com cropada D&C:        ", sim3)
print("com cropada CG:         ", sim4)

print("\nSimilaridade usando CG para camera:")
print("com a particula errada: ", sim11)
print("com particula certa:    ", sim10)
print("com cropada D&C:        ", sim8)
print("com cropada CG:         ", sim9)
"""

In [None]:
def crop_centered(image, center, patch_size):
    """
    Cut a ``patch_size`` x ``patch_size`` window around ``center``, filling
    with zeros wherever the window falls outside the image.

    Args:
        image: tensor whose last two dimensions are (H, W), e.g. (B, C, H, W)
            or (C, H, W).
        center: (x, y) window centre in pixels; the window's top-left corner
            is ``int(x - patch_size // 2), int(y - patch_size // 2)``.
        patch_size: int side length of the returned window.

    Returns:
        Tensor with the same leading dimensions, dtype and device as
        ``image`` and trailing dimensions (patch_size, patch_size).
    """
    H, W = image.shape[-2:]
    x, y = center

    half = patch_size // 2
    x1 = int(x - half)
    y1 = int(y - half)
    # Derive the far edge from the near edge so the window is always exactly
    # patch_size wide. Computing it as int(x + half) lost one pixel for odd
    # sizes and for fractional centres whose near edge is negative.
    x2 = x1 + patch_size
    y2 = y1 + patch_size

    # Part of the window that overlaps the image. Clamping both ends into
    # [0, size] keeps the slice indices non-negative even when the window
    # lies completely outside the image.
    ix1 = min(max(x1, 0), W)
    iy1 = min(max(y1, 0), H)
    ix2 = min(max(x2, 0), W)
    iy2 = min(max(y2, 0), H)

    # Start from an all-zero patch and copy the overlapping pixels into place.
    patch = image.new_zeros((*image.shape[:-2], patch_size, patch_size))
    if ix2 > ix1 and iy2 > iy1:
        patch[..., iy1 - y1:iy2 - y1, ix1 - x1:ix2 - x1] = image[..., iy1:iy2, ix1:ix2]

    return patch

def pad_to_square(tensor):
    """
    Zero-pad the last two dimensions so that height equals width.

    The padding is split evenly between both sides; when the difference is
    odd the extra row/column goes to the bottom/right.

    Args:
        tensor: tensor whose last two dimensions are (H, W), e.g.
            (B, C, H, W) or (C, H, W).

    Returns:
        Tensor with trailing dimensions (max(H, W), max(H, W)).
    """
    # Read only the spatial dims so both the documented (C, H, W) layout and
    # the batched (B, C, H, W) layout used by the callers are accepted.
    H, W = tensor.shape[-2:]
    size = max(H, W)

    pad_h = size - H
    pad_w = size - W

    pad_top = pad_h // 2
    pad_bottom = pad_h - pad_top
    pad_left = pad_w // 2
    pad_right = pad_w - pad_left

    # F.pad takes the padding for the last dimension first: (left, right, top, bottom).
    return F.pad(
        tensor,
        (pad_left, pad_right, pad_top, pad_bottom),
        mode="constant",
        value=0.0
    )

def resize_patch(patch, target_size=416):
    """
    Bilinearly resize a patch to ``target_size`` x ``target_size``.

    Args:
        patch: (B, C, H, W) batch, or a single (C, H, W) patch.
        target_size: output side length in pixels.

    Returns:
        Resized tensor with the same number of dimensions as the input.
    """
    # Bilinear F.interpolate needs a 4-D input, so a single (C, H, W) patch
    # is given a temporary batch dimension that is removed again afterwards.
    unbatched = patch.dim() == 3
    if unbatched:
        patch = patch.unsqueeze(0)

    patch = F.interpolate(
        patch,
        size=(target_size, target_size),
        mode="bilinear",
        align_corners=False
    )
    return patch.squeeze(0) if unbatched else patch

def cosine_similarity(a, b):
    """Return the cosine similarity of two tensors as a Python float.

    A leading dimension is added to both inputs before they are compared
    along dim 1.
    """
    batched_a = a[None]
    batched_b = b[None]
    score = F.cosine_similarity(batched_a, batched_b, dim=1)
    return score.item()

def evaluate_patch_scales(
    image,
    keypoint,
    D_ref,
    model,
    patch_sizes=(64, 96, 128, 160, 192, 224, 256),
    input_size=416
):
    """
    Score how well patches of different sizes around a keypoint match a
    reference descriptor.

    For every size, a patch centred on ``keypoint`` is cropped, padded to a
    square, resized to ``input_size`` and described with
    ``model.computeGlobalDescriptor``. The result is compared with ``D_ref``
    by cosine similarity, and each score is printed as it is computed.

    Args:
        image: image tensor handed to ``crop_centered``.
        keypoint: (x, y) patch centre.
        D_ref: reference descriptor tensor.
        model: object exposing ``computeGlobalDescriptor(x, resize_to_receptive=...)``
            whose first returned element is a dict with a 'descriptors' tensor.
        patch_sizes: iterable of patch side lengths to evaluate.
        input_size: side length every patch is resized to before description.

    Returns:
        dict {patch_size: similarity}
    """
    sims = {}

    for ps in patch_sizes:
        crop = crop_centered(image, keypoint, ps)
        crop = pad_to_square(crop)
        crop = resize_patch(crop, input_size)

        # Always pull the tensor out of the result dict. Previously this only
        # happened when the descriptors were 2-D; any other shape passed the
        # whole dict to cosine_similarity.
        D_patch = model.computeGlobalDescriptor(crop, resize_to_receptive=False)[0]['descriptors']
        if D_patch.ndim == 2:
            # (1, D) -> (D,)
            D_patch = D_patch.squeeze(0)

        sims[ps] = cosine_similarity(D_patch, D_ref)
        print(f"patch: {ps} - sim: {sims[ps]}")

    return sims

def best_patch_size(similarities):
    """Return the key of ``similarities`` with the highest value.

    On ties, the key that appears first in the dict wins.
    """
    def score(size):
        return similarities.get(size)

    return max(similarities, key=score)

# --- Experiment 1: reference = local descriptor of the nadir image (feats1) ---
D_ref = feats1["descriptors"][0] # from local
# NOTE(review): feats0 is currently computed at p_kpts, which holds a single
# point, so index [1] may be out of range (it existed when particles_kpts was
# used instead) — confirm.
x, y = feats0['keypoints'][1]

sims = evaluate_patch_scales(
    image=scene_input['image'],
    keypoint=(x, y),
    D_ref=D_ref,
    model=xfeat
)

# NOTE(review): best_size is computed but never shown; the print() below only
# emits a blank line.
best_size = best_patch_size(sims)
print()

# --- Experiment 2: reference = global descriptor of the nadir image (desc) ---
D_ref = desc["descriptors"][0] # from global
x, y = feats0['keypoints'][0]

sims = evaluate_patch_scales(
    image=scene_input['image'],
    keypoint=(x, y),
    D_ref=D_ref,
    model=xfeat
)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_receptive_field(img, keypoint, rf_size=421, stride=32, color='lime', linewidth=2):
    """
    Show an image with a square of side ``rf_size`` drawn around a keypoint.

    The square is clipped to the image bounds and the keypoint is marked
    with a red cross.

    Args:
        img (np.ndarray or torch.Tensor): image as (H, W), (H, W, 3),
            (1, H, W), (3, H, W) or a batch (B, C, H, W); only the first
            batch element is shown.
        keypoint (tuple): (x, y) in image pixels.
        rf_size (int): side of the drawn square, in pixels.
        stride (int): accepted for interface compatibility; not used.
        color: edge colour of the square.
        linewidth: edge width of the square.
    """
    import numpy as np

    # Bring the image to a NumPy (H, W) or (H, W, 3) layout.
    if isinstance(img, torch.Tensor):
        img = img.detach().cpu().numpy()
    if img.ndim == 4:
        img = img[0]
    if img.ndim == 3:
        leading = img.shape[0]
        if leading == 1:
            img = img[0]
        elif leading == 3:
            img = np.transpose(img, (1, 2, 0))

    height, width = img.shape[:2]
    cx, cy = keypoint

    # Square around the keypoint, clipped to the image.
    reach = rf_size // 2
    left = max(cx - reach, 0)
    top = max(cy - reach, 0)
    right = min(cx + reach, width)
    bottom = min(cy + reach, height)

    fig, ax = plt.subplots(figsize=(10, 10))
    colormap = 'gray' if img.ndim == 2 else None
    ax.imshow(img, cmap=colormap)

    outline = patches.Rectangle(
        (left, top),
        right - left,
        bottom - top,
        linewidth=linewidth,
        edgecolor=color,
        facecolor='none',
    )
    ax.add_patch(outline)

    ax.scatter(cx, cy, s=50, c='red', marker='x', label='Centro (keypoint)')

    ax.set_title(f"Campo Receptivo ({rf_size} px) centrado em ({cx}, {cy})")
    ax.legend()
    plt.show()

def plot_receptive_field2(img, keypoint, rf_size=421, stride=32, 
                         color='lime', linewidth=2):
    """
    Plot a square of side ``rf_size`` around a keypoint and return the
    corresponding image crop.

    Args:
        img (np.ndarray or torch.Tensor): image as (H, W), (H, W, C),
            (C, H, W) with C in (1, 3), or a batch (B, C, H, W); only the
            first batch element is used.
        keypoint (tuple): (x, y) in image pixels; values are truncated to int.
        rf_size (int): side of the square, in pixels.
        stride (int): accepted for interface compatibility; not used.
        color: edge colour of the drawn square.
        linewidth: edge width of the drawn square.

    Returns:
        crop_img (np.ndarray): copy of the image region inside the square,
        clipped to the image bounds.
    """

    # ----------- Image normalisation -----------
    if isinstance(img, torch.Tensor):
        img = img.detach().cpu().numpy()

    if img.ndim == 4:        # (B, C, H, W)
        img = img[0]
    if img.ndim == 3 and img.shape[0] in (1, 3):  # (C, H, W) -> (H, W, C)
        img = np.transpose(img, (1, 2, 0))

    H, W = img.shape[:2]

    # Keypoints read back from tensors are floats; slice indices must be
    # ints, otherwise the crop below raises TypeError.
    cx, cy = keypoint
    cx = int(cx)
    cy = int(cy)

    # ----------- Square bounds, clipped to the image -----------
    half_rf = rf_size // 2
    x0 = max(cx - half_rf, 0)
    y0 = max(cy - half_rf, 0)
    x1 = min(cx + half_rf, W)
    y1 = min(cy + half_rf, H)

    # ----------- Crop -----------
    crop_img = img[y0:y1, x0:x1].copy()

    # ----------- Plot -----------
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(img, cmap='gray' if img.ndim == 2 else None)

    rect = patches.Rectangle(
        (x0, y0),
        x1 - x0,
        y1 - y0,
        linewidth=linewidth,
        edgecolor=color,
        facecolor='none'
    )
    ax.add_patch(rect)

    ax.scatter(cx, cy, s=50, c='red', marker='x', label='Centro (keypoint)')
    ax.set_title(f"Campo Receptivo ({rf_size} px) centrado em ({cx}, {cy})")
    ax.legend()
    plt.show()

    return crop_img




def plot_receptive_field3(img, keypoint, rf_size=421, stride=32, 
                          color='lime', linewidth=2):
    """
    Return the image crop of side ``rf_size`` around a keypoint, without plotting.

    Args:
        img (np.ndarray or torch.Tensor): image as (H, W), (H, W, C),
            (C, H, W) with C in (1, 3), or a batch (B, C, H, W); only the
            first batch element is used.
        keypoint (tuple): (x, y) in image pixels; values are truncated to int.
        rf_size (int): side of the square, in pixels.
        stride, color, linewidth: accepted for interface compatibility; not used.

    Returns:
        np.ndarray: copy of the region inside the square, clipped to the
        image bounds.
    """
    # Bring the image to a NumPy array in (H, W[, C]) layout.
    pixels = img.detach().cpu().numpy() if isinstance(img, torch.Tensor) else img
    if pixels.ndim == 4:
        pixels = pixels[0]
    if pixels.ndim == 3 and pixels.shape[0] in (1, 3):
        pixels = np.transpose(pixels, (1, 2, 0))

    height, width = pixels.shape[:2]

    # Integer keypoint coordinates, so the bounds below can be used as slice indices.
    cx, cy = keypoint
    cx, cy = int(cx), int(cy)

    reach = rf_size // 2
    col_start = max(cx - reach, 0)
    row_start = max(cy - reach, 0)
    col_stop = min(cx + reach, width)
    row_stop = min(cy + reach, height)

    # Plotting is disabled in this variant; plot_receptive_field2 draws the
    # same square.
    return pixels[row_start:row_stop, col_start:col_stop].copy()


In [None]:
# Time a single receptive-field crop around the first keypoint of feats0 and
# display the resulting crop.
import time
# NOTE(review): the original note called this the centre of the 1280x704
# image; the value is actually the first feats0 keypoint — the note looks stale.
keypoint = feats0['keypoints'][0].cpu().numpy()
print(keypoint)
nadir = xfeat.x_receptive
#particle_img = plot_receptive_field(scene_input['image'], keypoint, rf_size=421, stride=32)

init_t = time.time()
# Counter used by the disabled per-keypoint loop below; it stays 0 while only
# one crop is taken.
c = 0
#for kp in feats0['keypoints']:
#    crop = plot_receptive_field3(scene_input['image'], kp, rf_size=421)
#    c= c+1
crop = plot_receptive_field3(scene_input['image'], keypoint, rf_size=421)
end_t = time.time()
print(c)
# Elapsed wall-clock time in seconds.
all_time = (end_t-init_t)
print("time: ", all_time)
#print(f"avg_time: {(all_time/c)*10**3} ms")
plt.imshow(crop, cmap='gray')
plt.title("Crop retornado")
plt.show()

In [None]:
# Display the keypoints stored in feats0.
feats0['keypoints']

In [None]:
# Shape of the crop returned by plot_receptive_field3.
crop.shape

In [None]:
#2853 1681
# First particle keypoint (desc['keypoints'][0]) as a NumPy array.
# NOTE(review): the original note called this the centre of the 1280x704
# image, which looks stale.
keypoint = desc['keypoints'][0].cpu().numpy()

# Draw the receptive-field square on xfeat.x_receptive.
# NOTE(review): x_receptive is presumably the receptive-field-sized input kept
# by this XFeat fork — confirm.
nadir = xfeat.x_receptive
plot_receptive_field(nadir, keypoint, rf_size=421, stride=32)

In [None]:
# Cosine similarity, along the last dimension, between the descriptors in desc
# and those in feats0; each score is printed with the feats0 keypoint at the
# same index.
similarities = F.cosine_similarity(desc['descriptors'], feats0['descriptors'], dim=-1)
for e,s in enumerate(similarities):
    print(e,s,feats0['keypoints'][e].tolist())

In [None]:
def compute_receptive_field(model, input_size):
    """
    Run one forward pass on a zero input and report the receptive field after
    every Conv2d layer that fires.

    The receptive field and jump (accumulated stride) are updated in the
    order the Conv2d layers execute, using the first element of each layer's
    kernel size, stride and dilation.
    NOTE(review): this accumulation assumes the convolutions form a single
    sequential chain; parallel branches would be summed together.

    Args:
        model (nn.Module): network to inspect. If it exposes
            ``detectAndCompute`` (as XFeat does) that method drives the
            forward pass with ``top_k=1``; otherwise ``model(x)`` is called.
        input_size (tuple): (B, C, H, W) shape of the zero input.

    Returns:
        list of dicts, one per Conv2d call, with keys "layer", "kernel",
        "stride", "padding", "rf", "jump" and "output_shape".

    Side effects:
        Puts ``model`` in eval mode.
    """
    rf = 1      # receptive field of the input itself
    jump = 1    # accumulated stride

    records = []

    def hook_fn(module, inp, out):
        nonlocal rf, jump
        k = module.kernel_size[0]
        s = module.stride[0]
        p = module.padding[0]
        d = module.dilation[0]

        # Each layer widens the field by its dilated kernel extent, measured
        # in input pixels via the jump accumulated so far.
        rf = rf + ((k - 1) * d) * jump
        jump = jump * s

        records.append({
            "layer": module.__class__.__name__,
            "kernel": k,
            "stride": s,
            "padding": p,
            "rf": rf,
            "jump": jump,
            "output_shape": tuple(out.shape)
        })

    # Hook every Conv2d so the records follow execution order.
    handles = [
        m.register_forward_hook(hook_fn)
        for m in model.modules()
        if isinstance(m, nn.Conv2d)
    ]

    # Create the probe on the model's own device when it has parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else "cpu"
    x = torch.zeros(input_size, device=device)

    model.eval()
    try:
        with torch.no_grad():
            # Drive the model that was passed in. The previous version always
            # called the notebook-global `xfeat`, ignoring `model`.
            run = getattr(model, "detectAndCompute", None)
            if callable(run):
                run(x, top_k=1)
            else:
                model(x)
    finally:
        # Always detach the hooks, even if the forward pass fails.
        for handle in handles:
            handle.remove()

    return records

In [None]:
# Per-layer receptive-field report for XFeat.
# NOTE(review): the first call (reference-image shape) is overwritten at once
# by the second (scene-image shape), so only the scene result is printed.
rf_info = compute_receptive_field(xfeat, input_size=transforms.ToTensor()(reference_img).unsqueeze(0).shape) 
rf_info = compute_receptive_field(xfeat, input_size=transforms.ToTensor()(scene_img).unsqueeze(0).shape)
for i, layer in enumerate(rf_info):
    print(f"{i:02d} | {layer['layer']:<10} | RF={layer['rf']:>3} | Jump={layer['jump']:>2} | Out={layer['output_shape']}")

In [None]:
#hist_np = np.array(hist)
#print(f"extract 50.0000 particles points view using xfeat: {hist_np.mean():.3f} ± {hist_np.std():.4f} ms")