# Examples of similarity pipelines



## Feature extracting pipeline:
Dataset is wildlife dataset
Must have metadata attribute

# FeatureDataset object
- Features have features (iterable with len) and metadata (pandas df) properties with the same length.
- metadata must have columns with label (unique identifier of label, eg string or int) and id (unique identifier of the datapoint,  eg string or int)

There are three variants
- Dataset of deep features - datapoint is fixed lengt vector
- Dataset of images - datapoint is image tensor.
- Dataset of local descriptors - datapoint is list of fixed lenght vectors.

# Similarity pipeline:
- similarity - N x M matrix, where N is length of query dataset and  M is lenght of database dataset.

Resulting similarity object have additional two attributes:
- metadata_query - dataframe with metadata inherited from query dataset
- metadata_database - dataframe with metadata inherited from database dataset


In [2]:
from data.dataset import WildlifeDataset
from torchvision import transforms as T
import pandas as pd

root = 'examples/SampleDataset'
metadata = pd.read_csv(f'{root}/metadata.csv', index_col=0)

# Set of local descriptors

- output is list of list with features

In [None]:
from features.sift import SIFTFeatures
from similarity.descriptor import DescriptorMatcher


# Grayscale PIL images
transform = T.Compose([
    T.Resize(size=256),
    T.CenterCrop(size=(224, 224)),
    T.Grayscale(),
])
dataset = WildlifeDataset(metadata, root=root, transform=transform)


# Extract set of SIFT local descriptor for each image.
extractor = SIFTFeatures()
features = extractor(dataset)
print('Shape')
print([i.shape for i in features.features])
print('Datapoint')
print(features.features[0])


# Similarity as number of correspondences below ratio test threshold
similarity = MatchDescriptors(descriptor_dim=128, thresholds=[1e-5, 0.5, 0.9])
similarity.match(features, features)

## Superpoint

In [None]:
from features.superpoint import SuperPointFeatures
from similarity.descriptor import DescriptorMatcher


# Dataset of Grayscale tensors
transform = T.Compose([
    T.Resize(size=256),
    T.CenterCrop(size=(224, 224)),
    T.Grayscale(),
    T.ToTensor(),
])
dataset = WildlifeDataset(metadata, root=root, transform=transform)


# Extract set of superpoint local descriptor for each image.
extractor = SuperPointFeatures()
features = extractor(dataset)
print('Shape')
print([i.shape for i in features.features])
print('Datapoint')
print(features.features[0])


# Similarity as number of correspondences below ratio test threshold
similarity = MatchDescriptors(descriptor_dim=256, thresholds=[1e-10, 0.5, 0.9])
similarity(features, features)

# Similarity pipeline using Deep features

In [None]:
from features.deep import DeepFeatures
from similarity.cosine import CosineSimilarity
import timm


# Dataset of RGB tensors
transform = T.Compose([
    T.Resize(size=256),
    T.CenterCrop(size=(224, 224)),
    T.ToTensor()
])
dataset = WildlifeDataset(metadata, root=root, transform=transform)


# Extract deep features using pretrainend SWIN-T model from TIMM / HugginFace Hub
extractor = DeepFeatures(model=timm.create_model('swin_tiny_patch4_window7_224', num_classes=0, pretrained=True))
features = extractor(dataset)


# Cosine similarity between deep features
similarity = CosineSimilarity()
similarity.calculate(features, features)

# Similarity pipeline using LOFTR

In [3]:
from features.memory import InMemoryFeatures
from similarity.loftr import LOFTRMatcher


# Dataset of Grayscale tensors
transform = T.Compose([
    T.Resize(size=256),
    T.CenterCrop(size=(224, 224)),
    T.Grayscale(),
    T.ToTensor(),
])
dataset = WildlifeDataset(metadata, root=root, transform=transform)


# Loads image tensors to memory for faster access
extractor = InMemoryFeatures()
features = extractor(dataset)


# Similarity by number of LOFTR correspondences
similarity = MatchLOFTR(device='cpu', thresholds=[0.5, 0.9, 0.99])
similarity.calculate(features, features)

100%|█████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 14.40it/s]


TypeError: 'LOFTRMatcher' object is not callable

# Pipeline wrapper