In [2]:
# default_exp utils.distances

In [6]:
# export

import numpy as np
from abc import ABC, abstractmethod

## distances

> Generic interface for defining distance metrics for multiple purposes (clustering, comparison of sample sets, etc.).

In [7]:
# export


class CustomDistance(ABC):
    """
    Abstract interface for distance (dissimilarity) metrics.

    Concrete metrics subclass this and implement `compute_distance`.
    """

    # `staticmethod` must be the outermost decorator when combined with
    # `abstractmethod`. The method takes no `self`, so declaring it static lets
    # it be called both on the class and on an instance.
    @staticmethod
    @abstractmethod
    def compute_distance(x: np.ndarray, y: np.ndarray) -> float:
        """
        Computes the distance (dissimilarity) metric between two real vectors.

        Parameters
        ----------
        x, y : np.ndarray
            The two vectors to compare.

        Returns
        -------
        float
            The dissimilarity between `x` and `y`.
        """
        pass

Define custom distance (or dissimilarity) metrics

In [9]:
# export

class EuclideanDistance(CustomDistance):
    """
    Euclidean (L2) distance between two vectors.
    """

    # Declared static because the method takes no `self`; without this, calling
    # it on an instance would bind the instance to `x` and raise a TypeError.
    @staticmethod
    def compute_distance(x: np.ndarray, y: np.ndarray) -> float:
        """
        Computes the norm of the element-wise difference `x - y`.

        Parameters
        ----------
        x, y : np.ndarray
            Vectors to compare; array-likes (e.g. lists) are accepted and
            converted with `np.asarray`.

        Returns
        -------
        float
            `np.linalg.norm(x - y)`.
        """
        # asarray is a no-op for ndarrays and makes plain sequences subtractable.
        return np.linalg.norm(np.asarray(x) - np.asarray(y))

In [11]:
# export

class JaccardDistance(CustomDistance):
    """
    Jaccard distance between the sets of elements of two vectors.
    """

    # Declared static because the method takes no `self`; without this, calling
    # it on an instance would bind the instance to `x` and raise a TypeError.
    @staticmethod
    def compute_distance(x: np.ndarray, y: np.ndarray) -> float:
        """
        Computes the Jaccard distance `1 - |X & Y| / |X | Y|`, where `X` and `Y`
        are the sets of elements of `x` and `y`.

        Parameters
        ----------
        x, y : np.ndarray
            Iterables of hashable elements (each is passed to `set`).

        Returns
        -------
        float
            A value in [0, 1]: 0.0 when both element sets are equal (including
            when both are empty), 1.0 when they are disjoint.
        """
        x_set = set(x)
        y_set = set(y)

        union = x_set | y_set
        if not union:
            # Two empty sets are identical; also avoids dividing by zero.
            return 0.0

        jacc_idx = len(x_set & y_set) / len(union)

        # The Jaccard index is a similarity; the distance is its complement.
        return 1.0 - jacc_idx

In [12]:
# Run nbdev's notebook-to-script export; the cell output lists each notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 0.1_mgmnt.prep.ipynb.
Converted 0.2_mgmnt.prep.files_mgmnt.ipynb.
Converted 0.3_mgmnt.prep.bpe_tokenization.ipynb.
Converted 0.4_mgmnt.prep.tokenization_counting.ipynb.
Converted 1.1_exp.info.ipynb.
Converted 1.2_exp.desc.metrics.java.ipynb.
Converted 1.4_exp.metrics_python.ipynb.
Converted 1.5_exp.metrics_java.ipynb.
Converted 2.0_repr.codebert.ipynb.
Converted 2.0_repr.i.ipynb.
Converted 2.1_repr.codeberta.ipynb.
Converted 2.1_repr.roberta.train.ipynb.
Converted 2.2_repr.roberta.eval.ipynb.
Converted 2.3_repr.word2vec.train.ipynb.
Converted 2.6_repr.word2vec.eval.ipynb.
Converted 2.7_repr.distmetrics.ipynb.
Converted 2.8_repr.sentence_transformers.ipynb.
Converted 3.1_traceability.unsupervised.eda.ipynb.
Converted 3.2_traceability.unsupervised.approach.d2v.ipynb.
Converted 3.2_traceability.unsupervised.approach.w2v.ipynb.
Converted 4.0_infoxplainer.ir.ipynb.
Converted 4.1_infoxplainer.ir.unsupervised.d2v.ipynb.
Converted 4.2_infoxplainer.ir.unsupervised.w2v.ipynb.
Converted