In [66]:
import numpy as np
import pandas as pd
from typing import *
import sys
sys.path.append('/Users/emicatx/Downloads/CMOR_438_FALL_2025_Emilia/src')
from rice_ml.preprocess.datatype import *

In [67]:
ArrayLike = Union[np.ndarray, Sequence[float], Sequence[Sequence[float]], pd.DataFrame, pd.Series]

def _ensure_numeric(data_vector: ArrayLike, name: str = 'Data') -> np.ndarray:

    # TODO: docstrings, type hints, and examples

    vector = _1D_vectorized(data_vector, name)
    
    if not np.issubdtype(vector.dtype, np.number):
        try:
            vector = vector.astype(float, copy = False)
        except (TypeError, ValueError) as e:
            raise TypeError(f'All entries in {name} must be numeric') from e
    else:
        vector = vector.astype(float, copy = False)

    return vector

In [68]:
test_array = np.array([True, False, True, False])
_ensure_numeric(test_array)

array([1., 0., 1., 0.])

In [69]:
x = [1, 1, 1]
_1D_vectorized(x, 'x')

array([1, 1, 1])

In [70]:
def euclidean_distance(data_vector_1: ArrayLike, 
                       data_vector_2: ArrayLike, 
                       name_1: str = 'data_vector_1', 
                       name_2: str = 'data_vector_2') -> float:
    
    # TODO: docstrings, type hints, and examples

    vector_1 = _ensure_numeric(data_vector_1, name_1)
    vector_2 = _ensure_numeric(data_vector_2, name_2)

    _shape_match(vector_1, vector_2)

    distance = float(np.linalg.norm((vector_2 - vector_1)))

    return distance

In [71]:
def manhattan_distance(data_vector_1: ArrayLike, 
                       data_vector_2: ArrayLike, 
                       name_1: str = 'data_vector_1', 
                       name_2: str = 'data_vector_2') -> float:
    
    vector_1 = _ensure_numeric(data_vector_1, name_1)
    vector_2 = _ensure_numeric(data_vector_2, name_2)

    _shape_match(vector_1, vector_2)

    distance = float(np.sum(np.abs(vector_2 - vector_1)))

    return distance

In [72]:
def minkowski_distance(data_vector_1: ArrayLike, 
                       data_vector_2: ArrayLike, 
                       p: int,
                       name_1: str = 'data_vector_1', 
                       name_2: str = 'data_vector_2') -> float:
    
    if not isinstance(p, int):
        raise TypeError('p parameter must be an integer')
    if p <= 0:
        raise ValueError('p parameter must be greater than zero')
    
    vector_1 = _ensure_numeric(data_vector_1, name_1)
    vector_2 = _ensure_numeric(data_vector_2, name_2)

    _shape_match(vector_1, vector_2)

    distance = float((np.sum((np.abs(vector_2 - vector_1)) ** p)) ** (1 / p))

    return distance

In [73]:
a = [-1, -2]
b = [2, 2]

In [74]:
euclidean_distance(a, b, 'a', 'b')
manhattan_distance(a,b)
minkowski_distance(a, b, 3)

4.497941445275415

In [75]:
input_1 = pd.Series([1, 1, 1])

output = _ensure_numeric(input_1)

output

array([1., 1., 1.])

In [104]:
X_query = np.array([[1, 2], [3, 4]])
X_train = np.array([[4, 6], [0, 0]])

In [105]:
def _distance_calculations(data_array_1: np.ndarray, data_array_2: np.ndarray, metric: str, p) -> np.ndarray:

    # TODO: docstrings, examples (data_array_1 is query, data_array_2 is testing)

    data_array_1 = _2D_numeric(data_array_1)
    data_array_2 = _2D_numeric(data_array_2)

    distance_matrix = np.full((data_array_1.shape[0], data_array_2.shape[0]), np.nan)
    for index_1, point_1 in enumerate(data_array_1):
        print(point_1)
        for index_2, point_2 in enumerate(data_array_2):
            print(point_2)
            if metric == 'euclidean':
                distance = euclidean_distance(point_1, point_2)
            elif metric == 'manhattan':
                distance = manhattan_distance(point_1, point_2)
            elif metric == 'minkowski':
                distance = minkowski_distance(point_1, point_2, p = p)
            distance_matrix[index_1, index_2] = distance

    return distance_matrix

In [106]:
_distance_calculations(X_query, X_train, 'minkowski', 3)

[1. 2.]
[4. 6.]
[0. 0.]
[3. 4.]
[4. 6.]
[0. 0.]


array([[4.49794145, 2.08008382],
       [2.08008382, 4.49794145]])