In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
class_1_data = [
    [0.89, -0.09, -1.02],
    [-0.75, 0.25, -0.68],
    [-0.45, -0.21, -0.98],
    [-0.24, 0.19, -0.99],
    [-0.01, 0.07, -1.15],
    [0.18, -0.01, -1.01],
    [0.51, 0.04, -0.95],
    [-2.53, -0.31, -0.91]
]

class_2_data = [
    [1.63, 1.38, 1.07],
    [1.73, 1.44, 1.22],
    [4.02, 1.58, 1.20],
    [-0.26, 1.59, 0.87],
    [4.23, 1.26, 1.05],
    [2.34, 1.51, 0.81],
    [3.0, 1.63, 0.74]
]

In [3]:
df1 = pd.DataFrame(class_1_data, columns = ['R', 'G', 'B'])
df2 = pd.DataFrame(class_2_data, columns = ['R', 'G', 'B'])
P = np.array([-0.2, 1.3, 1.1])

In [4]:
def covariance_matrix(df):
    n = df.shape[0]
    cols = df.columns
    matrix = []
    for i in range(len(cols)):
        row = []
        X = df[cols[i]]
        x_mean = np.mean(X)
        X_var = X - x_mean
        for j in range(len(cols)):
            Y = df[cols[j]]
            y_mean = np.mean(Y)
            Y_var = Y - y_mean
            acc = np.sum(X_var * Y_var)
            row.append(acc/(n-1))
        matrix.append(row)
    return np.array(matrix)

In [5]:
cov_1 = covariance_matrix(df1)
cov_1

array([[ 1.08402857,  0.08035714, -0.0541    ],
       [ 0.08035714,  0.03612679,  0.00655893],
       [-0.0541    ,  0.00655893,  0.01778393]])

In [6]:
cov_2 = covariance_matrix(df2)
cov_2

array([[ 2.4087619 , -0.0567881 ,  0.06912857],
       [-0.0567881 ,  0.0175619 , -0.0109381 ],
       [ 0.06912857, -0.0109381 ,  0.03602857]])

In [7]:
def mahalanobis(u, v, cov):
    cov_inv = np.linalg.inv(cov)
    diff = u - v
    return np.sqrt(np.dot(np.dot(diff, cov_inv), diff))

In [8]:
print("Mahalanobis distances for class 1 to P:")
for x in df1.iloc:
    print(f"From {list(x)} = {mahalanobis(x, P, cov_1)}")

cent_1 = np.mean(df1)
print(f"Mahalanobis distance to class 1 centroid {list(cent_1)} = {mahalanobis(cent_1, P, cov_1)}")

Mahalanobis distances for class 1 to P:
From [0.89, -0.09, -1.02] = 16.851816804055304
From [-0.75, 0.25, -0.68] = 14.824457682516222
From [-0.45, -0.21, -0.98] = 17.0398831299096
From [-0.24, 0.19, -0.99] = 17.198716062828456
From [-0.01, 0.07, -1.15] = 18.371036052858923
From [0.18, -0.01, -1.01] = 17.050338201341905
From [0.51, 0.04, -0.95] = 16.43027369490105
From [-2.53, -0.31, -0.91] = 17.460134903535064
Mahalanobis distance to class 1 centroid [-0.3, -0.008749999999999997, -0.96125] = 16.85200629540704


In [9]:
print("Mahalanobis distances for class 2 to P:")
for x in df2.iloc:
    print(f"From {list(x)} = {mahalanobis(x, P, cov_2)}")

cent_2 = np.mean(df2)
print(f"Mahalanobis distance to class 2 centroid {list(cent_1)} = {mahalanobis(cent_2, P, cov_1)}")

Mahalanobis distances for class 2 to P:
From [1.63, 1.38, 1.07] = 1.5264132970069084
From [1.73, 1.44, 1.22] = 2.164891225798348
From [4.02, 1.58, 1.2] = 4.1972786672274
From [-0.26, 1.59, 0.87] = 2.296074592755654
From [4.23, 1.26, 1.05] = 3.0145774092635103
From [2.34, 1.51, 0.81] = 2.945498214186853
From [3.0, 1.63, 0.74] = 4.00722391517714
Mahalanobis distance to class 2 centroid [-0.3, -0.008749999999999997, -0.96125] = 2.4942197605150027


In [10]:
def euclidean(u, v):
    return np.sqrt(np.sum(np.square(u - v)))

In [11]:
euclidean(cent_1, P)

2.4436812650180055

In [12]:
euclidean(cent_2, P)

2.593003931303441