In [7]:
# Importing libraries 
# Importing libraries 

import numpy as np 
import pandas as pd 
import scipy as stats 
from scipy.stats import chi2 

# calculateMahalanobis Function to calculate 
# the Mahalanobis distance 
def calculateMahalanobis(y=None, data=None, cov=None): 

	y_mu = y - np.mean(data) 
	if not cov: 
		cov = np.cov(data.values.T) 
	inv_covmat = np.linalg.inv(cov) 
	left = np.dot(y_mu, inv_covmat) 
	mahal = np.dot(left, y_mu.T) 
	return mahal.diagonal() 

# data 
data = { 'Price': [100000, 800000, 650000, 700000, 
				860000, 730000, 400000, 870000, 
				780000, 400000], 
		'Distance': [16000, 60000, 300000, 10000, 
					252000, 350000, 260000, 510000, 
					2000, 5000], 
		'Emission': [300, 400, 1230, 300, 400, 104, 
					632, 221, 142, 267], 
		'Performance': [60, 88, 90, 87, 83, 81, 72, 
						91, 90, 93], 
		'Mileage': [76, 89, 89, 57, 79, 84, 78, 99, 
					97, 99] 
		} 

# Creating dataset 
df = pd.DataFrame(data,columns=['Price', 'Distance', 
								'Emission','Performance', 
								'Mileage']) 

# Creating a new column in the dataframe that holds 
# the Mahalanobis distance for each row 
df['Mahalanobis'] = calculateMahalanobis(y=df, data=df[[ 
'Price', 'Distance', 'Emission','Performance', 'Mileage']]) 

# calculate p-value for each mahalanobis distance 
df['p'] = 1 - chi2.cdf(df['Mahalanobis'], 3) 

# display first five rows of dataframe 
print(df) 


    Price  Distance  Emission  Performance  Mileage  Mahalanobis         p
0  100000     16000       300           60       76     6.055764  0.108928
1  800000     60000       400           88       89     2.579063  0.461172
2  650000    300000      1230           90       89     6.762529  0.079865
3  700000     10000       300           87       57     7.482670  0.058005
4  860000    252000       400           83       79     2.027900  0.566636
5  730000    350000       104           81       84     2.380654  0.497246
6  400000    260000       632           72       78     2.147466  0.542370
7  870000    510000       221           91       99     4.915337  0.178103
8  780000      2000       142           90       97     4.025548  0.258719
9  400000      5000       267           93       99     6.623069  0.084933


  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


In [8]:
!pip install mahalanobis

Collecting mahalanobis
  Downloading mahalanobis-1.2.0.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: mahalanobis
  Building wheel for mahalanobis (setup.py) ... [?25ldone
[?25h  Created wheel for mahalanobis: filename=mahalanobis-1.2.0-py3-none-any.whl size=13963 sha256=55a72c596eb7708c5ef52880cff8cddb8f33d4c38267a194c503bd15c5102aea
  Stored in directory: /home/vmonjezi/.cache/pip/wheels/b8/bc/c0/0b2743af71fc775dbd95eb841289c914690ccc3ff95bf8e845
Successfully built mahalanobis
Installing collected packages: mahalanobis
Successfully installed mahalanobis-1.2.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.1.2[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [32]:
import numpy as np
import mahalanobis
input_2D = (np.arange(20) + np.random.normal(0, 0.1, 20)).reshape(-1,2)

mahND = mahalanobis.MahalanobisND(input_2D,100 )

# mahND.mean



# mahND.cov_matrix


# mahND.distd


In [34]:
mahND.calc_distances(np.array([[10,14]]))

array([[18.16899009]])

In [23]:
input_2D

array([[-0.08020819,  0.94029111],
       [ 1.80767343,  2.8521032 ],
       [ 3.99054545,  5.01786769],
       [ 6.15839695,  6.9170261 ],
       [ 8.22760881,  9.03766698],
       [10.07851053, 10.88816534],
       [11.853391  , 12.91937543],
       [13.94733788, 14.97426816],
       [16.00573373, 16.83576748],
       [18.13068798, 19.03040089]])