In [1]:
import numpy as np
import pandas as pd

## Median-Based Anomaly Detection

In [2]:
# Median-Based Anomaly Detection
# We take the absolute difference between every value and the median; if that difference is greater than a reasonable threshold, we consider the value an anomaly.

In [3]:
# Sample readings used throughout the notebook.
x = pd.Series([2.1, 2.3, 2.2, 4.5, 2.4])
median = np.median(x)
threshold = 2  # assumed cut-off for the absolute distance from the median
# Keep every reading whose absolute distance from the median exceeds the cut-off.
outliers = [value for value in x if abs(value - median) > threshold]
print(outliers)

[4.5]


In [4]:
# In the cell above, we found the outlier/anomaly.

## Mean-Based Anomaly Detection

In [6]:
# The condition for a value NOT being an anomaly is:
# (mean-std) <= value <= (mean+std)

# If the value is not in that range, it is considered an anomaly.
# std is the standard deviation.

In [7]:
# Centre and spread of the sample.
mean = np.mean(x)
std = np.std(x)
# Limits of the one-standard-deviation band around the mean.
lower, upper = mean - std, mean + std
# Anything outside [lower, upper] is reported as an outlier.
outliers = [value for value in x if value < lower or value > upper]
outliers

[4.5]

## Z-score-based Anomaly Detection

In [8]:
# Formula:
# z = (value - mean) / std
# If z is greater than a reasonable threshold, the value is said to be an outlier.

In [18]:
# Z-score outlier check using `mean` and `std` computed for `x` in the mean-based cell.
# The comparison uses the magnitude of the z-score so that unusually LOW values
# (large negative z) are flagged as well as unusually high ones; a one-sided
# `z > cutoff` test would silently miss them.
z_threshold = 1.5  # assumed cut-off on |z|
if std == 0:
    # Every value equals the mean, so nothing can be an outlier (and dividing by
    # a zero std would only produce NaNs and a runtime warning).
    outliers = []
else:
    outliers = [value for value in x if abs((value - mean) / std) > z_threshold]
outliers

[4.5]

## Interquartile Range for Anomaly Detection

In [20]:
# Quartiles split the sorted data at 3 points into 4 intervals.
# The first quartile (Q1) is at the 25% point of our data,
# the second (Q2, the median) is at 50%,
# and the third (Q3) is at 75%.
# The interquartile range is the distance between the 3rd and 1st quartiles:
# IQR = Q3-Q1

# Any value < (Q1-1.5*IQR) or value > (Q3+1.5*IQR) is considered an anomaly.

In [21]:
# Values at the 25th and 75th percentiles of the sample.
Q1, Q3 = np.percentile(x, [25, 75])
IQR = Q3 - Q1
# Fences placed 1.5 * IQR below Q1 and above Q3.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
# Values falling outside the fences are reported as outliers.
outliers = [value for value in x if value < lower_fence or value > upper_fence]
outliers

[4.5]