In [1]:
import io
import pandas as pd

Source question (Stack Overflow): https://stackoverflow.com/questions/74236711/pandas-outliers-with-and-without-calculations

In [2]:
# Build the sample frame from the pasted pipe-delimited table: the "|" separators
# are stripped from the text and what remains is parsed as fixed-width columns.
data = pd.read_fwf(io.StringIO(""" 
|   A |   B |    C |   D |   E |
| 100 |  99 | 1000 | 300 | 250 |
| 665 |   6 |    9 |   1 |   9 |
|   7 | 665 |    4 |   9 |   1 |
|   1 |   3 |    4 |   3 |   6 |
|   1 |   9 |    1 | 665 |   5 |
|   3 |   4 |    6 |   1 |   9 |
|   5 |   9 |    1 |   3 |   2 |
|   1 | 665 |    3 |   2 |   3 |
|   2 | 665 |    9 |   1 |   0 |
|   5 |   0 |    7 |   6 |   5 |
|   0 |   3 |    3 |   7 |   3 |
|   6 |   3 |    0 |   3 |   6 |
|   6 |   6 |    5 |   1 |   5 |""".replace("|", "")))

In [3]:
# Display the parsed DataFrame.
data

Unnamed: 0,A,B,C,D,E
0,100,99,1000,300,250
1,665,6,9,1,9
2,7,665,4,9,1
3,1,3,4,3,6
4,1,9,1,665,5
5,3,4,6,1,9
6,5,9,1,3,2
7,1,665,3,2,3
8,2,665,9,1,0
9,5,0,7,6,5


In [4]:
# Per-column summary statistics (count, mean, std, min, quartiles, max);
# kept in `stats` so further rows can be appended to it below.
stats = data.describe()
stats

Unnamed: 0,A,B,C,D,E
count,13.0,13.0,13.0,13.0,13.0
mean,61.692308,164.384615,80.923077,77.076923,23.384615
std,183.242728,286.569229,276.163135,194.763729,68.14389
min,0.0,0.0,0.0,1.0,0.0
25%,1.0,3.0,3.0,1.0,3.0
50%,5.0,6.0,4.0,3.0,5.0
75%,6.0,99.0,7.0,7.0,6.0
max,665.0,665.0,1000.0,665.0,250.0


In [5]:
# Append two rows to the describe() table: a band of half-width 1.5 * IQR
# (75% row minus 25% row) around each column's mean.
# NOTE(review): this band is centred on the mean, whereas irq_agg anchors its
# fences on the quartiles (q1 / q3) - confirm the mean-centred band is intended.
stats.loc["lower", :] = stats.loc["mean"] - 1.5 * (stats.loc["75%"] - stats.loc["25%"])
stats.loc["upper", :] = stats.loc["mean"] + 1.5 * (stats.loc["75%"] - stats.loc["25%"])

In [6]:
# Show the summary table again, now including the "lower" and "upper" rows.
stats

Unnamed: 0,A,B,C,D,E
count,13.0,13.0,13.0,13.0,13.0
mean,61.692308,164.384615,80.923077,77.076923,23.384615
std,183.242728,286.569229,276.163135,194.763729,68.14389
min,0.0,0.0,0.0,1.0,0.0
25%,1.0,3.0,3.0,1.0,3.0
50%,5.0,6.0,4.0,3.0,5.0
75%,6.0,99.0,7.0,7.0,6.0
max,665.0,665.0,1000.0,665.0,250.0
lower,54.192308,20.384615,74.923077,68.076923,18.884615
upper,69.192308,308.384615,86.923077,86.076923,27.884615


In [7]:
def quantile_agg(x, alpha=0.05, aggregate=pd.Series.mean):
    """Aggregate the central part of ``x`` after trimming both tails.

    Parameters
    ----------
    x : pandas.Series
        Numeric observations.
    alpha : float, default 0.05
        Total tail mass to cut; the cut-offs are the ``alpha/2`` and
        ``1 - alpha/2`` quantiles of ``x``.
    aggregate : callable, default ``pd.Series.mean``
        Function applied to the retained values.

    Returns
    -------
    Whatever ``aggregate`` returns for the values lying strictly between
    the two cut-offs (values equal to a cut-off are dropped).
    """
    lower_cut = x.quantile(alpha / 2)
    upper_cut = x.quantile(1 - alpha / 2)
    # Both comparisons are strict, so observations sitting exactly on a
    # cut-off are excluded.
    inside = (x > lower_cut) & (x < upper_cut)
    return aggregate(x[inside])

In [8]:
def irq_agg(x, factor=1.5, aggregate=pd.Series.mean):
    """Aggregate the values of ``x`` that fall inside the interquartile fences.

    Parameters
    ----------
    x : pandas.Series
        Numeric observations.
    factor : float, default 1.5
        Multiple of the interquartile range added beyond each quartile to
        form the fences.
    aggregate : callable, default ``pd.Series.mean``
        Function applied to the retained values.

    Returns
    -------
    Whatever ``aggregate`` returns for the values lying strictly between
    ``q1 - factor * IQR`` and ``q3 + factor * IQR``.
    """
    first_quartile = x.quantile(0.25)
    third_quartile = x.quantile(0.75)
    whisker = factor * (third_quartile - first_quartile)
    # Strict comparisons: a value exactly on a fence is treated as an outlier.
    keep = (x > first_quartile - whisker) & (x < third_quartile + whisker)
    return aggregate(x[keep])

In [9]:
# Per-column mean of the values strictly between the 0.5% and 99.5% quantiles
# (alpha=0.01 split over both tails).
data.apply(quantile_agg, alpha=0.01)

A    12.454545
B    15.777778
C     4.727273
D    41.625000
E     4.909091
dtype: float64

In [10]:
# Per-column mean of the values inside the default 1.5 * IQR fences.
data.apply(irq_agg)

A     3.363636
B    14.200000
C     4.333333
D     3.363636
E     4.500000
dtype: float64

In [11]:
def analyze(x, alpha=0.05, factor=1.5, zscore=1.0):
    """Summarise one Series with several outlier-filtered location estimates.

    Parameters
    ----------
    x : pandas.Series
        Numeric observations (one column when used via ``DataFrame.apply``).
    alpha : float, default 0.05
        Forwarded to ``quantile_agg`` for the ``p_*`` estimates.
    factor : float, default 1.5
        Forwarded to ``irq_agg`` for the ``irq_*`` estimates.
    zscore : float, default 1.0
        Cut-off for the ``"standard"`` estimate: only values whose absolute
        z-score is strictly below this threshold are averaged. The default
        reproduces the previously hard-coded threshold of 1.

    Returns
    -------
    pandas.Series
        Entries ``p_mean``, ``p_median``, ``irq_mean``, ``irq_median``,
        ``standard``, ``mean`` and ``median``, in that order.
    """
    # Absolute distance of every observation from the mean, in units of x.std().
    abs_z = ((x - x.mean()) / x.std()).abs()
    return pd.Series({
        "p_mean": quantile_agg(x, alpha=alpha),
        "p_median": quantile_agg(x, alpha=alpha, aggregate=pd.Series.median),
        "irq_mean": irq_agg(x, factor=factor),
        "irq_median": irq_agg(x, factor=factor, aggregate=pd.Series.median),
        "standard": x[abs_z < zscore].mean(),
        # Unfiltered references for comparison.
        "mean": x.mean(),
        "median": x.median(),
    })

In [13]:
# Run analyze on every column, then transpose so each column of `data` becomes
# a row and each estimate a column.
print(data.apply(analyze).T)

      p_mean  p_median   irq_mean  irq_median   standard        mean  median
A  12.454545       5.0   3.363636         3.0  11.416667   61.692308     5.0
B  15.777778       6.0  14.200000         5.0  14.200000  164.384615     6.0
C   4.727273       4.0   4.333333         4.0   4.333333   80.923077     4.0
D  41.625000       4.5   3.363636         3.0   3.363636   77.076923     3.0
E   4.909091       5.0   4.500000         5.0   4.500000   23.384615     5.0
