
# Pandas Quantile – Complete Practical Guide (Single Notebook)

This notebook walks through the **quantile-related methods and use-cases in pandas**
(`Series.quantile`, `DataFrame.quantile`, group-wise quantiles, `pd.qcut`, `describe`),
as used in **data cleaning, outlier handling, finance, analytics, and interviews**.


In [None]:

import pandas as pd
import numpy as np


## 1. Sample Dataset

In [None]:

# Seed NumPy's global generator so the synthetic data is identical on every run.
np.random.seed(42)

n_rows = 50

# All draws share the global random stream, so they must stay in this order:
# salary, income, score, category.
salary_values = np.random.randint(20000, 120000, n_rows)
# Two hand-placed extreme incomes are appended to the random ones; the income
# column is what the outlier and capping cells operate on.
income_values = np.append(np.random.randint(30000, 150000, n_rows - 2), [500000, 800000])
score_values = np.random.randint(35, 100, n_rows)
category_values = np.random.choice(["IT", "HR", "Finance"], n_rows)

df = pd.DataFrame(
    {
        "salary": salary_values,
        "income": income_values,
        "score": score_values,
        "category": category_values,
    }
)
df.head()


## 2. Basic Quantile Usage (Series)

In [None]:

# Series.quantile(q) with a scalar q returns a single number.
# A notebook cell renders only its final expression, so the three quartiles are
# collected into one labelled Series instead of three bare calls (of which only
# the last would be shown).
salary_quartiles = pd.Series(
    {
        "Q1 (0.25)": df['salary'].quantile(0.25),
        "Median (0.50)": df['salary'].quantile(0.50),
        "Q3 (0.75)": df['salary'].quantile(0.75),
    },
    name="salary",
)
salary_quartiles


## 3. Multiple Quantiles

In [None]:

# A list of probabilities yields one Series with an entry per requested quantile.
salary_probs = [0.1, 0.25, 0.5, 0.75, 0.9]
df['salary'].quantile(salary_probs)


## 4. DataFrame Quantiles

In [None]:

# numeric_only=True is required here: `category` holds strings, and pandas >= 2.0
# no longer drops non-numeric columns silently (DataFrame.quantile raises a
# TypeError on them instead).
median_by_column = df.quantile(0.5, numeric_only=True)  # scalar q -> one value per column
quartiles_by_column = df.quantile([0.25, 0.5, 0.75], numeric_only=True)  # list q -> one row per q

# Only the last expression of a cell is rendered. The 0.5 row of this table
# carries the same values as `median_by_column`.
quartiles_by_column


## 5. Quantile with Missing Values

In [None]:

# `salary` is created from np.random.randint, i.e. as an integer column, which
# cannot hold NaN. Cast to float explicitly before injecting missing values:
# newer pandas versions deprecate the implicit int -> float upcast on assignment
# (FutureWarning, slated to become an error).
df['salary'] = df['salary'].astype('float64')

# Simulate missing salaries. Label-based .loc slices include both end points,
# so this blanks rows 5 through 10.
df.loc[5:10, 'salary'] = np.nan

# quantile() skips NaN, so the median is computed from the observed values only.
salary_median = df['salary'].quantile(0.5)

# Impute into a separate Series instead of overwriting df['salary']; otherwise
# the group-wise fill in section 10 would have no missing values left to fill.
salary_median_filled = df['salary'].fillna(salary_median)

# Show the affected rows before and after the global-median imputation.
pd.DataFrame(
    {
        "salary": df.loc[5:10, 'salary'],
        "salary_median_filled": salary_median_filled.loc[5:10],
    }
)


## 6. Interpolation Methods

In [None]:

# Compute the median of `score` under each interpolation rule and gather the
# results in one Series. As separate bare calls only the last one ('midpoint')
# would be rendered, hiding the comparison this cell exists to show.
interpolation_methods = ['linear', 'nearest', 'lower', 'higher', 'midpoint']
pd.Series(
    {
        method: df['score'].quantile(0.5, interpolation=method)
        for method in interpolation_methods
    },
    name="score median",
)


## 7. IQR Outlier Detection

In [None]:

# The 1.5 * IQR rule: keep rows whose income lies within
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR], bounds included.
income = df['income']
Q1, Q3 = income.quantile(0.25), income.quantile(0.75)
IQR = Q3 - Q1

lower = Q1 - 1.5 * IQR
upper = Q3 + 1.5 * IQR

# Build the two bound checks separately, then keep rows that pass both.
not_below_lower = income >= lower
not_above_upper = income <= upper
df_outlier_removed = df[not_below_lower & not_above_upper]


## 8. Quantile-based Capping (Winsorization)

In [None]:

# Winsorize income: values below the 1st percentile are raised to it and values
# above the 99th percentile are lowered to it; everything in between is untouched.
low, high = (df['income'].quantile(prob) for prob in (0.01, 0.99))

df['income_capped'] = df['income'].clip(lower=low, upper=high)


## 9. Group-wise Quantiles

In [None]:

# Median salary per category: one 0.5-quantile for each group.
salary_by_category = df.groupby('category')['salary']
salary_by_category.quantile(0.5)


## 10. Group-wise Quantile Fill

In [None]:

def _fill_with_group_median(group_salary):
    """Fill missing values in one group's salaries with that group's 0.5-quantile."""
    return group_salary.fillna(group_salary.quantile(0.5))


# transform() returns a result aligned to the original rows, so it can be
# assigned straight back to the column.
df['salary'] = df.groupby('category')['salary'].transform(_fill_with_group_median)


## 11. Quantile-based Filtering

In [None]:

# Threshold at the 0.9 quantile of `score`; rows at or above it are kept.
score = df['score']
top_10 = score.quantile(0.9)

is_top_scorer = score >= top_10
df[is_top_scorer]


## 12. Quantile Binning using qcut

In [None]:

# Split salary into as many quantile-based bins as there are labels (four),
# naming the bins from lowest to highest.
band_labels = ['Low', 'Mid', 'High', 'Very High']
df['salary_band'] = pd.qcut(df['salary'], q=len(band_labels), labels=band_labels)


## 13. Quantile vs Percentile

In [None]:

# Same statistic, two conventions: Series.quantile takes a fraction in [0, 1],
# np.percentile takes a percentage in [0, 100].
# Both results go into one Series because a cell renders only its last
# expression, and seeing them side by side is the point of this section.
# Note: Series.quantile skips NaN, whereas np.percentile does not
# (np.nanpercentile is the NaN-ignoring variant).
pd.Series(
    {
        "df['salary'].quantile(0.75)": df['salary'].quantile(0.75),
        "np.percentile(df['salary'], 75)": np.percentile(df['salary'], 75),
    },
    name="salary, 75th percentile",
)


## 14. Quantile inside describe()

In [None]:

# describe() reports quantile rows (25%, 50%, 75%) alongside count/mean/std/min/max.
summary_stats = df.describe()
summary_stats


## 15. Business Rule Example

In [None]:

# Business rule: a row is "high value" when its income is at or above the
# 0.95 quantile of all incomes.
income = df['income']
high_value_txn = income.quantile(0.95)

is_high_value = income >= high_value_txn
df['high_value_flag'] = is_high_value


## 16. Edge Cases

In [None]:

# Edge case: a float Series with no elements. Its quantile is NaN, not an error.
empty_series = pd.Series([], dtype=float)
empty_series.quantile(0.5)



## ✔ Summary
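- `quantile(0.5)` is the median  
- Quantiles are used for outlier detection, imputation, and segmentation  
- Central quantiles (median, IQR) are robust against extreme values, unlike the mean and standard deviation  
- Quantiles are a core tool in finance, healthcare, and ML  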
