In [1]:
import numpy as np
import pandas as pd

In [2]:
# Example dataset used to demonstrate IQR-based outlier removal.
data = [
    10, 12, 15, 15, 16, 18,
    20, 25, 30, 50, 100, 200,
]
# Multiplier applied to the IQR when computing the outlier bounds (1.5 is the default).
cutoff = 1.5

In [3]:
# Work on a pandas Series from here on; an existing Series is kept as-is.
data = data if isinstance(data, pd.Series) else pd.Series(data)

In [4]:
# First and third quartiles (25th and 75th percentiles) of the data.
Q1, Q3 = (data.quantile(q) for q in (0.25, 0.75))

In [7]:
# Interquartile range (IQR): the distance between the third and first
# quartiles, i.e. the spread of the middle half of the data.
IQR = Q3 - Q1

In [8]:
# Outlier bounds: values further than cutoff * IQR below Q1 or above Q3
# are treated as outliers.
lower_bound, upper_bound = Q1 - cutoff * IQR, Q3 + cutoff * IQR

In [9]:
# Keep only the values inside the closed interval [lower_bound, upper_bound].
filtered_data = data[data.between(lower_bound, upper_bound)]

In [12]:
# Show the dataset before and after filtering, both as plain Python lists.
print("Original Data: ", data.tolist())
print("Filtered Data: ", filtered_data.tolist())

Original Data:  [10, 12, 15, 15, 16, 18, 20, 25, 30, 50, 100, 200]
Filtered Data:  [10, 12, 15, 15, 16, 18, 20, 25, 30, 50]


In [2]:
def remove_outliers(data, cutoff=1.5):
    """
    Remove outliers from a dataset using the interquartile range (IQR) method.

    A value is kept when it lies inside the closed interval
    [Q1 - cutoff * IQR, Q3 + cutoff * IQR], where Q1 and Q3 are the 25th and
    75th percentiles of the data and IQR = Q3 - Q1.

    Parameters:
    - data: The dataset (list, numpy array, or pandas Series). Anything that
      is not already a pandas Series is converted with ``pd.Series``.
    - cutoff: Non-negative factor by which to multiply the IQR to determine
      the outlier thresholds (default is 1.5).

    Returns:
    - A pandas Series containing only the values within the bounds. The
      result is always a Series, even when a list or array was passed in.

    Raises:
    - ValueError: If cutoff is negative or NaN.
    """
    # `not cutoff >= 0` also rejects NaN, which would otherwise produce NaN
    # bounds and silently filter out every value.
    if not cutoff >= 0:
        raise ValueError(f"cutoff must be a non-negative number, got {cutoff!r}")

    # Convert the data into a pandas Series if it's not already; the caller's
    # object is left untouched.
    series = data if isinstance(data, pd.Series) else pd.Series(data)

    # Q1 (25th percentile), Q3 (75th percentile) and their difference, the IQR
    q1 = series.quantile(0.25)
    q3 = series.quantile(0.75)
    iqr = q3 - q1

    # Lower and upper bounds beyond which a value counts as an outlier
    lower_bound = q1 - cutoff * iqr
    upper_bound = q3 + cutoff * iqr

    # Keep the values inside the bounds (both endpoints inclusive)
    return series[series.between(lower_bound, upper_bound)]

In [4]:
# Example usage: draw a small sample of integers around 50 and filter it.
# A seeded generator makes the sample identical on every Restart & Run All;
# output saved from an earlier unseeded run will differ until this cell is re-run.
rng = np.random.default_rng(seed=42)
data = rng.normal(loc=50, scale=10, size=10)
data = np.round(data).astype(int)
cutoff = 1.5  # Default IQR cutoff
filtered_data = remove_outliers(data, cutoff)

print("Original Data: ", data)
print("Filtered Data: ", filtered_data.tolist())

Original Data:  [49 49 51 54 26 52 46 52 65 43]
Filtered Data:  [49, 49, 51, 54, 52, 46, 52, 43]
