In [None]:
def remove_outliers_from_column(df: pd.DataFrame, target_col, threshold:float = 1.5):
    """Return the rows of ``df`` whose ``target_col`` value is not an IQR outlier.

    A value is kept when it lies inside the closed interval
    ``[Q1 - threshold * IQR, Q3 + threshold * IQR]``, where ``Q1`` and ``Q3``
    are the 25th and 75th percentiles of ``target_col`` and ``IQR = Q3 - Q1``.

    Args:
        df: Input frame; it is not modified.
        target_col: Label of the column used to decide which rows to keep.
        threshold: Multiplier applied to the IQR to build the fences.

    Returns:
        The subset of ``df`` whose ``target_col`` falls within the fences.
    """
    values = df[target_col]

    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    spread = third_quartile - first_quartile

    low_fence = first_quartile - threshold * spread
    high_fence = third_quartile + threshold * spread

    # `between` is inclusive on both ends, i.e. low_fence <= value <= high_fence.
    within_fences = values.between(low_fence, high_fence)
    return df.loc[within_fences]

In [None]:
def remove_outliers_from_all_columns(df: pd.DataFrame, threshold:float = 1.5):
    """Drop every row that is an IQR outlier in at least one numeric column.

    Only ``float64`` and ``int64`` columns are inspected. The fences for each
    column are computed from the original ``df`` (not from the progressively
    filtered frame), so the order in which columns are processed does not
    change which rows survive.

    Args:
        df: Input frame; it is not modified.
        threshold: Multiplier applied to each column's IQR to build the fences.

    Returns:
        A new DataFrame containing only the rows that lie within
        ``[Q1 - threshold * IQR, Q3 + threshold * IQR]`` for every inspected
        column.
    """
    remaining = df.copy()

    for name in df.select_dtypes(include=["float64", "int64"]).columns:
        # Quartiles come from the untouched input so earlier filtering
        # cannot shift the fences of later columns.
        source = df[name]
        q_low = source.quantile(0.25)
        q_high = source.quantile(0.75)
        spread = q_high - q_low

        floor = q_low - threshold * spread
        ceiling = q_high + threshold * spread

        in_range = remaining[name].between(floor, ceiling)
        remaining = remaining.loc[in_range]

    return remaining.copy()

In [None]:
def find_outliers_iqr(df: pd.DataFrame, threshold: float = 1.5):
    outlier_summary = {}

    numeric_columns = df.select_dtypes(include=["float64", "int64"]).columns
    
    for col in numeric_columns:
        Q1 = df[col].quantile(0.25)
        Q3 = df[col].quantile(0.75)
        IQR = Q3 - Q1
        
        lower_bound = Q1 - threshold * IQR
        upper_bound = Q3 + threshold * IQR

        outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]

        outlier_summary[col] = {
            "outlier_count": len(outliers),
            "outlier_percentage": 100 * outliers.shape[0] / df.shape[0],
            "lower_bound": lower_bound,
            "upper_bound": upper_bound
        }

    return pd.DataFrame(outlier_summary)

In [None]:
import math

def plot_all_histograms(df: pd.DataFrame, title_prefix: str = ""):
    """Draw a grid of histograms (with KDE) for every numeric column of ``df``.

    The plots are laid out three per row, one subplot per numeric column, and
    the figure is shown with ``plt.show()``.

    Args:
        df: Frame whose numeric columns are plotted.
        title_prefix: Text placed before the column name in each subplot title.

    Returns:
        None. When ``df`` has no numeric columns nothing is drawn.
    """
    num_cols = df.select_dtypes(include=[np.number]).columns
    if num_cols.empty:
        # With no columns the grid would have zero rows and the figure zero
        # height, which matplotlib refuses to create.
        return

    n_cols = 3
    n_rows = math.ceil(len(num_cols) / n_cols)

    plt.figure(figsize=(5 * n_cols, 4 * n_rows))

    # Subplot positions are 1-based, hence enumerate(..., 1).
    for i, col in enumerate(num_cols, 1):
        plt.subplot(n_rows, n_cols, i)
        sns.histplot(df[col], bins=30, kde=True)
        plt.title(f"{title_prefix} {col}")
        # Axis labels are blanked; the title already names the column.
        plt.xlabel("")
        plt.ylabel("")

    plt.tight_layout()
    plt.show()

In [None]:
def inverse_boxcox(y, lambda_):
    """Invert a Box-Cox transform.

    Args:
        y: Transformed value(s).
        lambda_: The Box-Cox lambda that was used for the forward transform.

    Returns:
        ``exp(y)`` when ``lambda_`` is 0, otherwise
        ``(y * lambda_ + 1) ** (1 / lambda_)`` evaluated with ``np.power``.
    """
    if lambda_ == 0:
        # lambda == 0 is the log-transform special case of Box-Cox.
        return np.exp(y)

    base = y * lambda_ + 1
    exponent = 1 / lambda_
    return np.power(base, exponent)