# Identifying outliers

In [None]:
# Outlier detection on the float64/int64 columns of `df`.
#
# Both detectors below operate on the same numeric-only view. Calling
# `quantile` or the `<` / `>` comparisons on the full frame fails (or silently
# depends on the pandas version) as soon as `df` holds a non-numeric column,
# because `DataFrame.quantile` defaults to `numeric_only=False` in pandas 2.x.
from scipy import stats
import numpy as np

numeric_df = df.select_dtypes(include=['float64', 'int64'])

# Z-score method: a row is an outlier if any numeric value lies more than
# 3 standard deviations from its column mean.
# NOTE(review): a column containing NaN yields NaN z-scores under scipy's
# default nan_policy, so it never flags anything — confirm NaNs are handled
# upstream.
z_scores = np.abs(stats.zscore(numeric_df))
outliers = (z_scores > 3).any(axis=1)
print(f"Number of outliers (Z-score > 3): {outliers.sum()}")

# IQR method: a row is an outlier if any numeric value falls outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] for its column.
Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1
below_fence = numeric_df < (Q1 - 1.5 * IQR)
above_fence = numeric_df > (Q3 + 1.5 * IQR)
outliers_iqr = (below_fence | above_fence).any(axis=1)
print(f"Number of outliers (IQR method): {outliers_iqr.sum()}")

# Boxplot visualization

In [None]:
# One box per float64/int64 column of `df`, drawn on a 12x6 inch figure.
plt.figure(figsize=(12, 6))
sns.boxplot(
    data=df.select_dtypes(include=['float64', 'int64']),
)
# Tilt the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.title('Boxplot for Outlier Detection')
plt.show()

# Handling outliers

In [None]:
# Option 1: Remove outliers
# Keep only the rows that the Z-score mask did not flag.
df_no_outliers = df[~outliers]

# Option 2: Cap outliers
# Clamp each float64/int64 column to mean ± 3 standard deviations.
# This overwrites the columns of `df` itself.
for col in df.select_dtypes(include=['float64', 'int64']).columns:
    column_values = df[col]
    column_mean = column_values.mean()
    column_std = column_values.std()
    upper_limit = column_mean + 3*column_std
    lower_limit = column_mean - 3*column_std
    # Apply the lower bound first, then let the upper bound take precedence.
    floored = np.where(column_values < lower_limit, lower_limit, column_values)
    df[col] = np.where(column_values > upper_limit, upper_limit, floored)