In [1]:
import pandas as pd
import numpy as np

# Sample dataset: two numeric columns containing missing entries (np.nan)
# and one extreme row (Age 150 / Salary 1000000) used by the steps below.
data = {
    'Age': [25, 28, np.nan, 24, 30, 31, 150, 23, np.nan, 27],
    'Salary': [50000, 55000, 52000, np.nan, 60000, 62000, 1000000, 48000, 49000, 53000]
}
df = pd.DataFrame(data)
print("Original Data:")
print(df)

# Detecting missing values: isna() yields a same-shaped boolean frame that
# is True wherever a cell is NaN.
print("\nMissing Values:")
print(df.isna())

# Filling missing values: Age gets the column mean, Salary the column median.
# NOTE: the Age mean is taken over the raw column, so the 150 entry is
# included and pulls the fill value upward.
df_filled = df.fillna({'Age': df['Age'].mean(), 'Salary': df['Salary'].median()})
print("\nData after filling missing values:")
print(df_filled)

# Dropping rows with missing values: any row with a NaN in either column is
# removed; `df` itself is left unchanged.
df_dropped = df.dropna()
print("\nData after dropping rows with missing values:")
print(df_dropped)

# Handling outliers using IQR: keep only rows whose Age lies within
# 1.5 * IQR below the first quartile and above the third quartile.
# Only Age is checked; Salary is not filtered here.
Q1 = df['Age'].quantile(0.25)
Q3 = df['Age'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# The mask is deliberately not called `filter`, which would shadow the
# Python builtin of that name for the rest of the session.
# Comparisons against NaN evaluate to False, so rows with a missing Age are
# dropped here as well as the out-of-range ones.
age_in_range = (df['Age'] >= lower_bound) & (df['Age'] <= upper_bound)
df_outliers_removed = df[age_in_range]
print("\nData after removing outliers:")
print(df_outliers_removed)

Original Data:
     Age     Salary
0   25.0    50000.0
1   28.0    55000.0
2    NaN    52000.0
3   24.0        NaN
4   30.0    60000.0
5   31.0    62000.0
6  150.0  1000000.0
7   23.0    48000.0
8    NaN    49000.0
9   27.0    53000.0

Missing Values:
     Age  Salary
0  False   False
1  False   False
2   True   False
3  False    True
4  False   False
5  False   False
6  False   False
7  False   False
8   True   False
9  False   False

Data after filling missing values:
      Age     Salary
0   25.00    50000.0
1   28.00    55000.0
2   42.25    52000.0
3   24.00    53000.0
4   30.00    60000.0
5   31.00    62000.0
6  150.00  1000000.0
7   23.00    48000.0
8   42.25    49000.0
9   27.00    53000.0

Data after dropping rows with missing values:
     Age     Salary
0   25.0    50000.0
1   28.0    55000.0
4   30.0    60000.0
5   31.0    62000.0
6  150.0  1000000.0
7   23.0    48000.0
9   27.0    53000.0

Data after removing outliers:
    Age   Salary
0  25.0  50000.0
1  28.0  55000.0
3  24