In [1]:
# Interpolate missing values using the linear method

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Sample temperature readings; NaN marks the gaps to be filled.
df = pd.DataFrame({'Temperature': [22.5, np.nan, 24.0, np.nan, 25.5]})

# Add a second column in which every gap is replaced by the value lying on
# the straight line between its nearest known neighbours. The raw
# 'Temperature' column is left as-is for comparison.
df = df.assign(
    Temperature_Interpolated=lambda frame: frame['Temperature'].interpolate(method='linear')
)

print(df)

   Temperature  Temperature_Interpolated
0         22.5                     22.50
1          NaN                     23.25
2         24.0                     24.00
3          NaN                     24.75
4         25.5                     25.50


In [4]:
# Use a NumPy mask to fill missing values with a specific value

In [5]:
# Boolean NumPy array: True at every position where the raw reading is missing.
mask = np.isnan(df['Temperature'].to_numpy())

# Pick the constant 23.0 where the mask is True and the original reading
# everywhere else; the raw 'Temperature' column itself is not modified.
df['Temperature_Filled'] = np.where(mask, 23.0, df['Temperature'].to_numpy())

print(df)

   Temperature  Temperature_Interpolated  Temperature_Filled
0         22.5                     22.50                22.5
1          NaN                     23.25                23.0
2         24.0                     24.00                24.0
3          NaN                     24.75                23.0
4         25.5                     25.50                25.5


In [6]:
# Identify outliers with the 1.5 * IQR rule and replace them with the median using a NumPy mask

In [7]:
# Toy sales figures containing one clearly anomalous value (5000).
df2 = pd.DataFrame({
    'Sales': [100, 105, 110, 5000, 115, 120]
})

# Quartiles and interquartile range of the raw column.
Q1 = df2['Sales'].quantile(0.25)
Q3 = df2['Sales'].quantile(0.75)
IQR = Q3 - Q1

# Fences: anything more than 1.5 * IQR outside the quartiles counts as an outlier.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Boolean NumPy array: True for rows lying strictly outside the fences.
outlier_mask = ((df2['Sales'] < lower_bound) | (df2['Sales'] > upper_bound)).to_numpy()

# The median is taken over the raw column (outlier included).
median_value = df2['Sales'].median()

# 'Sales' is an integer column but the median can be fractional (112.5 here).
# Cast the working copy to float first: writing a float into an int64 column
# relies on silent upcasting, which newer pandas versions warn about and
# plan to reject.
df2['Sales_Cleaned'] = df2['Sales'].astype(float)
df2.loc[outlier_mask, 'Sales_Cleaned'] = median_value

print(df2)

   Sales  Sales_Cleaned
0    100          100.0
1    105          105.0
2    110          110.0
3   5000          112.5
4    115          115.0
5    120          120.0
