In [9]:
import pandas as pd
import numpy as np

# Sample dataset, written one row per day so each observation reads left to right.
weather_columns = ['day_of_week', 'temperature', 'humidity', 'wind_speed', 'pressure', 'rain']
weather_rows = [
    # day, temperature, humidity, wind_speed, pressure, rain (target: 1 = Rain, 0 = No rain)
    ('Monday', 22, 65, 10, 1012, 1),
    ('Tuesday', 28, 55, 5, 1018, 0),
    ('Wednesday', 19, 80, 12, 1009, 1),
    ('Thursday', 21, 70, 9, 1010, 0),
    ('Friday', 25, 60, 15, 1013, 0),
    ('Saturday', 30, 50, 6, 1020, 0),
    ('Sunday', 27, 75, 8, 1016, 1),
]

# Transpose the rows into the column-name -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(weather_columns, zip(*weather_rows))
}

df = pd.DataFrame(data)
print("Original DataFrame:\n", df)


Original DataFrame:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday           22        65          10      1012     1
1     Tuesday           28        55           5      1018     0
2   Wednesday           19        80          12      1009     1
3    Thursday           21        70           9      1010     0
4      Friday           25        60          15      1013     0
5    Saturday           30        50           6      1020     0
6      Sunday           27        75           8      1016     1


In [10]:
# Introduce missing values: blank out one cell in each numeric feature column.
# NOTE: this mutates `df` in place; the cells that follow rely on these gaps.
cells_to_blank = [
    (1, 'temperature'),  # Tuesday
    (3, 'humidity'),     # Thursday
    (5, 'wind_speed'),   # Saturday
    (6, 'pressure'),     # Sunday
]
for row_label, column_name in cells_to_blank:
    df.loc[row_label, column_name] = np.nan

print("\nDataFrame with missing values:\n", df)



DataFrame with missing values:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday         22.0      65.0        10.0    1012.0     1
1     Tuesday          NaN      55.0         5.0    1018.0     0
2   Wednesday         19.0      80.0        12.0    1009.0     1
3    Thursday         21.0       NaN         9.0    1010.0     0
4      Friday         25.0      60.0        15.0    1013.0     0
5    Saturday         30.0      50.0         NaN    1020.0     0
6      Sunday         27.0      75.0         8.0       NaN     1


In [11]:
# Tally the NaN cells per column (summing the boolean mask down the rows).
missing_counts = df.isna().sum(axis=0)
print("\nMissing values count in each column:\n", missing_counts)



Missing values count in each column:
 day_of_week    0
temperature    1
humidity       1
wind_speed     1
pressure       1
rain           0
dtype: int64


In [12]:
# Listwise deletion: discard every row that has at least one NaN.
# dropna returns a new frame, so `df` keeps its gaps for the cells below.
df_dropped = df.dropna(axis=0, how='any')
print("\nDataFrame after dropping rows with missing values:\n", df_dropped)



DataFrame after dropping rows with missing values:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday         22.0      65.0        10.0    1012.0     1
2   Wednesday         19.0      80.0        12.0    1009.0     1
4      Friday         25.0      60.0        15.0    1013.0     0


In [13]:
# Mean imputation: each gap receives the average of its own numeric column.
column_means = df.mean(numeric_only=True)
df_filled_mean = df.fillna(value=column_means)
print("\nDataFrame after filling missing values with mean:\n", df_filled_mean)

# Median imputation: same idea, using the per-column median instead.
column_medians = df.median(numeric_only=True)
df_filled_median = df.fillna(value=column_medians)
print("\nDataFrame after filling missing values with median:\n", df_filled_median)

# Mode imputation: mode() can return several rows when values tie,
# so the first row is taken as the single fill value for each column.
column_modes = df.mode().iloc[0]
df_filled_mode = df.fillna(value=column_modes)
print("\nDataFrame after filling missing values with mode:\n", df_filled_mode)



DataFrame after filling missing values with mean:
   day_of_week  temperature   humidity  wind_speed     pressure  rain
0      Monday         22.0  65.000000   10.000000  1012.000000     1
1     Tuesday         24.0  55.000000    5.000000  1018.000000     0
2   Wednesday         19.0  80.000000   12.000000  1009.000000     1
3    Thursday         21.0  64.166667    9.000000  1010.000000     0
4      Friday         25.0  60.000000   15.000000  1013.000000     0
5    Saturday         30.0  50.000000    9.833333  1020.000000     0
6      Sunday         27.0  75.000000    8.000000  1013.666667     1

DataFrame after filling missing values with median:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday         22.0      65.0        10.0    1012.0     1
1     Tuesday         23.5      55.0         5.0    1018.0     0
2   Wednesday         19.0      80.0        12.0    1009.0     1
3    Thursday         21.0      62.5         9.0    1010.0     0
4      Friday    

In [14]:
# Forward fill: each gap takes the last valid value above it in the same column.
# DataFrame.ffill() is used instead of fillna(method='ffill'), whose `method`
# argument is deprecated in recent pandas and emits a FutureWarning.
df_filled_ffill = df.ffill()
print("\nDataFrame after forward filling:\n", df_filled_ffill)

# Backward fill: each gap takes the next valid value below it.
# A gap in the last row (Sunday's pressure here) has nothing below it and stays NaN.
df_filled_bfill = df.bfill()
print("\nDataFrame after backward filling:\n", df_filled_bfill)



DataFrame after forward filling:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday         22.0      65.0        10.0    1012.0     1
1     Tuesday         22.0      55.0         5.0    1018.0     0
2   Wednesday         19.0      80.0        12.0    1009.0     1
3    Thursday         21.0      80.0         9.0    1010.0     0
4      Friday         25.0      60.0        15.0    1013.0     0
5    Saturday         30.0      50.0        15.0    1020.0     0
6      Sunday         27.0      75.0         8.0    1020.0     1

DataFrame after backward filling:
   day_of_week  temperature  humidity  wind_speed  pressure  rain
0      Monday         22.0      65.0        10.0    1012.0     1
1     Tuesday         19.0      55.0         5.0    1018.0     0
2   Wednesday         19.0      80.0        12.0    1009.0     1
3    Thursday         21.0      60.0         9.0    1010.0     0
4      Friday         25.0      60.0        15.0    1013.0     0
5    Saturday      

  df_filled_ffill = df.fillna(method='ffill')
  df_filled_bfill = df.fillna(method='bfill')
