# Create a DataFrame with Missing Values

In [1]:
import pandas as pd
import numpy as np
# Example frame used throughout: columns A and B contain NaN gaps,
# while column C is fully populated.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[np.nan, 2, 3, np.nan, 5],
    C=[1, 2, 3, 4, 5],
)
df = pd.DataFrame(data)
print("Original DataFrame with missing values:\n", df)

Original DataFrame with missing values:
      A    B  C
0  1.0  NaN  1
1  2.0  2.0  2
2  NaN  3.0  3
3  4.0  NaN  4
4  5.0  5.0  5


# 1. Checking for Missing Values

In [3]:
# Element-wise boolean mask: True wherever a cell of df is missing
null_mask = df.isnull()
print("\nMissing values in DataFrame:\n", null_mask)

# Summing the mask down each column yields the number of missing entries per column
null_counts = null_mask.sum()
print("\nMissing values count in each column:\n", null_counts)


Missing values in DataFrame:
        A      B      C
0  False   True  False
1  False  False  False
2   True  False  False
3  False   True  False
4  False  False  False

Missing values count in each column:
 A    1
B    2
C    0
dtype: int64


# 2. Dropping Rows or Columns with Missing Values

* Drop Rows with Any Missing Values

In [4]:
# Keep only fully populated rows; a row is discarded as soon as any one of
# its cells is missing (axis=0 / how="any" are dropna's defaults, spelled out here).
# df itself is left untouched -- dropna returns a new frame.
df_drop_rows = df.dropna(axis=0, how="any")
print("\nDataFrame after dropping rows with any missing values:\n", df_drop_rows)


DataFrame after dropping rows with any missing values:
      A    B  C
1  2.0  2.0  2
4  5.0  5.0  5


* Drop Columns with Any Missing Values

In [5]:
# Keep only the columns that have no missing cells at all;
# axis="columns" is the named equivalent of axis=1.
df_drop_cols = df.dropna(axis="columns")
print("\nDataFrame after dropping columns with any missing values:\n", df_drop_cols)


DataFrame after dropping columns with any missing values:
    C
0  1
1  2
2  3
3  4
4  5


# 3. Filling Missing Values

* Fill with Mean

In [6]:
# Per-column means, computed over the non-missing entries only
column_means = df.mean()
# fillna aligns the Series on column labels, so each gap receives its own column's mean
df_fill_mean = df.fillna(value=column_means)
print("\nDataFrame after filling missing values with mean:\n", df_fill_mean)


DataFrame after filling missing values with mean:
      A         B  C
0  1.0  3.333333  1
1  2.0  2.000000  2
2  3.0  3.000000  3
3  4.0  3.333333  4
4  5.0  5.000000  5


* Fill with Median

In [7]:
# Per-column medians, computed over the non-missing entries only
column_medians = df.median()
# fillna aligns the Series on column labels, so each gap receives its own column's median
df_fill_median = df.fillna(value=column_medians)
print("\nDataFrame after filling missing values with median:\n", df_fill_median)


DataFrame after filling missing values with median:
      A    B  C
0  1.0  3.0  1
1  2.0  2.0  2
2  3.0  3.0  3
3  4.0  3.0  4
4  5.0  5.0  5
