# Handling Missing Data in Pandas: Coding Practice Questions

In [1]:
import pandas as pd
import numpy as np

1. Create a Pandas DataFrame named `df` with values `[[1, np.nan], [2, 3], [np.nan, 4]]` and columns `['A', 'B']`. Display the DataFrame.

In [28]:
# Build the 3x2 practice frame; columns 'A' and 'B' each hold one NaN.
df = pd.DataFrame(
    data=[[1, np.nan], [2, 3], [np.nan, 4]],
    columns=['A', 'B'],
)
df

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,,4.0


2. Check if there are any missing values in the DataFrame `df`.

In [29]:
# Element-wise mask: True wherever a value is missing (`isna` is the alias of `isnull`).
df.isna()

Unnamed: 0,A,B
0,False,True
1,False,False
2,True,False


In [30]:
# Collapse the missing-value mask over both axes into one boolean.
df.isna().any(axis=None)

True

3. Count the number of missing values in each column of `df`.

In [36]:
# Summing the boolean mask down each column counts its missing entries.
df.isna().sum(axis=0)

A    1
B    1
dtype: int64

4. Drop all rows containing missing values from `df`.

In [6]:
# Return a copy without any row that has at least one missing value (the defaults, spelled out).
df.dropna(axis=0, how='any')

Unnamed: 0,A,B
1,2.0,3.0


5. Drop all columns containing missing values from `df`.

In [8]:
# Return a copy without any column that has a missing value.
# Both columns contain a NaN here, so only the index remains.
df.dropna(axis='columns')

0
1
2


6. Fill all missing values in `df` with 0.

In [9]:
# Return a copy with every missing value replaced by the constant 0.
df.fillna(value=0)

Unnamed: 0,A,B
0,1.0,0.0
1,2.0,3.0
2,0.0,4.0


7. Fill missing values in column 'A' of `df` with the mean of the column.

In [13]:
# Fill the gaps in 'A' with the column mean; `mean` skips NaN by default (made explicit here).
df['A'].fillna(value=df['A'].mean(skipna=True))

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

8. Forward-fill missing values in `df`.

In [16]:
# Forward-fill: carry the last valid value in each column down into the gaps.
# `fillna(method='ffill')` is deprecated (FutureWarning since pandas 2.1);
# the dedicated `ffill` method is the supported spelling. A leading NaN stays
# NaN because there is no earlier value to propagate.
df.ffill()

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,2.0,4.0


In [38]:
# Forward-fill down the rows: each gap takes the previous valid value in its column.
df.ffill(axis=0)

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,2.0,4.0


9. Backward-fill missing values in `df`.

In [17]:
# Backward-fill: pull the next valid value in each column up into the gaps.
# `fillna(method='bfill')` is deprecated (FutureWarning since pandas 2.1);
# the dedicated `bfill` method is the supported spelling. A trailing NaN stays
# NaN because there is no later value to propagate.
df.bfill()

Unnamed: 0,A,B
0,1.0,3.0
1,2.0,3.0
2,,4.0


In [39]:
# Backward-fill up the rows: each gap takes the next valid value in its column.
df.bfill(axis=0)

Unnamed: 0,A,B
0,1.0,3.0
1,2.0,3.0
2,,4.0


10. Use the `interpolate` method to fill missing values in `df`.

In [18]:
# Linear interpolation (the default method), filling in the forward direction only:
# the leading NaN in 'B' is left as-is, the trailing NaN in 'A' repeats the last value.
df.interpolate(method='linear')

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,2.0,4.0


11. Replace all occurrences of the value 3 in `df` with `NaN`.

In [21]:
# Blank out every cell equal to 3 and display the result.
# `mask` returns a new frame (NaN wherever the condition is True) instead of
# assigning into `df`, so the shared frame stays intact for the later cells
# and re-running the notebook top to bottom gives the same outputs.
df.mask(df == 3)

Unnamed: 0,A,B
0,1.0,
1,2.0,
2,,4.0


In [40]:
# Return a copy in which every value equal to 3 becomes NaN.
df.replace(to_replace=3, value=np.nan)

Unnamed: 0,A,B
0,1.0,
1,2.0,
2,,4.0


12. Check if `df` has any infinite values.

In [27]:
# Element-wise mask: True wherever a value is +inf or -inf.
df.isin(values=[-np.inf, np.inf])

Unnamed: 0,A,B
0,False,False
1,False,False
2,False,False


In [42]:
# Collapse the infinity mask over both axes into one boolean.
np.isinf(df).any(axis=None)

False

13. Replace any infinite values in `df` with `NaN`.

In [44]:
# Swap +inf / -inf for NaN and display the result.
# `replace` returns a new frame and leaves `df` untouched, so the returned
# frame must be the cell's last expression; echoing `df` afterwards would
# show the original data rather than the replacement.
df.replace([np.inf, -np.inf], np.nan)

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,,4.0


14. Convert the data type of column 'A' in `df` to integer, handling any missing values.

In [51]:
# NumPy integer dtypes cannot hold NaN, so fill the gap first (here with 3.0), then cast.
df['A'].fillna(value=3.0).astype(dtype='int32')

0    1
1    2
2    3
Name: A, dtype: int32

In [70]:
# Same idea with 0 as the fill value and the platform's default integer width.
df['A'].fillna(value=0).astype(dtype=int)

0    1
1    2
2    0
Name: A, dtype: int64

15. Create a mask for `df` that indicates which entries are missing.

In [52]:
# Boolean mask with the same shape as `df`: True marks a missing entry.
df.isna()

Unnamed: 0,A,B
0,False,True
1,False,False
2,True,False


16. Count the number of non-missing values in each column of `df`.

In [55]:
# `count` tallies the non-missing entries in each column.
df.count(axis=0)

A    2
B    2
dtype: int64

In [72]:
# Equivalent tally: sum the "is present" mask down each column.
df.notna().sum(axis=0)

A    2
B    2
dtype: int64

17. Replace missing values in `df` with the median of each column.

In [62]:
# `median` yields one value per column; `fillna` aligns that Series on column labels,
# so each column's gaps receive that column's own median.
df.fillna(value=df.median(axis=0))

Unnamed: 0,A,B
0,1.0,3.5
1,2.0,3.0
2,1.5,4.0


18. Fill missing values in `df` using a specified fill value for each column. Use 0 for column 'A' and 1 for column 'B'.

In [64]:
# Per-column fill values: gaps in 'A' become 0, gaps in 'B' become 1.
df.fillna(value=dict(A=0, B=1))

Unnamed: 0,A,B
0,1.0,1.0
1,2.0,3.0
2,0.0,4.0


19. Drop rows from `df` where all values are missing.

In [75]:
# Drop only rows in which every value is missing; no such row exists here, so nothing is removed.
df.dropna(axis=0, how='all')

Unnamed: 0,A,B
0,1.0,
1,2.0,3.0
2,,4.0


20. Drop rows from `df` where any value is missing.

In [76]:
# Drop every row that has at least one missing value; only the fully populated row survives.
df.dropna(axis='index', how='any')

Unnamed: 0,A,B
1,2.0,3.0
