In [4]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so both the random values and the NaN positions
# below are reproducible on every run of this cell.
np.random.seed(1911)

# Fraction of the frame's cells targeted for replacement with NaN (0.4 -> 40%).
NAN_FRACTION = 0.4

# Build a 7x5 DataFrame of random decimal values scaled into [0, 100).
data = np.random.rand(7, 5) * 100
df = pd.DataFrame(data, columns=[f'Col{i}' for i in range(1, 6)])

# Number of (row, col) positions to draw: 40% of all cells, truncated to an int.
total_elements = df.size
nan_elements = int(total_elements * NAN_FRACTION)

# Row and column positions are drawn independently and WITH replacement, so the
# same (row, col) pair can come up more than once. nan_elements is therefore an
# upper bound on the number of NaN cells, not a guaranteed count.
nan_row_indices = np.random.randint(0, df.shape[0], size=nan_elements)
nan_col_indices = np.random.randint(0, df.shape[1], size=nan_elements)

# Flag every drawn position in a boolean mask and blank them all out in one
# vectorized step (instead of assigning cell by cell in a Python loop).
nan_mask = np.zeros(df.shape, dtype=bool)
nan_mask[nan_row_indices, nan_col_indices] = True
df = df.mask(nan_mask)

print(df)


        Col1       Col2       Col3       Col4       Col5
0  32.993881  60.279023  60.078470        NaN  99.819401
1  46.153992  67.960621        NaN  32.296167  59.092763
2  86.086486  66.086887  15.560806        NaN  61.007589
3  68.503674        NaN        NaN        NaN  92.812538
4        NaN        NaN        NaN  10.125656  46.923909
5  94.704527  25.512543  92.394019        NaN  93.512213
6  80.743525        NaN        NaN        NaN        NaN


In [10]:
# Replace every NaN cell of df with one user-chosen placeholder value.
# The filled frame is returned as the cell's result; it is not assigned back to df.

df.fillna(value='missing_value')

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
0,32.993881,60.279023,60.07847,missing_value,99.819401
1,46.153992,67.960621,missing_value,32.296167,59.092763
2,86.086486,66.086887,15.560806,missing_value,61.007589
3,68.503674,missing_value,missing_value,missing_value,92.812538
4,missing_value,missing_value,missing_value,10.125656,46.923909
5,94.704527,25.512543,92.394019,missing_value,93.512213
6,80.743525,missing_value,missing_value,missing_value,missing_value


In [18]:
# Fill each column with its own replacement value by passing a dict that maps
# column name -> fill value. Columns missing from the dict (Col2, Col4) keep their NaNs.

df.fillna(
    value={
        'Col1': 0,
        'Col3': '-',
        'Col5': 'x',
    }
)

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
0,32.99,60.28,60.07847,,99.819401
1,46.15,67.96,-,32.3,59.092763
2,86.09,66.09,15.560806,,61.007589
3,68.5,,-,,92.812538
4,0.0,,-,10.13,46.923909
5,94.7,25.51,92.394019,,93.512213
6,80.74,,-,,x


In [26]:
# Forward-fill: each NaN takes the last non-missing value above it in the same
# column (axis=0, which is also the default direction).
# limit=1 fills at most one consecutive NaN per gap; longer runs stay partly NaN.
df.ffill(axis=0, limit=1)

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
0,32.993881,60.279023,60.07847,,99.819401
1,46.153992,67.960621,60.07847,32.296167,59.092763
2,86.086486,66.086887,15.560806,32.296167,61.007589
3,68.503674,66.086887,15.560806,,92.812538
4,68.503674,,,10.125656,46.923909
5,94.704527,25.512543,92.394019,10.125656,93.512213
6,80.743525,25.512543,92.394019,,93.512213
