In [1]:
import pandas as pd
import numpy as np


# Demo frame for missing-value detection: columns 'A' and 'B' each contain
# two gaps (written both as np.nan and as None); column 'C' is complete.
data = dict(
    A=[1, 2, np.nan, 4, None],
    B=[None, 6, 7, 8, np.nan],
    C=[10, 20, 30, 40, 50],
)
df = pd.DataFrame(data=data)

# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,A,B,C
0,1.0,,10
1,2.0,6.0,20
2,,7.0,30
3,4.0,8.0,40
4,,,50


In [2]:
# isna() returns a boolean frame of the same shape as df:
# True where a value is missing, False where it is present.
missing_data = df.isna()

# Show the mask as the cell output.
missing_data

Unnamed: 0,A,B,C
0,False,True,False
1,False,False,False
2,True,False,False
3,False,False,False
4,True,True,False


In [3]:
# isnull() is an alias of isna(): it produces the same True-where-missing
# boolean mask.
missing_data = df.isnull()

# Show the mask as the cell output.
missing_data

Unnamed: 0,A,B,C
0,False,True,False
1,False,False,False
2,True,False,False
3,False,False,False
4,True,True,False


In [4]:
# notnull() is the inverse mask: True where a value is present,
# False where it is missing.
not_missing_data = df.notnull()

# Show the mask as the cell output.
not_missing_data

Unnamed: 0,A,B,C
0,True,False,True
1,True,True,True
2,False,True,True
3,True,True,True
4,False,False,True


In [5]:
# Demo frame for the dropna() example: gaps in 'A' and 'B', 'C' complete.
data = dict(
    A=[1, 2, np.nan, 4, None],
    B=[None, 6, 7, 8, np.nan],
    C=[10, 20, 30, 40, 50],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")


Original DataFrame:
     A    B   C
0  1.0  NaN  10
1  2.0  6.0  20
2  NaN  7.0  30
3  4.0  8.0  40
4  NaN  NaN  50


In [6]:
# With default arguments dropna() discards every row that has at least one
# missing value; df itself is not modified.
df_dropped = df.dropna()

print(
    "\nDataFrame after dropping rows with missing values:",
    df_dropped,
    sep="\n",
)


DataFrame after dropping rows with missing values:
     A    B   C
1  2.0  6.0  20
3  4.0  8.0  40


In [7]:
# Demo frame for the dropna(subset=...) example: gaps in 'A' and 'B',
# 'C' complete.
data = dict(
    A=[1, 2, np.nan, 4, None],
    B=[None, 6, 7, 8, np.nan],
    C=[10, 20, 30, 40, 50],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
     A    B   C
0  1.0  NaN  10
1  2.0  6.0  20
2  NaN  7.0  30
3  4.0  8.0  40
4  NaN  NaN  50


In [8]:
    # Limit the missing-value check to columns 'A' and 'B': a row is removed
    # when either of those two columns is missing.
    # NOTE(review): this whole cell is indented by four spaces; it ran in the
    # notebook, but the indentation looks unintentional — confirm and dedent.
    df_dropped_subset = df.dropna(subset=['A', 'B'])

    print(
        "\nDataFrame after dropping rows with missing values in column 'A' and 'B':",
        df_dropped_subset,
        sep="\n",
    )


DataFrame after dropping rows with missing values in column 'A' and 'B':
     A    B   C
1  2.0  6.0  20
3  4.0  8.0  40


In [9]:
# Two numeric columns with gaps, used for the constant-fill example.
data = dict(
    A=[1, 2, None, 4, 5],
    B=[None, 10, 20, None, 50],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
     A     B
0  1.0   NaN
1  2.0  10.0
2  NaN  20.0
3  4.0   NaN
4  5.0  50.0


In [10]:
# Replace every missing entry, in all columns, with the constant 0.
# fillna() returns a new frame; df keeps its gaps.
constant_filled_df = df.fillna(value=0)

print(
    "\nDataFrame with Missing Values Filled by Constant (0):",
    constant_filled_df,
    sep="\n",
)


DataFrame with Missing Values Filled by Constant (0):
     A     B
0  1.0   0.0
1  2.0  10.0
2  0.0  20.0
3  4.0   0.0
4  5.0  50.0


In [11]:
# Two numeric columns with gaps, used for the mean/median imputation examples.
data = dict(
    A=[1, 2, None, 4, 5],
    B=[None, 10, 20, None, 50],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")



Original DataFrame:
     A     B
0  1.0   NaN
1  2.0  10.0
2  NaN  20.0
3  4.0   NaN
4  5.0  50.0


In [12]:
# Impute only column 'A': its gaps take the mean of the non-missing 'A'
# values. assign() returns a new frame, so df and column 'B' stay untouched.
mean_filled_df_A = df.assign(A=df['A'].fillna(df['A'].mean()))


In [13]:
# Impute only column 'B': its gaps take the median of the non-missing 'B'
# values. assign() returns a new frame, so df and column 'A' stay untouched.
median_filled_df_B = df.assign(B=df['B'].fillna(df['B'].median()))


In [14]:
# Show the frame whose column 'A' was imputed with the column mean.
print(
    "\nDataFrame with Missing Values Filled by Mean:",
    mean_filled_df_A,
    sep="\n",
)



DataFrame with Missing Values Filled by Mean:
     A     B
0  1.0   NaN
1  2.0  10.0
2  3.0  20.0
3  4.0   NaN
4  5.0  50.0


In [15]:

# Show the frame whose column 'B' was imputed with the column median.
print(
    "\nDataFrame with Missing Values Filled by Median:",
    median_filled_df_B,
    sep="\n",
)


DataFrame with Missing Values Filled by Median:
     A     B
0  1.0  20.0
1  2.0  10.0
2  NaN  20.0
3  4.0  20.0
4  5.0  50.0


In [16]:
# Single categorical column with two missing country names.
data = dict(
    Country=['USA', 'Canada', None, 'Germany', None, 'India'],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")



Original DataFrame:
   Country
0      USA
1   Canada
2     None
3  Germany
4     None
5    India


In [17]:
# Substitute the placeholder category 'Unknown' for missing country names.
# Only the 'Country' column is selected, so the result is a Series,
# not a DataFrame.
specific_filled_df = df['Country'].fillna(value='Unknown')

print(
    "\nDataFrame with Missing Country Names Filled by 'Unknown':",
    specific_filled_df,
    sep="\n",
)


DataFrame with Missing Country Names Filled by 'Unknown':
0        USA
1     Canada
2    Unknown
3    Germany
4    Unknown
5      India
Name: Country, dtype: object


In [18]:
# Most frequent country name. mode() returns every tied value in sorted
# order and [0] keeps the first one; in this sample each country occurs
# exactly once, so the pick is simply the alphabetically first name.
mode_country = df['Country'].mode()[0]

print("\nMode of 'Country' column:", mode_country)




Mode of 'Country' column: Canada


In [19]:
# Substitute the previously computed mode for missing country names.
# Only the 'Country' column is selected, so the result is a Series.
mode_filled_df = df['Country'].fillna(value=mode_country)

print(
    "\nDataFrame with Missing Country Names Filled by Mode:",
    mode_filled_df,
    sep="\n",
)


DataFrame with Missing Country Names Filled by Mode:
0        USA
1     Canada
2     Canada
3    Germany
4     Canada
5      India
Name: Country, dtype: object


In [20]:
# Two numeric columns with interleaved gaps, used for the forward-fill and
# backward-fill examples.
data = dict(
    A=[1, 2, None, 4, None, 6],
    B=[3, None, 7, None, 11, 13],
)
df = pd.DataFrame(data=data)

# Heading and frame go out in one call; sep="\n" keeps them on separate lines.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
     A     B
0  1.0   3.0
1  2.0   NaN
2  NaN   7.0
3  4.0   NaN
4  NaN  11.0
5  6.0  13.0


In [21]:
# Forward fill: each gap takes the last valid value above it in the same
# column. ffill() returns a new frame; df is unchanged.
forward_filled_df = df.ffill()

print(
    "\nDataFrame with Missing Values Filled by Forward Fill (Ffill):",
    forward_filled_df,
    sep="\n",
)


DataFrame with Missing Values Filled by Forward Fill (Ffill):
     A     B
0  1.0   3.0
1  2.0   3.0
2  2.0   7.0
3  4.0   7.0
4  4.0  11.0
5  6.0  13.0


In [22]:
# Backward fill: each gap takes the next valid value below it in the same
# column. bfill() returns a new frame; df is unchanged.
backward_filled_df = df.bfill()

# Print the untouched input first so the two frames can be compared.
print("Original DataFrame:", df, sep="\n")

print(
    "\nDataFrame with Missing Values Filled by Backward Fill (Bfill):",
    backward_filled_df,
    sep="\n",
)

Original DataFrame:
     A     B
0  1.0   3.0
1  2.0   NaN
2  NaN   7.0
3  4.0   NaN
4  NaN  11.0
5  6.0  13.0

DataFrame with Missing Values Filled by Backward Fill (Bfill):
     A     B
0  1.0   3.0
1  2.0   7.0
2  4.0   7.0
3  4.0  11.0
4  6.0  11.0
5  6.0  13.0


In [23]:
# Numeric frame for the interpolation example: 'A' has an interior gap and a
# trailing gap, 'B' has one interior gap.
data = pd.DataFrame(
    {
        'A': [1, 2, np.nan, 4, np.nan],
        'B': [5, np.nan, 7, 8, 9],
    }
)

# Bare trailing expression: the notebook renders the frame as the cell output.
data

Unnamed: 0,A,B
0,1.0,5.0
1,2.0,
2,,7.0
3,4.0,8.0
4,,9.0


In [24]:
# Linear interpolation: interior gaps are filled on a straight line between
# their neighbours. As the printed result shows, the trailing gap in 'A' is
# filled with the last valid value (4.0) rather than extrapolated.
data_linear_interpolated = data.interpolate(method='linear')

print("Linear Interpolation:", data_linear_interpolated, sep="\n")

Linear Interpolation:
     A    B
0  1.0  5.0
1  2.0  6.0
2  3.0  7.0
3  4.0  8.0
4  4.0  9.0


In [25]:
# Daily time series of five points starting 2023-01-01, with the second and
# fourth observations missing.
time_index = pd.date_range('2023-01-01', periods=5, freq='D')
time_series_data = pd.Series(
    data=[10, np.nan, 30, np.nan, 50],
    index=time_index,
)

print("Time-Series Data with Missing Values:", time_series_data, sep="\n")

Time-Series Data with Missing Values:
2023-01-01    10.0
2023-01-02     NaN
2023-01-03    30.0
2023-01-04     NaN
2023-01-05    50.0
Freq: D, dtype: float64


In [26]:
# People records for the duplicate-handling examples: rows 4 and 6 repeat
# rows 0 and 1 exactly.
data = dict(
    ID=[1, 2, 3, 4, 1, 5, 2],
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Alice', 'Eve', 'Bob'],
    Age=[25, 30, 22, 28, 25, 29, 30],
)
df = pd.DataFrame(data=data)

# Bare trailing expression: the notebook renders the frame as the cell output.
df

Unnamed: 0,ID,Name,Age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,22
3,4,David,28
4,1,Alice,25
5,5,Eve,29
6,2,Bob,30


In [27]:
# duplicated() compares whole rows and returns a boolean Series: True for a
# row that repeats an earlier one, False for the first occurrence.
duplicates = df.duplicated()

print(duplicates)

0    False
1    False
2    False
3    False
4     True
5    False
6     True
dtype: bool


In [28]:
# Same check, but only the 'ID' and 'Name' columns are compared; 'Age' is
# ignored when deciding whether a row repeats an earlier one.
duplicates_subset = df.duplicated(subset=['ID', 'Name'])

print(duplicates_subset)

0    False
1    False
2    False
3    False
4     True
5    False
6     True
dtype: bool


In [29]:

# drop_duplicates() keeps the first occurrence of each distinct row and
# returns a new frame; surviving rows keep their original index labels.
df_no_duplicates = df.drop_duplicates()

print(df_no_duplicates)

   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   3  Charlie   22
3   4    David   28
5   5      Eve   29
