In [2]:
import pandas as pd

# Creating example dataset with missing values, duplicates, and outliers
data = pd.DataFrame({'A': [1, 2, None, 4, 5, 6],
                     'B': [7, 8, 9, None, 11, 12],
                     'C': [13, 14, 15, 16, 17, 18],
                     'D': [19, 20, 21, 22, 23, 24],
                     'E': [25, 26, 27, 28, None, 30],
                     'F': [31, 32, 33, 34, 35, 36],
                     'G': [37, 38, 39, 40, 41, 42]})

# Handling missing data
data_filled = data.fillna(0)  # Fill missing values with 0
data_replaced = data.replace({None: -1})  # Replace None values with -1

# Removing duplicates
data_no_duplicates = data.drop_duplicates()

# Detecting and removing outliers
outliers = data[(data['C'] < 10) | (data['C'] > 30)]  # Example outlier detection based on a condition
data_no_outliers = data[~((data['C'] < 10) | (data['C'] > 30))]  # Remove outliers based on a condition

# Print the results
print("Original Data:")
print(data)
print("\nFilled Data:")
print(data_filled)
print("\nReplaced Data:")
print(data_replaced)
print("\nData without Duplicates:")
print(data_no_duplicates)
print("\nData with Outliers:")
print(outliers)
print("\nData without Outliers:")
print(data_no_outliers)


Original Data:
     A     B   C   D     E   F   G
0  1.0   7.0  13  19  25.0  31  37
1  2.0   8.0  14  20  26.0  32  38
2  NaN   9.0  15  21  27.0  33  39
3  4.0   NaN  16  22  28.0  34  40
4  5.0  11.0  17  23   NaN  35  41
5  6.0  12.0  18  24  30.0  36  42

Filled Data:
     A     B   C   D     E   F   G
0  1.0   7.0  13  19  25.0  31  37
1  2.0   8.0  14  20  26.0  32  38
2  0.0   9.0  15  21  27.0  33  39
3  4.0   0.0  16  22  28.0  34  40
4  5.0  11.0  17  23   0.0  35  41
5  6.0  12.0  18  24  30.0  36  42

Replaced Data:
     A     B   C   D     E   F   G
0  1.0   7.0  13  19  25.0  31  37
1  2.0   8.0  14  20  26.0  32  38
2  NaN   9.0  15  21  27.0  33  39
3  4.0   NaN  16  22  28.0  34  40
4  5.0  11.0  17  23   NaN  35  41
5  6.0  12.0  18  24  30.0  36  42

Data without Duplicates:
     A     B   C   D     E   F   G
0  1.0   7.0  13  19  25.0  31  37
1  2.0   8.0  14  20  26.0  32  38
2  NaN   9.0  15  21  27.0  33  39
3  4.0   NaN  16  22  28.0  34  40
4  5.0  11.0  17  2