# Handling missing values

In [None]:
# Count the missing entries in each column
print(df.isnull().sum())

# Drop every row that contains at least one missing value
df_clean = df.dropna()

# Fill missing values. The lines below are alternative strategies; they all
# assign to df_filled, so only the last one executed (the mode fill) is kept.
df_filled = df.fillna(0)  # With zero
# numeric_only=True restricts the statistic to numeric columns: pandas 2.x
# raises TypeError when asked for the mean/median of text columns, and
# fillna() simply leaves columns without a fill value untouched.
df_filled = df.fillna(df.mean(numeric_only=True))  # With mean
df_filled = df.fillna(df.median(numeric_only=True))  # With median
# mode() can return several rows when there are ties; iloc[0] takes the first
df_filled = df.fillna(df.mode().iloc[0])  # With mode

# Advanced imputation
from sklearn.impute import SimpleImputer
# The 'mean' strategy only works on numeric data, so impute those columns
# only and carry every other column over unchanged.
numeric_cols = df.select_dtypes(include='number').columns
imputer = SimpleImputer(strategy='mean')
# Start from a copy so the original index and the non-numeric columns are
# preserved; fit_transform returns a bare array with no index/labels.
df_imputed = df.copy()
df_imputed[numeric_cols] = imputer.fit_transform(df[numeric_cols])

# Handling duplicate data

In [None]:
# Flag rows that repeat an earlier row in every column, then count them
duplicate_mask = df.duplicated()
print(duplicate_mask.sum())

# Keep a single copy of each fully duplicated row
df_unique = df.drop_duplicates()

# Alternative: treat rows as duplicates when they match on the key columns
# alone (this overwrites the df_unique computed just above)
key_columns = ['col1', 'col2']
df_unique = df.drop_duplicates(subset=key_columns)

# Detecting and correcting inconsistencies

In [None]:
# Inspect the distinct labels currently present in the categorical column
labels = df['category_column']
print(labels.unique())

# Normalise the labels so that variants differing only in letter case or
# surrounding whitespace collapse into one value
lowered = labels.str.lower()
df['category_column'] = lowered.str.strip()

# Force the column to a numeric dtype; entries that cannot be parsed
# become NaN (errors='coerce') instead of raising
raw_numbers = df['numeric_column']
df['numeric_column'] = pd.to_numeric(raw_numbers, errors='coerce')

# Type conversions

In [None]:
# Map each column to the conversion applied to it. The dict preserves
# insertion order, so the columns are converted in the order listed:
# categorical first, then datetime, then numeric.
converters = {
    'category_column': lambda series: series.astype('category'),
    'date_column': pd.to_datetime,
    'numeric_column': pd.to_numeric,
}

# Replace every listed column with its converted version
for column, convert in converters.items():
    df[column] = convert(df[column])