In [1]:
import pandas as pd

# Input and output locations, kept in one place so they are easy to change.
INPUT_CSV = "India_budget_2021.csv"
OUTPUT_CSV = "cleaned_dataset.csv"


def report_missing(title: str, frame: pd.DataFrame) -> None:
    """Print *title* followed by the per-column count of missing values."""
    print(title)
    print(frame.isnull().sum())


def fill_numeric_with_mean(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with NaN in numeric columns replaced by the column mean.

    Columns that are not selected by ``select_dtypes(include='number')`` are
    copied unchanged.  The input frame is not modified.
    """
    filled = frame.copy()
    for col in filled.select_dtypes(include='number').columns:
        filled[col] = filled[col].fillna(filled[col].mean())
    return filled


def fill_categorical_with_mode(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with NaN in object columns replaced by the column mode.

    When several values tie for most frequent, the first entry returned by
    ``Series.mode()`` is used.  A column with no non-missing values has no
    mode at all; it is left untouched instead of raising on the empty result.
    The input frame is not modified.
    """
    filled = frame.copy()
    for col in filled.select_dtypes(include='object').columns:
        modes = filled[col].mode()
        if modes.empty:
            # Entirely-missing column: nothing sensible to fill with.
            continue
        filled[col] = filled[col].fillna(modes.iloc[0])
    return filled


def fill_remaining_from(primary: pd.DataFrame, fallback: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *primary* whose remaining NaN cells are taken from *fallback*.

    Only columns present in *fallback* are considered, and only those that
    still contain missing values in *primary* are touched.  Values are matched
    by index label, as ``Series.fillna`` does when given a Series.
    """
    combined = primary.copy()
    for col in fallback.columns:
        if combined[col].isnull().any():
            combined[col] = combined[col].fillna(fallback[col])
    return combined


# The steps run directly under the guard (not inside a function) so that every
# intermediate frame stays a module/notebook global, exactly as before, while
# importing this file no longer triggers the CSV read.
if __name__ == "__main__":
    # Step 1: Load your dataset
    df = pd.read_csv(INPUT_CSV)

    # Step 2: Show missing data summary
    report_missing("🚨 Missing Values Before Cleaning:", df)
    print("\n")

    # Step 3: Option 1 – Drop rows with any missing values
    df_drop_rows = df.dropna()
    report_missing("✅ After Dropping Rows with Missing Values:", df_drop_rows)

    # Step 4: Option 2 – Fill missing values with a constant
    df_fill_constant = df.fillna(0)
    report_missing("\n✅ After Filling with Constant 0:", df_fill_constant)

    # Step 5: Option 3 – Fill numeric columns with mean
    df_fill_mean = fill_numeric_with_mean(df)
    report_missing("\n✅ After Filling Numeric Columns with Mean:", df_fill_mean)

    # Step 6: Option 4 – Fill categorical columns with mode
    df_fill_mode = fill_categorical_with_mode(df)
    report_missing("\n✅ After Filling Categorical Columns with Mode:", df_fill_mode)

    # Step 7: Combine mean+mode filled data
    df_cleaned = fill_remaining_from(df_fill_mean, df_fill_mode)

    # Step 8: Final missing check and export
    report_missing("\n🎯 Final Cleaned Dataset Missing Summary:", df_cleaned)

    # Export cleaned data
    df_cleaned.to_csv(OUTPUT_CSV, index=False)
    print(f"\n📁 Cleaned dataset saved as '{OUTPUT_CSV}'")


🚨 Missing Values Before Cleaning:
Department /Ministry         1
Fund allotted(in ₹crores)    0
dtype: int64


✅ After Dropping Rows with Missing Values:
Department /Ministry         0
Fund allotted(in ₹crores)    0
dtype: int64

✅ After Filling with Constant 0:
Department /Ministry         0
Fund allotted(in ₹crores)    0
dtype: int64

✅ After Filling Numeric Columns with Mean:
Department /Ministry         1
Fund allotted(in ₹crores)    0
dtype: int64

✅ After Filling Categorical Columns with Mode:
Department /Ministry         0
Fund allotted(in ₹crores)    0
dtype: int64

🎯 Final Cleaned Dataset Missing Summary:
Department /Ministry         0
Fund allotted(in ₹crores)    0
dtype: int64

📁 Cleaned dataset saved as 'cleaned_dataset.csv'
