### Task 1: Handling Missing Values - Simple Imputation
**Description**: Given a dataset with missing values, impute the missing values using the mean for numerical features and the mode for categorical features.

In [1]:
# write your code from here
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import unittest

# ---------------------------- Sample Data ----------------------------
# Demo frame used by every task below: five rows, with exactly one missing
# entry in each of the three columns.
data = dict(
    Age=[25, 30, np.nan, 40, 35],
    Income=[50000, 60000, 55000, np.nan, 52000],
    Gender=['Male', 'Female', np.nan, 'Male', 'Female'],
)
df = pd.DataFrame(data=data)

print("Original Data:\n", df)

# ---------------------------- Task 1: Simple Imputation ----------------------------
def impute_missing_values(df):
    """Return a copy of ``df`` with missing values filled column by column.

    Floating-point columns are filled with the column mean. (NumPy integer
    columns cannot hold NaN, so they never need filling.) Every other column
    is filled with its most frequent value; when several values tie, the
    first one returned by ``Series.mode`` is used.

    A column with no observed values at all is left unchanged, because there
    is nothing to impute from.

    Parameters:
        df: the input DataFrame; it is not modified.

    Returns:
        A new DataFrame with the same columns and index as ``df``.
    """
    df_copy = df.copy()
    for column in df_copy.columns:
        series = df_copy[column]
        if not series.isna().any():
            continue  # nothing to fill in this column

        if pd.api.types.is_float_dtype(series):
            fill_value = series.mean()
        else:
            modes = series.mode(dropna=True)
            if modes.empty:
                # Entirely-missing column: no mode exists, leave it as is.
                continue
            fill_value = modes.iloc[0]

        # Assign the filled column back instead of calling
        # ``fillna(inplace=True)`` on ``df_copy[column]``: the in-place form
        # acts on a temporary object and is a no-op under copy-on-write.
        df_copy[column] = series.fillna(fill_value)
    return df_copy

# Task 1 demo: fill the gaps in the sample frame and show the result.
df_imputed = impute_missing_values(df=df)
print("\nAfter Imputation:\n", df_imputed)

# ---------------------------- Task 2: Min-Max Normalization ----------------------------
def min_max_normalize(df, column):
    """Return a copy of ``df`` with ``column`` rescaled by ``MinMaxScaler``.

    The scaler is fitted on the column itself and used with its default
    settings. ``df`` is not modified.

    Raises:
        ValueError: if ``column`` is not one of ``df``'s columns.
    """
    if column in df.columns:
        normalized = df.copy()
        # The scaler expects 2-D input, hence the one-column frame.
        rescaled = MinMaxScaler().fit_transform(normalized[[column]])
        normalized[column] = rescaled
        return normalized
    raise ValueError(f"Column '{column}' not found.")

# Task 2 demo: rescale 'Income' on the already-imputed frame.
df_minmax = min_max_normalize(df=df_imputed, column='Income')
print("\nAfter Min-Max Normalization on 'Income':\n", df_minmax)

# ---------------------------- Task 3: Drop Missing Values ----------------------------
def drop_missing(df):
    """Return a new DataFrame without the rows that contain a missing value.

    Surviving rows keep their original index labels; ``df`` is not modified.
    """
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows

# Task 3 demo: runs on the raw frame, which still contains the gaps.
df_dropped = drop_missing(df=df)
print("\nAfter Dropping Rows with Missing Values:\n", df_dropped)

# ---------------------------- Task 4: Standardization ----------------------------
def standardize_feature(df, column):
    """Return a copy of ``df`` with ``column`` transformed by ``StandardScaler``.

    The scaler is fitted on the column itself and used with its default
    settings. ``df`` is not modified.

    Raises:
        ValueError: if ``column`` is not one of ``df``'s columns.
    """
    column_missing = column not in df.columns
    if column_missing:
        raise ValueError(f"Column '{column}' not found.")

    standardized = df.copy()
    # The scaler expects 2-D input, hence the one-column frame.
    feature = standardized[[column]]
    standardized[column] = StandardScaler().fit_transform(feature)
    return standardized

# Task 4 demo: standardize 'Age' on the already-imputed frame.
df_standardized = standardize_feature(df=df_imputed, column='Age')
print("\nAfter Standardizing 'Age':\n", df_standardized)

# ---------------------------- Unit Tests ----------------------------
class TestDataProcessing(unittest.TestCase):
    """Unit tests for the imputation, row-dropping and scaling helpers.

    Each test pins the exact values the helper should produce, so a helper
    that merely removes NaNs or lands in the right range cannot pass.
    """

    def test_imputation(self):
        """The mean fills the numeric gap and the mode the categorical one."""
        df_test = pd.DataFrame({
            'Num': [1, 2, np.nan],
            'Cat': ['A', np.nan, 'A']
        })
        result = impute_missing_values(df_test)
        self.assertFalse(result.isnull().values.any())
        self.assertAlmostEqual(result['Num'].iloc[2], 1.5)
        self.assertEqual(result['Cat'].iloc[1], 'A')
        # Observed values must pass through untouched.
        self.assertEqual(result['Num'].iloc[:2].tolist(), [1.0, 2.0])
        # The helper works on a copy, so the input keeps its gaps.
        self.assertTrue(df_test['Num'].isna().iloc[2])
        self.assertTrue(df_test['Cat'].isna().iloc[1])

    def test_min_max(self):
        """Evenly spaced input maps to 0, 0.5 and 1."""
        df_test = pd.DataFrame({'Val': [10, 20, 30]})
        result = min_max_normalize(df_test, 'Val')
        for actual, expected in zip(result['Val'].tolist(), [0.0, 0.5, 1.0]):
            self.assertAlmostEqual(actual, expected)
        # The input frame is not rescaled in place.
        self.assertEqual(df_test['Val'].tolist(), [10, 20, 30])
        with self.assertRaises(ValueError):
            min_max_normalize(df_test, 'Missing')

    def test_drop_missing(self):
        """Only the fully populated row survives, keeping its index label."""
        df_test = pd.DataFrame({'A': [1, np.nan], 'B': [2, 3]})
        result = drop_missing(df_test)
        self.assertEqual(len(result), 1)
        self.assertEqual(list(result.index), [0])
        self.assertEqual(len(df_test), 2)

    def test_standardization(self):
        """The standardized column has zero mean and unit variance."""
        df_test = pd.DataFrame({'Score': [50, 60, 70]})
        result = standardize_feature(df_test, 'Score')
        self.assertAlmostEqual(result['Score'].mean(), 0)
        # ddof=0: compare against the population standard deviation.
        self.assertAlmostEqual(result['Score'].std(ddof=0), 1)
        self.assertEqual(df_test['Score'].tolist(), [50, 60, 70])
        with self.assertRaises(ValueError):
            standardize_feature(df_test, 'Missing')

# Collect the TestDataProcessing cases into a suite, then run it with the
# text runner.
suite = unittest.TestLoader().loadTestsFromTestCase(TestDataProcessing)
unittest.TextTestRunner().run(suite)


....
----------------------------------------------------------------------
Ran 4 tests in 0.014s

OK


Original Data:
     Age   Income  Gender
0  25.0  50000.0    Male
1  30.0  60000.0  Female
2   NaN  55000.0     NaN
3  40.0      NaN    Male
4  35.0  52000.0  Female

After Imputation:
     Age   Income  Gender
0  25.0  50000.0    Male
1  30.0  60000.0  Female
2  32.5  55000.0  Female
3  40.0  54250.0    Male
4  35.0  52000.0  Female

After Min-Max Normalization on 'Income':
     Age  Income  Gender
0  25.0   0.000    Male
1  30.0   1.000  Female
2  32.5   0.500  Female
3  40.0   0.425    Male
4  35.0   0.200  Female

After Dropping Rows with Missing Values:
     Age   Income  Gender
0  25.0  50000.0    Male
1  30.0  60000.0  Female
4  35.0  52000.0  Female

After Standardizing 'Age':
    Age   Income  Gender
0 -1.5  50000.0    Male
1 -0.5  60000.0  Female
2  0.0  55000.0  Female
3  1.5  54250.0    Male
4  0.5  52000.0  Female


<unittest.runner.TextTestResult run=4 errors=0 failures=0>

### Task 2: Feature Scaling - Min-Max Normalization
**Description**: Normalize a numerical feature using Min-Max scaling to a range [0, 1].

In [None]:
# write your code from here

### Task 3: Handling Missing Values - Drop Missing Values
**Description**: Remove rows with missing values from a dataset.

In [None]:
# write your code from here

### Task 4: Feature Scaling - Standardization
**Description**: Standardize a numerical feature to have zero mean and unit variance.

In [None]:
# write your code from here