# Data Transformation - Data Science Koans

Master data transformation techniques!

## What You Will Learn
- Scaling and normalization
- Encoding categorical variables
- Binning and discretization
- Log transforms
- Feature combinations

## How to Use
1. Read each koan
2. Complete TODOs
3. Run validation
4. Iterate

In [None]:
# Setup
import sys
sys.path.append('../..')
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from koans.core.validator import KoanValidator
from koans.core.progress import ProgressTracker

# Create the validator for this notebook and a progress tracker. Later cells
# use `validator` for the @validator.koan decorators and `tracker` for the
# final progress report.
validator = KoanValidator('05_data_transformation')
tracker = ProgressTracker()
print('Setup complete!')
# Show the progress value the tracker currently reports for this notebook.
print(f"Progress: {tracker.get_notebook_progress('05_data_transformation')}%")

## KOAN 5.1: Min-Max Scaling
**Objective**: Scale to 0-1
**Difficulty**: Beginner

In [None]:
def min_max_scale():
    """Koan 5.1 exercise: scale the sample column with MinMaxScaler.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The checks that follow this
    function expect ``result[0][0] == 0.0`` and ``result[-1][0] == 1.0``.
    """
    # Column vector of shape (5, 1), produced by reshape(-1, 1).
    data = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
    # TODO: Scale using MinMaxScaler
    pass

@validator.koan(1, "Min-Max Scaling", difficulty="Beginner")
def validate():
    """Check koan 5.1: the scaled column must start at 0.0 and end at 1.0."""
    result = min_max_scale()
    # The assertions must live inside the function: `result` is local, and
    # module-level asserts would raise NameError before validate() is called.
    assert result[0][0] == 0.0
    assert result[-1][0] == 1.0

validate()

## KOAN 5.2: Standard Scaling
**Objective**: Z-score normalization
**Difficulty**: Beginner

In [None]:
def standard_scale():
    """Koan 5.2 exercise: standardize the sample column with StandardScaler.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The checks that follow this
    function expect ``abs(result.mean()) < 0.01`` and
    ``abs(result.std() - 1.0) < 0.01``.
    """
    # Column vector of shape (5, 1), produced by reshape(-1, 1).
    data = np.array([10, 20, 30, 40, 50]).reshape(-1, 1)
    # TODO: Use StandardScaler
    pass

@validator.koan(2, "Standard Scaling", difficulty="Beginner")
def validate():
    """Check koan 5.2: the result must have mean ~0 and std ~1 (within 0.01)."""
    result = standard_scale()
    # The assertions must live inside the function: `result` is local, and
    # module-level asserts would raise NameError before validate() is called.
    assert abs(result.mean()) < 0.01
    assert abs(result.std() - 1.0) < 0.01

validate()

## KOAN 5.3: One-Hot Encoding
**Objective**: Convert a categorical column into one-hot indicator columns
**Difficulty**: Beginner

In [None]:
def one_hot_encode():
    """Koan 5.3 exercise: one-hot encode the 'color' column with pd.get_dummies.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The check that follows this
    function expects ``result.shape[1] == 3``.
    """
    # Four rows holding three distinct colors: red, blue, green.
    df = pd.DataFrame({'color': ['red', 'blue', 'red', 'green']})
    # TODO: Use pd.get_dummies
    pass

@validator.koan(3, "One-Hot Encoding", difficulty="Beginner")
def validate():
    """Check koan 5.3: the encoded result must have exactly three columns."""
    result = one_hot_encode()
    # The assertion must live inside the function: `result` is local, and a
    # module-level assert would raise NameError before validate() is called.
    assert result.shape[1] == 3

validate()

## KOAN 5.4: Label Encoding
**Objective**: Map categories to numeric codes
**Difficulty**: Beginner

In [None]:
def label_encode():
    """Koan 5.4 exercise: map the category labels to the codes 0, 1, 2.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The check that follows this
    function expects ``len(set(result)) == 3``, i.e. three distinct values.
    """
    # Five labels drawn from three distinct categories.
    categories = ['low', 'medium', 'high', 'low', 'high']
    # TODO: Map to 0, 1, 2
    pass

@validator.koan(4, "Label Encoding", difficulty="Beginner")
def validate():
    """Check koan 5.4: the encoded result must contain three distinct values."""
    result = label_encode()
    # The assertion must live inside the function: `result` is local, and a
    # module-level assert would raise NameError before validate() is called.
    assert len(set(result)) == 3

validate()

## KOAN 5.5: Binning
**Objective**: Discretize a continuous variable into bins
**Difficulty**: Beginner

In [None]:
def bin_ages():
    """Koan 5.5 exercise: bin the sample ages into child, adult and senior.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The check that follows this
    function expects ``len(set(result)) == 3``, i.e. three distinct values.
    """
    ages = [5, 15, 25, 35, 45, 55, 65]
    # TODO: Create bins: child, adult, senior
    pass

@validator.koan(5, "Binning", difficulty="Beginner")
def validate():
    """Check koan 5.5: the binned result must contain three distinct values."""
    result = bin_ages()
    # The assertion must live inside the function: `result` is local, and a
    # module-level assert would raise NameError before validate() is called.
    assert len(set(result)) == 3

validate()

## KOAN 5.6: Log Transform
**Objective**: Handle skewness
**Difficulty**: Beginner

In [None]:
def log_transform():
    """Koan 5.6 exercise: apply np.log10 to the sample data.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The check that follows this
    function expects the last element of the result to be 3.0.
    """
    # Powers of ten from 10**0 to 10**3.
    data = np.array([1, 10, 100, 1000])
    # TODO: Apply np.log10
    pass

@validator.koan(6, "Log Transform", difficulty="Beginner")
def validate():
    """Check koan 5.6: the last transformed value must be 3.0 (log10 of 1000)."""
    result = log_transform()
    # The assertion must live inside the function: `result` is local, and a
    # module-level assert would raise NameError before validate() is called.
    # np.isclose rather than ==, so a correct answer that is off by a
    # floating-point rounding error is still accepted.
    assert np.isclose(result[-1], 3.0)

validate()

## KOAN 5.7: Power Transform
**Objective**: Apply power transforms (square and square root)
**Difficulty**: Beginner

In [None]:
def square_root():
    """Koan 5.7 exercise: apply a square root to the sample data.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The check that follows this
    function expects the last element of the result to be 5.0.
    """
    # Perfect squares of 1 through 5.
    data = np.array([1, 4, 9, 16, 25])
    # TODO: Apply square root
    pass

@validator.koan(7, "Power Transform", difficulty="Beginner")
def validate():
    """Check koan 5.7: the last transformed value must be 5.0 (sqrt of 25)."""
    result = square_root()
    # The assertion must live inside the function: `result` is local, and a
    # module-level assert would raise NameError before validate() is called.
    assert result[-1] == 5.0

validate()

## KOAN 5.8: Interaction Features
**Objective**: Combine features
**Difficulty**: Beginner

In [None]:
def create_interaction():
    """Koan 5.8 exercise: add an interaction column C = A * B to the frame.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The checks that follow this
    function expect a 'C' column in ``result.columns`` whose first value
    is 4.
    """
    df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    # TODO: Create column C = A * B
    pass

@validator.koan(8, "Interaction Features", difficulty="Beginner")
def validate():
    """Check koan 5.8: column 'C' must exist and its first value must be 4."""
    result = create_interaction()
    # The assertions must live inside the function: `result` is local, and
    # module-level asserts would raise NameError before validate() is called.
    assert 'C' in result.columns
    assert result['C'].iloc[0] == 4

validate()

## KOAN 5.9: Polynomial Features
**Objective**: Higher order terms
**Difficulty**: Beginner

In [None]:
def add_squared():
    """Koan 5.9 exercise: add an 'x_squared' column to the frame.

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The checks that follow this
    function expect an 'x_squared' column in ``result.columns`` whose last
    value is 25.
    """
    df = pd.DataFrame({'x': [1, 2, 3, 4, 5]})
    # TODO: Add column x_squared
    pass

@validator.koan(9, "Polynomial Features", difficulty="Beginner")
def validate():
    """Check koan 5.9: 'x_squared' must exist and its last value must be 25."""
    result = add_squared()
    # The assertions must live inside the function: `result` is local, and
    # module-level asserts would raise NameError before validate() is called.
    assert 'x_squared' in result.columns
    assert result['x_squared'].iloc[-1] == 25

validate()

## KOAN 5.10: Boolean Flags
**Objective**: Create indicators
**Difficulty**: Beginner

In [None]:
def create_flag():
    """Koan 5.10 exercise: add an 'is_high' flag column (value > 20).

    Unfinished on purpose: the body ends in ``pass``, so the function
    returns None until the TODO is completed. The checks that follow this
    function expect an 'is_high' column in ``result.columns`` whose sum
    is 2.
    """
    # Two of the four values (25 and 35) are above the threshold of 20.
    df = pd.DataFrame({'value': [5, 15, 25, 35]})
    # TODO: Add is_high flag (value > 20)
    pass

@validator.koan(10, "Boolean Flags", difficulty="Beginner")
def validate():
    """Check koan 5.10: 'is_high' must exist and flag exactly two rows."""
    result = create_flag()
    # The assertions must live inside the function: `result` is local, and
    # module-level asserts would raise NameError before validate() is called.
    assert 'is_high' in result.columns
    assert result['is_high'].sum() == 2

validate()

## Congratulations!

You completed Data Transformation!

In [None]:
# Read this notebook's progress value from the tracker and report it.
progress = tracker.get_notebook_progress('05_data_transformation')
print(f'Final Progress: {progress}%')
# Only a value of exactly 100 prints the completion message.
if progress == 100:
    print('Excellent! You mastered Data Transformation!')