# Feature Engineering - Data Science Koans

Master feature engineering!

## What You Will Learn
- Date/time features
- Text features
- Aggregations
- Lag and rolling features

## How to Use
1. Read each koan
2. Complete TODOs
3. Run validation
4. Iterate

In [None]:
# Setup
import sys
sys.path.append('../..')
import numpy as np
import pandas as pd
from koans.core.validator import KoanValidator
from koans.core.progress import ProgressTracker

# Validator for this notebook; the koan cells below use `validator.koan(...)` as a decorator.
validator = KoanValidator('06_feature_engineering')
# Tracker that is queried for this notebook's progress value.
tracker = ProgressTracker()
print('Setup complete!')
# Show the progress currently recorded for this notebook (printed with a '%' suffix).
print(f"Progress: {tracker.get_notebook_progress('06_feature_engineering')}%")

## KOAN 6.1: Date Parts
**Objective**: Extract date components (year, month) from a datetime column
**Difficulty**: Intermediate

In [None]:
def extract_date():
    """Koan 6.1 exercise stub: derive date-part columns.

    Builds a frame whose ``date`` column holds three timestamps generated by
    ``pd.date_range`` starting at 2023-01-01. The TODO is left for the
    learner; until it is completed the function falls through ``pass`` and
    returns ``None``. The koan's check reads ``result.columns``, so the
    finished function needs to return the frame.
    """
    df = pd.DataFrame({'date': pd.date_range('2023-01-01', periods=3)})
    # TODO: Add year, month columns
    pass

@validator.koan(1, "Date Parts", difficulty="Intermediate")
def validate():
    """Koan 6.1 check: the frame from extract_date() has the date-part columns."""
    result = extract_date()
    # These assertions must stay inside the function body: `result` is a
    # local of validate(), so a module-level assert raises NameError.
    assert 'year' in result.columns
    # The exercise asks for both year and month, so check both.
    assert 'month' in result.columns

validate()

## KOAN 6.2: Time Features
**Objective**: Extract time-of-day components (such as the hour) from timestamps
**Difficulty**: Intermediate

In [None]:
def time_features():
    """Koan 6.2 exercise stub: derive an hour-of-day column.

    Builds a frame whose ``time`` column holds 24 hourly timestamps starting
    at 2023-01-01. The TODO is left for the learner; until it is completed
    the function falls through ``pass`` and returns ``None``. The koan's
    check reads ``result.columns``, so the finished function needs to return
    the frame.
    """
    # Use the lowercase 'h' hourly alias: the uppercase 'H' spelling is
    # deprecated in recent pandas releases.
    df = pd.DataFrame({'time': pd.date_range('2023-01-01', periods=24, freq='h')})
    # TODO: Add hour column
    pass

@validator.koan(2, "Time Features", difficulty="Intermediate")
def validate():
    """Koan 6.2 check: the frame from time_features() has an ``hour`` column."""
    result = time_features()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert 'hour' in result.columns

validate()

## KOAN 6.3: Text Length
**Objective**: String metrics
**Difficulty**: Intermediate

In [None]:
def text_len():
    """Koan 6.3 exercise stub: derive a string-length column.

    Builds a frame with a ``text`` column holding 'hi', 'hello' and 'world'.
    The TODO is left for the learner; until it is completed the function
    falls through ``pass`` and returns ``None``. The koan's check reads
    ``result['length'].iloc[1]`` and expects 5, so the finished function
    needs to return the frame with a ``length`` column.
    """
    df = pd.DataFrame({'text': ['hi', 'hello', 'world']})
    # TODO: Add length column
    pass

@validator.koan(3, "Text Length", difficulty="Intermediate")
def validate():
    """Koan 6.3 check: the ``length`` of the second row ('hello') is 5."""
    result = text_len()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert result['length'].iloc[1] == 5

validate()

## KOAN 6.4: Word Count
**Objective**: Count words
**Difficulty**: Intermediate

In [None]:
def word_cnt():
    """Koan 6.4 exercise stub: derive a word-count column.

    Builds a frame with a ``text`` column holding 'one' and 'one two'. The
    TODO is left for the learner; until it is completed the function falls
    through ``pass`` and returns ``None``. The koan's check reads
    ``result['word_count'].iloc[1]`` and expects 2, so the finished function
    needs to return the frame with a ``word_count`` column.
    """
    df = pd.DataFrame({'text': ['one', 'one two']})
    # TODO: Add word_count
    pass

@validator.koan(4, "Word Count", difficulty="Intermediate")
def validate():
    """Koan 6.4 check: the ``word_count`` of the second row ('one two') is 2."""
    result = word_cnt()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert result['word_count'].iloc[1] == 2

validate()

## KOAN 6.5: Ratio Features
**Objective**: Compute ratios
**Difficulty**: Intermediate

In [None]:
def make_ratio():
    """Koan 6.5 exercise stub: derive a ratio column from two numeric columns.

    Builds a frame with ``a`` = [10, 20] and ``b`` = [5, 10]. The TODO is
    left for the learner; until it is completed the function falls through
    ``pass`` and returns ``None``. The koan's check reads
    ``result['ratio'].iloc[0]`` and expects 2.0, so the finished function
    needs to return the frame with a ``ratio`` column.
    """
    df = pd.DataFrame({'a': [10, 20], 'b': [5, 10]})
    # TODO: Add ratio = a / b
    pass

@validator.koan(5, "Ratio Features", difficulty="Intermediate")
def validate():
    """Koan 6.5 check: the first row's ``ratio`` (10 / 5) equals 2.0."""
    result = make_ratio()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert result['ratio'].iloc[0] == 2.0

validate()

## KOAN 6.6: Group Mean
**Objective**: Aggregation
**Difficulty**: Intermediate

In [None]:
def group_agg():
    """Koan 6.6 exercise stub: derive a per-category mean column.

    Builds a frame with ``cat`` = ['A', 'A', 'B'] and ``val`` = [10, 20, 30].
    The TODO is left for the learner; until it is completed the function
    falls through ``pass`` and returns ``None``. The koan's check looks for
    ``cat_mean`` in ``result.columns``, so the finished function needs to
    return the frame with that column.
    """
    df = pd.DataFrame({'cat': ['A','A','B'], 'val': [10,20,30]})
    # TODO: Add cat_mean
    pass

@validator.koan(6, "Group Mean", difficulty="Intermediate")
def validate():
    """Koan 6.6 check: the frame from group_agg() has a ``cat_mean`` column."""
    result = group_agg()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert 'cat_mean' in result.columns

validate()

## KOAN 6.7: Lag Features
**Objective**: Previous value
**Difficulty**: Intermediate

In [None]:
def make_lag():
    """Koan 6.7 exercise stub: derive a one-step lag column.

    Builds a frame with ``val`` = [1, 2, 3]. The TODO is left for the
    learner; until it is completed the function falls through ``pass`` and
    returns ``None``. The koan's check expects ``result['lag_1'].iloc[0]``
    to be missing (``pd.isna``), so the finished function needs to return
    the frame with a ``lag_1`` column.
    """
    df = pd.DataFrame({'val': [1, 2, 3]})
    # TODO: Add lag_1
    pass

@validator.koan(7, "Lag Features", difficulty="Intermediate")
def validate():
    """Koan 6.7 check: the first ``lag_1`` value is missing (no previous row)."""
    result = make_lag()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert pd.isna(result['lag_1'].iloc[0])

validate()

## KOAN 6.8: Rolling Mean
**Objective**: Moving average
**Difficulty**: Intermediate

In [None]:
def rolling_avg():
    """Koan 6.8 exercise stub: derive a moving-average column.

    Builds a frame with ``val`` = [1, 2, 3, 4, 5]. The TODO is left for the
    learner; until it is completed the function falls through ``pass`` and
    returns ``None``. The koan's check looks for ``roll_2`` in
    ``result.columns``, so the finished function needs to return the frame
    with that column.
    """
    df = pd.DataFrame({'val': [1, 2, 3, 4, 5]})
    # TODO: Add roll_2 (window=2)
    pass

@validator.koan(8, "Rolling Mean", difficulty="Intermediate")
def validate():
    """Koan 6.8 check: the frame from rolling_avg() has a ``roll_2`` column."""
    result = rolling_avg()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert 'roll_2' in result.columns

validate()

## KOAN 6.9: Frequency Encode
**Objective**: Count encoding
**Difficulty**: Intermediate

In [None]:
def freq_encode():
    """Koan 6.9 exercise stub: derive a frequency (count) encoding column.

    Builds a frame with ``cat`` = ['A', 'B', 'A']. The TODO is left for the
    learner; until it is completed the function falls through ``pass`` and
    returns ``None``. The koan's check reads ``result['cat_freq'].iloc[0]``
    and expects 2, so the finished function needs to return the frame with
    a ``cat_freq`` column.
    """
    df = pd.DataFrame({'cat': ['A','B','A']})
    # TODO: Add cat_freq
    pass

@validator.koan(9, "Frequency Encode", difficulty="Intermediate")
def validate():
    """Koan 6.9 check: the first row's ``cat_freq`` is 2 ('A' appears twice)."""
    result = freq_encode()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert result['cat_freq'].iloc[0] == 2

validate()

## KOAN 6.10: Target Encode
**Objective**: Mean encoding
**Difficulty**: Intermediate

In [None]:
def target_encode():
    """Koan 6.10 exercise stub: derive a per-category target-mean column.

    Builds a frame with ``cat`` = ['A', 'A', 'B'] and ``y`` = [1, 2, 3]. The
    TODO is left for the learner; until it is completed the function falls
    through ``pass`` and returns ``None``. The koan's check looks for
    ``cat_mean_y`` in ``result.columns``, so the finished function needs to
    return the frame with that column.
    """
    df = pd.DataFrame({'cat': ['A','A','B'], 'y': [1,2,3]})
    # TODO: Add cat_mean_y
    pass

@validator.koan(10, "Target Encode", difficulty="Intermediate")
def validate():
    """Koan 6.10 check: the frame from target_encode() has a ``cat_mean_y`` column."""
    result = target_encode()
    # This assertion must stay inside the function body: `result` is a local
    # of validate(), so a module-level assert raises NameError.
    assert 'cat_mean_y' in result.columns

validate()

## Congratulations!

You completed Feature Engineering!

In [None]:
# Query the tracker again for this notebook's progress value.
progress = tracker.get_notebook_progress('06_feature_engineering')
print(f'Final Progress: {progress}%')
# Only a value equal to 100 prints the completion message.
if progress == 100:
    print('Excellent! You mastered Feature Engineering!')