# Introduction: Testing Cyclical Encoding of Features for Machine Learning

In [7]:
import pandas as pd
import numpy as np

import glob

In [8]:
# glob.glob returns matches in arbitrary (filesystem-dependent) order. Sort so
# that positional lookups such as building_data_files[10] select the same file
# on every machine and every run.
building_data_files = sorted(glob.glob('data/building*'))
len(building_data_files)

40

In [10]:
# Read a single building's file. The first CSV column is loaded as the index
# and then replaced when the parsed `timestamp` column becomes the index.
data = (
    pd.read_csv(building_data_files[10], parse_dates=['timestamp'], index_col=0)
    .set_index('timestamp')
)
data.head()
data.info()

Unnamed: 0_level_0,temperature,energy
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-09-18 04:00:00,56.2403,1.682686
2016-09-18 04:15:00,56.087501,2.086212
2016-09-18 04:30:00,56.213232,1.68788
2016-09-18 04:45:00,56.400049,1.926518
2016-09-18 05:00:00,56.592497,1.922459


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 36960 entries, 2016-09-18 04:00:00 to 2017-10-08 03:45:00
Data columns (total 2 columns):
temperature    36960 non-null float64
energy         36960 non-null float64
dtypes: float64(2)
memory usage: 866.2 KB


In [15]:
from sklearn.base import BaseEstimator, TransformerMixin


class DateTimeFeatures(BaseEstimator, TransformerMixin):
    """Add numeric time features derived from the frame's datetime index.

    Stateless transformer: ``fit`` learns nothing. ``transform`` reads
    ``X.index`` (which must expose ``hour``, ``minute`` and ``dayofyear``,
    e.g. a ``DatetimeIndex``) and adds two columns:

    * ``time_of_day`` -- hour plus minute / 60 (seconds are ignored).
    * ``day_of_year`` -- the index's ``dayofyear`` value.
    """

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so the transformer works in a Pipeline."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``time_of_day`` and ``day_of_year`` added.

        The input frame is left untouched so that re-running a cell, or
        inspecting the raw data afterwards, does not see leaked columns.
        """
        field = X.index
        # Work on a copy: assigning columns directly on X would mutate the
        # caller's DataFrame as a hidden side effect.
        X = X.copy()
        X["time_of_day"] = field.hour + field.minute / 60
        X["day_of_year"] = field.dayofyear
        return X


class CyclicalDateTimeFeatures(BaseEstimator, TransformerMixin):
    """Encode ``time_of_day`` and ``day_of_year`` as sine/cosine pairs.

    Stateless transformer: ``fit`` learns nothing. ``transform`` expects the
    columns ``time_of_day`` and ``day_of_year`` to already be present in ``X``
    and adds ``sin_time_of_day``, ``cos_time_of_day``, ``sin_day_of_year`` and
    ``cos_day_of_year``.
    """

    def fit(self, X, y=None):
        """No-op fit; returns ``self`` so the transformer works in a Pipeline."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the four sine/cosine columns added.

        Raises ``KeyError`` if ``time_of_day`` or ``day_of_year`` is missing.
        """
        # Work on a copy so the caller's DataFrame is not mutated.
        X = X.copy()
        # time_of_day is encoded over a 24-hour period.
        X["sin_time_of_day"], X["cos_time_of_day"] = _cyclical_encoding(
            X["time_of_day"], period=24
        )
        # day_of_year is encoded over a 366-day period.
        # Fixed: this call previously used the misspelled name
        # `_cylical_encoding`, which raises NameError on a fresh kernel.
        X["sin_day_of_year"], X["cos_day_of_year"] = _cyclical_encoding(
            X["day_of_year"], period=366
        )
        return X


def _cyclical_encoding(series, period):
    base = 2 * np.pi * series / period
    return np.sin(base), np.cos(base)

In [16]:
from sklearn.pipeline import Pipeline

# Chain the two feature steps: the first derives time_of_day / day_of_year from
# the index, the second turns those columns into sine/cosine pairs.
transforms = Pipeline(
    steps=[
        ("date_time_features", DateTimeFeatures()),
        ("cylical_date_time_features", CyclicalDateTimeFeatures()),
    ]
)

# Fit before transforming: both steps are stateless, so the result is the same,
# but calling transform() on a Pipeline that was never fitted triggers a
# not-fitted warning in recent scikit-learn releases.
transformed_data = transforms.fit_transform(data)
transformed_data.head()

Unnamed: 0_level_0,temperature,energy,time_of_day,day_of_year,sin_time_of_day,cos_time_of_day,sin_day_of_year,cos_day_of_year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2016-09-18 04:00:00,56.2403,1.682686,4.0,262,0.866025,0.5,-0.977064,-0.212947
2016-09-18 04:15:00,56.087501,2.086212,4.25,262,0.896873,0.442289,-0.977064,-0.212947
2016-09-18 04:30:00,56.213232,1.68788,4.5,262,0.92388,0.382683,-0.977064,-0.212947
2016-09-18 04:45:00,56.400049,1.926518,4.75,262,0.94693,0.321439,-0.977064,-0.212947
2016-09-18 05:00:00,56.592497,1.922459,5.0,262,0.965926,0.258819,-0.977064,-0.212947
