# Features Package Examples #

## Import ##

Import your normal libraries. The features package libraries will be loaded as needed. The features package should be located relative to whatever directory you are in.

In [1]:
import pandas as pd

## Data ##

Load in the data you want to manipulate.

In [2]:
def get_data(path="../../../data/mbta_sample.csv"):
    """Load a CSV file into a DataFrame with a fresh integer index.

    The first CSV column is read as the index and then moved back into an
    ordinary column by ``reset_index()``.

    ``DataFrame.from_csv`` was deprecated and later removed from pandas;
    ``read_csv(path, index_col=0, parse_dates=True)`` is its documented
    replacement and keeps the same defaults.

    Args:
        path: Location of the CSV file. Defaults to the MBTA sample data,
            relative to the current working directory.

    Returns:
        A ``pandas.DataFrame`` holding every CSV column, indexed 0..n-1.
    """
    return pd.read_csv(path, index_col=0, parse_dates=True).reset_index()

## Date/Time ##

Methods used to add date/time features.

In [3]:
#
# Normally this would be:
#
# import features.date
#

import date

### Init ###

First you must initialize the data (essentially turns the service_datetime into a datetime field in pandas).

In [4]:
# Keep the initialized frame as a baseline; the date examples below call
# `original_data.copy()` so each one starts from the same data.
original_data = date.init(get_data())

### Day of Week ###

In [5]:
# Add the day-of-week feature to a copy so `original_data` stays untouched.
data = date.add_day_of_week(original_data.copy())

# First column plus the last column (the `day_of_week` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,day_of_week
0,1002,1
1,1002,1
2,1002,1
3,1002,1
4,1002,1


### Day of Week Binary ###

In [6]:
# Add the day-of-week indicator columns to a copy so `original_data` stays untouched.
data = date.add_day_of_week_binary(original_data.copy())

# First column plus the last seven columns (`day_of_week_0` .. `day_of_week_6`
# in the output below).
indices = [0] + list(range(-7, 0))

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,day_of_week_0,day_of_week_1,day_of_week_2,day_of_week_3,day_of_week_4,day_of_week_5,day_of_week_6
0,1002,0,1,0,0,0,0,0
1,1002,0,1,0,0,0,0,0
2,1002,0,1,0,0,0,0,0
3,1002,0,1,0,0,0,0,0
4,1002,0,1,0,0,0,0,0


### Hour Binary ###

In [7]:
# Add the hour indicator columns to a copy so `original_data` stays untouched.
data = date.add_hour_binary(original_data.copy())

# First column plus the last 24 columns (`hour_0` .. `hour_23` in the output below).
indices = [0] + list(range(-24, 0))

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,...,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23
0,1002,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1002,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1002,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1002,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1002,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Month Binary ###

In [8]:
# Add the month indicator columns to a copy so `original_data` stays untouched.
data = date.add_month_binary(original_data.copy())

# First column plus the last 12 columns (`month_1` .. `month_12` in the output below).
indices = [0] + list(range(-12, 0))

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
0,1002,1,0,0,0,0,0,0,0,0,0,0,0
1,1002,1,0,0,0,0,0,0,0,0,0,0,0
2,1002,1,0,0,0,0,0,0,0,0,0,0,0
3,1002,1,0,0,0,0,0,0,0,0,0,0,0
4,1002,1,0,0,0,0,0,0,0,0,0,0,0


### Weekend ###

In [9]:
# Add the weekend flag to a copy so `original_data` stays untouched.
data = date.add_weekend(original_data.copy())

# First column plus the last column (the `weekend` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,weekend
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Service Time (Fraction) ###

In [10]:
# Add the service-minutes fraction to a copy so `original_data` stays untouched.
data = date.add_service_minutes_fraction(original_data.copy())

# First column plus the last column (`service_minutes_fraction` in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,service_minutes_fraction
0,1002,0.0
1,1002,0.0
2,1002,0.25
3,1002,0.5
4,1002,0.75


## Weather ##

Methods used to add weather features.

In [14]:
#
# Normally this would be:
#
# import features.weather
#

import weather

### Init ###

First you must initialize the data (essentially turns the columns into the proper format for pandas).

In [15]:
# Keep the initialized frame as a baseline; later weather cells call
# `original_data.copy()` rather than modifying it directly.
original_data = weather.init(get_data())

### Rain Predict (Binary) ###

In [16]:
# Add the rain prediction flag to a copy so `original_data` stays untouched.
data = weather.add_rain_predict(original_data.copy())

# First column plus the last column (the `rain_predict` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,rain_predict
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Rain Fall Predict ###

In [17]:
# Add the predicted rain fall to a copy so `original_data` stays untouched.
data = weather.add_rain_fall_predict(original_data.copy())

# First column plus the last column (`rain_fall_predict` in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,rain_fall_predict
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Snow Accum ###

In [18]:
# Add the snow accumulation to a copy so `original_data` stays untouched.
data = weather.add_snow_accum(original_data.copy())

# First column plus the last column (the `snow_accum` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,snow_accum
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Snow Accum Predict ###

In [19]:
# Make sure the snow accum column exists: this cell deliberately reuses `data`
# from the Snow Accum cell above instead of a fresh copy, so run that cell first.
data = weather.add_snow_accum_predict(data)

# First column plus the last column (`snow_accum_predict` in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,snow_accum_predict
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Snow Predict (Binary) ###

In [20]:
# Add the snow prediction flag to a copy so `original_data` stays untouched.
data = weather.add_snow_predict(original_data.copy())

# First column plus the last column (the `snow_predict` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,snow_predict
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


### Snow Fall Predict ###

In [21]:
# Add the predicted snow fall to a copy so `original_data` stays untouched.
data = weather.add_snow_fall_predict(original_data.copy())

# First column plus the last column (`snow_fall_predict` in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,snow_fall_predict
0,1002,0
1,1002,0
2,1002,0
3,1002,0
4,1002,0


## Entries ##

Methods used to add entry features.

In [3]:
#
# Normally this would be:
#
# import features.entries
#

import entries

### Init ###

First you must initialize the data (essentially turns the columns into the proper format for pandas).

In [4]:
# Keep the initialized frame as a baseline; the entries example below calls
# `original_data.copy()` rather than modifying it directly.
original_data = entries.init(get_data())

### Previous Week ###

In [7]:
# Add the entries from one week earlier to a copy so `original_data` stays untouched.
data = entries.add_previous_week(original_data.copy(), weeks_ago=1)

# First column plus the last column (`entries_weeks_ago_1` in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,entries_weeks_ago_1
0,1002,
1,1002,
2,1002,
3,1002,
4,1002,


## Station ##

Methods used to add station features.

In [8]:
#
# Normally this would be:
#
# import features.station
#

import station

### Init ###

First you must initialize the data (essentially turns the columns into the proper format for pandas).

In [9]:
# Keep the initialized frame as a baseline; the station example below calls
# `original_data.copy()` rather than modifying it directly.
original_data = station.init(get_data())

In [11]:
# Add the distance-to-center feature to a copy so `original_data` stays untouched.
data = station.add_distance_to_center(original_data.copy())

# First column plus the last column (the `dist_to_center` column in the output below).
indices = [0, -1]

# `.ix` was removed from pandas; `.iloc` performs the same positional selection.
data.iloc[:, indices].head()

Unnamed: 0,locationid,dist_to_center
0,1002,3.404767
1,1002,3.404767
2,1002,3.404767
3,1002,3.404767
4,1002,3.404767
