# Featuretools Summary
- 実際に使うときは、1つのデータフレームから始めるケースで考えたほうがシンプルで良さそう
- Ref. https://stackoverflow.com/questions/50145953/how-to-apply-deep-feature-synthesis-to-a-single-table

In [1]:
import pandas as pd
import featuretools as ft
print(ft.__version__)

0.7.1


In [2]:
# Toy sales log: five records, each tagged with the place of the sale
# and stamped with one consecutive calendar day starting 2019-01-11.
df = pd.DataFrame(
    {
        'index': [1, 2, 3, 4, 5],
        'place': ['shinjyuku', 'shinjyuku', 'shibuya', 'osaki', 'shibuya'],
        'sales': [10, 12, 15, 8, 9],
        'date': pd.date_range(start='2019-01-11', periods=5, freq='D'),
    }
)

In [3]:
# Display the toy sales log (columns: index, place, sales, date).
df

Unnamed: 0,index,place,sales,date
0,1,shinjyuku,10,2019-01-11
1,2,shinjyuku,12,2019-01-12
2,3,shibuya,15,2019-01-13
3,4,osaki,8,2019-01-14
4,5,shibuya,9,2019-01-15


In [4]:
# Create the entity set and register the sales log as its only entity, `log`.
es = ft.EntitySet('sales_records')
# Kept as the cell's last expression so the resulting entity set is displayed.
es.entity_from_dataframe(
    entity_id='log',
    dataframe=df,
    index='index',
    time_index='date',
)

Entityset: sales_records
  Entities:
    log [Rows: 5, Columns: 4]
  Relationships:
    No relationships

In [5]:
# Run Deep Feature Synthesis while `log` is the only entity (no relationships).
# The resulting matrix holds just the raw columns plus DAY(date); none of the
# requested aggregation primitives shows up in it.
feature_matrix, feature_def = ft.dfs(
    entityset=es,
    target_entity='log',
    agg_primitives=['count', 'sum', 'mean'],
    trans_primitives=['day'],
    max_depth=2,
)

In [6]:
# Show the feature matrix produced from the single-entity set.
feature_matrix

Unnamed: 0_level_0,place,sales,DAY(date)
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,shinjyuku,10,11
2,shinjyuku,12,12
3,shibuya,15,13
4,osaki,8,14
5,shibuya,9,15


In [7]:
# Derive a second entity, `station`, from `log`, keyed by the `place` column.
# Kept as the cell's last expression so the updated entity set is displayed.
es.normalize_entity(
    new_entity_id='station',
    base_entity_id='log',
    index='place',
    additional_variables=None,
)

Entityset: sales_records
  Entities:
    log [Rows: 5, Columns: 4]
    station [Rows: 3, Columns: 2]
  Relationships:
    log.place -> station.place

In [8]:
# Inspect the dataframe behind the `station` entity: one row per distinct
# place, together with a `first_log_time` column.
es["station"].df

Unnamed: 0,place,first_log_time
shinjyuku,shinjyuku,2019-01-11
shibuya,shibuya,2019-01-13
osaki,osaki,2019-01-14


In [9]:
# Re-run DFS after adding `station`, limited to depth 1.
# At this depth the matrix is unchanged: no `station.*` features are generated.
feature_matrix, feature_def = ft.dfs(
    entityset=es,
    target_entity='log',
    agg_primitives=['count', 'sum', 'mean'],
    trans_primitives=['day'],
    max_depth=1,
)

In [10]:
# Show the depth-1 feature matrix (same columns as before `station` existed).
feature_matrix

Unnamed: 0_level_0,place,sales,DAY(date)
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,shinjyuku,10,11
2,shinjyuku,12,12
3,shibuya,15,13
4,osaki,8,14
5,shibuya,9,15


In [11]:
# Same DFS call with depth 2: per-station aggregates of `log` (COUNT, SUM,
# MEAN) and DAY(first_log_time) are now attached to every log row.
feature_matrix, feature_def = ft.dfs(
    entityset=es,
    target_entity='log',
    agg_primitives=['count', 'sum', 'mean'],
    trans_primitives=['day'],
    max_depth=2,
)

In [12]:
# Show the depth-2 feature matrix, which now includes the `station.*` columns.
feature_matrix

Unnamed: 0_level_0,place,sales,DAY(date),station.COUNT(log),station.SUM(log.sales),station.MEAN(log.sales),station.DAY(first_log_time)
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,shinjyuku,10,11,2,22,11,11
2,shinjyuku,12,12,2,22,11,11
3,shibuya,15,13,2,24,12,13
4,osaki,8,14,1,8,8,14
5,shibuya,9,15,2,24,12,13


In [13]:
# Same DFS call with depth 3: for this two-entity set the result has the
# same columns and values as the depth-2 run.
feature_matrix, feature_def = ft.dfs(
    entityset=es,
    target_entity='log',
    agg_primitives=['count', 'sum', 'mean'],
    trans_primitives=['day'],
    max_depth=3,
)

In [14]:
# Show the depth-3 feature matrix for comparison with the depth-2 result.
feature_matrix

Unnamed: 0_level_0,place,sales,DAY(date),station.COUNT(log),station.SUM(log.sales),station.MEAN(log.sales),station.DAY(first_log_time)
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,shinjyuku,10,11,2,22,11,11
2,shinjyuku,12,12,2,22,11,11
3,shibuya,15,13,2,24,12,13
4,osaki,8,14,1,8,8,14
5,shibuya,9,15,2,24,12,13
