# Testing [Featuretools](https://docs.featuretools.com/en/stable/)

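I am using this notebook to familiarize myself with the Featuretools library: it loads the mock customer dataset, declares the entities and their relationships, and runs Deep Feature Synthesis (DFS) to build a feature matrix for the customers.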

In [1]:
import featuretools as ft

Load mock data

In [9]:
# Load the mock customer dataset from Featuretools' demo module; the cells
# below read its 'customers', 'sessions' and 'transactions' entries.
data = ft.demo.load_mock_customer()

Customers data

In [None]:
# Pull the customers table out of the loaded dataset.
customers_df = data['customers']
# Seed the sample so the preview shows the same rows on every
# Restart & Run All instead of a fresh random draw each time.
customers_df.sample(5, random_state=0)

Sessions data

In [7]:
# Pull the sessions table out of the loaded dataset.
sessions_df = data['sessions']
# Seed the sample so the preview shows the same rows on every
# Restart & Run All instead of a fresh random draw each time.
sessions_df.sample(5, random_state=0)

Unnamed: 0,session_id,customer_id,device,session_start
13,14,1,tablet,2014-01-01 03:28:00
6,7,3,tablet,2014-01-01 01:39:40
1,2,5,mobile,2014-01-01 00:17:20
29,30,5,desktop,2014-01-01 07:27:25
18,19,3,desktop,2014-01-01 04:27:35


Transactions data

In [8]:
# Pull the transactions table out of the loaded dataset.
transactions_df = data['transactions']
# Seed the sample so the preview shows the same rows on every
# Restart & Run All instead of a fresh random draw each time.
transactions_df.sample(5, random_state=0)

Unnamed: 0,transaction_id,session_id,transaction_time,product_id,amount
74,232,5,2014-01-01 01:20:10,1,139.2
231,27,17,2014-01-01 04:10:15,2,90.79
434,36,31,2014-01-01 07:50:10,3,62.35
420,56,30,2014-01-01 07:35:00,3,72.7
54,444,4,2014-01-01 00:58:30,4,43.59


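Next, create a dictionary that maps each entity name to a tuple of its dataframe, its index column and, where the entity has one, its time index column.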

In [10]:
# Entity name -> (dataframe, index column[, time index column]), the tuple
# layout the pre-1.0 `ft.dfs(entities=...)` argument expects.
entities = dict(
    customers=(customers_df, "customer_id"),
    sessions=(sessions_df, "session_id", "session_start"),
    transactions=(transactions_df, "transaction_id", "transaction_time"),
)

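Then specify how the entities relate to each other. Each relationship is a tuple of (parent entity, parent column, child entity, child column).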

In [14]:
# Both links join on a column that has the same name in the parent and the
# child entity, so each one is described as (parent, child, shared key) and
# expanded into the 4-tuple (parent, parent column, child, child column).
relationships = [
    (parent, key, child, key)
    for parent, child, key in (
        ("sessions", "transactions", "session_id"),
        ("customers", "sessions", "customer_id"),
    )
]

## Run Deep Feature Synthesis

In [16]:
# Run Deep Feature Synthesis with "customers" as the target entity. The call
# returns a pair: the feature matrix and the list of feature definitions.
feature_matrix_customers, feature_defs = ft.dfs(
    target_entity="customers",
    entities=entities,
    relationships=relationships,
)

In [17]:
# Preview only the first rows so the output stays short no matter how many
# customers the feature matrix ends up holding.
feature_matrix_customers.head()

Unnamed: 0_level_0,zip_code,COUNT(sessions),NUM_UNIQUE(sessions.device),MODE(sessions.device),SUM(transactions.amount),STD(transactions.amount),MAX(transactions.amount),SKEW(transactions.amount),MIN(transactions.amount),MEAN(transactions.amount),...,NUM_UNIQUE(sessions.DAY(session_start)),MODE(sessions.MONTH(session_start)),MODE(sessions.MODE(transactions.product_id)),MODE(sessions.YEAR(session_start)),MODE(sessions.WEEKDAY(session_start)),MODE(sessions.DAY(session_start)),NUM_UNIQUE(transactions.sessions.device),NUM_UNIQUE(transactions.sessions.customer_id),MODE(transactions.sessions.device),MODE(transactions.sessions.customer_id)
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60091,8,3,mobile,9025.62,40.442059,139.43,0.019698,5.81,71.631905,...,1,1,4,2014,2,1,3,1,mobile,1
2,13244,7,3,desktop,7200.28,37.705178,146.81,0.098259,8.73,77.422366,...,1,1,3,2014,2,1,3,1,desktop,2
3,13244,6,3,desktop,6236.62,43.683296,149.15,0.41823,5.89,67.06043,...,1,1,1,2014,2,1,3,1,desktop,3
4,60091,8,3,mobile,8727.68,45.068765,149.95,-0.036348,5.73,80.070459,...,1,1,1,2014,2,1,3,1,mobile,4
5,60091,6,3,mobile,6349.66,44.09563,149.02,-0.025941,7.55,80.375443,...,1,1,3,2014,2,1,3,1,mobile,5


In [21]:
# Select a single stacked feature column by its generated name (going by that
# name: the mean, across a customer's sessions, of each session's mean
# transaction amount).
feature_matrix_customers.loc[:, 'MEAN(sessions.MEAN(transactions.amount))']

customer_id
1    72.774140
2    78.415122
3    67.539577
4    81.207189
5    78.705187
Name: MEAN(sessions.MEAN(transactions.amount)), dtype: float64