## Taking a look at the `featuretools` library's quick start
This notebook follows the quick start from the official documentation: https://docs.featuretools.com/

In [4]:
import featuretools as ft

# Fetch the demo customer dataset that ships with featuretools.
mock_data = ft.demo.load_mock_customer()

# Pull the three tables out of the returned mapping in a single unpacking.
customers_df, sessions_df, transactions_df = (
    mock_data[table_key]
    for table_key in ('customers', 'sessions', 'transactions')
)

In [33]:
def inspect_df(df, df_name):
    """Print a plain-text overview of a DataFrame.

    Output, in order: a banner containing ``df_name``, a blank line, the row
    count, the column count, ``df.head()``, ``df.describe(include='all')``
    and two trailing blank lines.

    Args:
        df: pandas DataFrame to summarise.
        df_name: label shown in the banner; must be a string because it is
            concatenated into the banner text.

    Returns:
        None. The summary is written to standard output.
    """
    # Single-argument print(...) is valid as a Python 2 statement and as a
    # Python 3 function call, so the cell runs on either interpreter.
    # Blank lines use print('') rather than print(): on Python 2 a bare
    # print() would emit "()" instead of an empty line.
    print('*****' + df_name + '*****')
    print('')
    print('Rows: ' + str(len(df)))
    print('Columns: ' + str(len(df.columns)))
    print(df.head())
    print(df.describe(include='all'))
    print('')
    print('')

In [34]:
# Label each table so a single loop can print a summary of all of them.
dfs = {
    'customers': customers_df,
    'sessions': sessions_df,
    'transactions': transactions_df,
}

for table_name, table_df in dfs.items():
    inspect_df(table_df, table_name)

*****customers*****

Rows: 5
Columns: 3
   customer_id zip_code  join_date
0            1    60091 2008-01-01
1            2    02139 2008-02-20
2            3    02139 2008-04-10
3            4    60091 2008-05-30
4            5    02139 2008-07-19
        customer_id zip_code            join_date
count      5.000000        5                    5
unique          NaN        2                    5
top             NaN    02139  2008-05-30 00:00:00
freq            NaN        3                    1
first           NaN      NaN  2008-01-01 00:00:00
last            NaN      NaN  2008-07-19 00:00:00
mean       3.000000      NaN                  NaN
std        1.581139      NaN                  NaN
min        1.000000      NaN                  NaN
25%        2.000000      NaN                  NaN
50%        3.000000      NaN                  NaN
75%        4.000000      NaN                  NaN
max        5.000000      NaN                  NaN


*****transactions*****

Rows: 500
Columns: 5
   

In [31]:
# Required input for ft.dfs: one entry per entity, keyed by entity name.
# Following the featuretools quick start, each value is a tuple of
# (dataframe, index column[, time index column]).
entities = {
    "customers": (customers_df, "customer_id"),
    "sessions": (sessions_df, "session_id", "session_start"),
    "transactions": (transactions_df, "transaction_id", "transaction_time"),
}

# How the entities link together. Following the featuretools quick start,
# each tuple reads (parent entity, parent column, child entity, child column).
relationships = [
    ("sessions", "session_id", "transactions", "session_id"),
    ("customers", "customer_id", "sessions", "customer_id"),
]

In [32]:
# Run Deep Feature Synthesis with `customers` as the target entity.
# ft.dfs returns two values, unpacked here into the feature matrix and the
# list of feature definitions.
feature_matrix_customers, feature_defs = ft.dfs(
    target_entity='customers',
    entities=entities,
    relationships=relationships,
)

# Bare last expression so the notebook displays the matrix as the cell output.
feature_matrix_customers

Unnamed: 0_level_0,zip_code,COUNT(transactions),COUNT(sessions),SUM(transactions.amount),MODE(sessions.device),MIN(transactions.amount),MAX(transactions.amount),YEAR(join_date),SKEW(transactions.amount),DAY(join_date),...,SUM(sessions.MIN(transactions.amount)),MAX(sessions.SKEW(transactions.amount)),MAX(sessions.MIN(transactions.amount)),SUM(sessions.MEAN(transactions.amount)),STD(sessions.SUM(transactions.amount)),STD(sessions.MEAN(transactions.amount)),SKEW(sessions.MEAN(transactions.amount)),STD(sessions.MAX(transactions.amount)),NUM_UNIQUE(sessions.DAY(session_start)),MIN(sessions.SKEW(transactions.amount))
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,60091,131,10,10236.77,desktop,5.6,149.95,2008,0.070041,1,...,169.77,0.610052,41.95,791.976505,175.939423,9.299023,-0.37715,5.857976,1,-0.395358
2,2139,122,8,9118.81,mobile,5.81,149.15,2008,0.028647,20,...,114.85,0.492531,42.96,596.243506,230.333502,10.925037,0.96235,7.42048,1,-0.470007
3,2139,78,5,5758.24,desktop,6.78,147.73,2008,0.070814,10,...,64.98,0.645728,21.77,369.770121,471.048551,9.819148,-0.244976,12.537259,1,-0.630425
4,60091,111,8,8205.28,desktop,5.73,149.56,2008,0.087986,30,...,83.53,0.516262,17.27,584.673126,322.883448,13.065436,-0.548969,12.738488,1,-0.497169
5,2139,58,4,4571.37,tablet,5.91,148.17,2008,0.085883,19,...,73.09,0.830112,27.46,313.448942,198.522508,8.950528,0.098885,5.599228,1,-0.396571


In [36]:
# Display the feature definitions that ft.dfs returned alongside the
# customer feature matrix.
feature_defs

[<Feature: zip_code>,
 <Feature: COUNT(transactions)>,
 <Feature: COUNT(sessions)>,
 <Feature: SUM(transactions.amount)>,
 <Feature: MODE(sessions.device)>,
 <Feature: MIN(transactions.amount)>,
 <Feature: MAX(transactions.amount)>,
 <Feature: YEAR(join_date)>,
 <Feature: SKEW(transactions.amount)>,
 <Feature: DAY(join_date)>,
 <Feature: NUM_UNIQUE(sessions.device)>,
 <Feature: MONTH(join_date)>,
 <Feature: MEAN(transactions.amount)>,
 <Feature: NUM_UNIQUE(transactions.product_id)>,
 <Feature: WEEKDAY(join_date)>,
 <Feature: MODE(transactions.product_id)>,
 <Feature: STD(transactions.amount)>,
 <Feature: SKEW(sessions.STD(transactions.amount))>,
 <Feature: SKEW(sessions.SUM(transactions.amount))>,
 <Feature: NUM_UNIQUE(sessions.WEEKDAY(session_start))>,
 <Feature: MAX(sessions.NUM_UNIQUE(transactions.product_id))>,
 <Feature: MIN(sessions.STD(transactions.amount))>,
 <Feature: MODE(sessions.WEEKDAY(session_start))>,
 <Feature: MEAN(sessions.COUNT(transactions))>,
 <Feature: SUM(session

In [35]:
# Reuse the summary helper on the generated customer feature matrix.
inspect_df(df=feature_matrix_customers, df_name='features')

*****features*****

Rows: 5
Columns: 69
            zip_code  COUNT(transactions)  COUNT(sessions)  \
customer_id                                                  
1              60091                  131               10   
2              02139                  122                8   
3              02139                   78                5   
4              60091                  111                8   
5              02139                   58                4   

             SUM(transactions.amount) MODE(sessions.device)  \
customer_id                                                   
1                            10236.77               desktop   
2                             9118.81                mobile   
3                             5758.24               desktop   
4                             8205.28               desktop   
5                             4571.37                tablet   

             MIN(transactions.amount)  MAX(transactions.amount)  \
customer_id     

In [None]:

# Bare expression: when run, the notebook shows the customer feature matrix
# as this cell's output.
feature_matrix_customers

In [38]:
# Repeat the synthesis, this time targeting the `sessions` entity; the
# entity and relationship definitions are the same ones used for customers.
feature_matrix_sessions, feature_session_defs = ft.dfs(
    target_entity='sessions',
    entities=entities,
    relationships=relationships,
)

# Preview only the leading rows of the session-level matrix.
feature_matrix_sessions.head()

Unnamed: 0_level_0,customer_id,device,WEEKDAY(session_start),MONTH(session_start),MODE(transactions.product_id),MEAN(transactions.amount),customers.zip_code,DAY(session_start),MIN(transactions.amount),NUM_UNIQUE(transactions.product_id),...,customers.MODE(sessions.device),customers.WEEKDAY(join_date),MODE(transactions.MONTH(transaction_time)),customers.COUNT(transactions),customers.MAX(transactions.amount),customers.MIN(transactions.amount),MODE(transactions.YEAR(transaction_time)),MODE(transactions.WEEKDAY(transaction_time)),NUM_UNIQUE(transactions.MONTH(transaction_time)),NUM_UNIQUE(transactions.DAY(transaction_time))
session_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,desktop,2,1,2,77.84625,60091,1,5.6,5,...,desktop,1,1,131,149.95,5.6,2014,2,1,1
2,1,desktop,2,1,3,89.533,60091,1,8.67,4,...,desktop,1,1,131,149.95,5.6,2014,2,1,1
3,5,mobile,2,1,5,67.13,2139,1,20.91,5,...,tablet,5,1,58,148.17,5.91,2014,2,1,1
4,3,mobile,2,1,1,82.1728,2139,1,8.7,5,...,desktop,3,1,78,147.73,6.78,2014,2,1,1
5,2,tablet,2,1,1,65.031818,2139,1,6.29,5,...,mobile,2,1,122,149.15,5.81,2014,2,1,1
