In [None]:
!pip install featuretools

Collecting featuretools
  Downloading featuretools-1.31.0-py3-none-any.whl.metadata (15 kB)
Collecting woodwork>=0.28.0 (from featuretools)
  Downloading woodwork-0.31.0-py3-none-any.whl.metadata (10 kB)
Downloading featuretools-1.31.0-py3-none-any.whl (587 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.9/587.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading woodwork-0.31.0-py3-none-any.whl (215 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.2/215.2 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: woodwork, featuretools
Successfully installed featuretools-1.31.0 woodwork-0.31.0


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): none of the cells visible here read from or write to the
# mounted drive — confirm this cell is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

import pandas as pd
import numpy as np

# Seed the random generator so the synthetic transactions (and every feature
# derived from them) are identical on each Restart & Run All.
RANDOM_SEED = 42
N_TRANSACTIONS = 500
rng = np.random.default_rng(RANDOM_SEED)

# Mock customer table: one row per customer, keyed by customer_id.
customers_df = pd.DataFrame({
    'customer_id': [1, 2, 3, 4, 5],
    'joined': pd.to_datetime(['2018-01-01', '2018-01-02', '2018-02-01', '2018-02-02', '2018-03-01']),
    'loves_ice_cream': [True, False, False, True, True],
    'favorite_number': [42, 28, 3, 8, 11]
})

# Mock transaction table: one row per hour starting 2023-01-01, each assigned
# to a randomly chosen existing customer with a random amount in [0, 100).
transactions_df = pd.DataFrame({
    'transaction_id': range(N_TRANSACTIONS),
    # Draw from the customer table itself so every foreign key is valid.
    'customer_id': rng.choice(customers_df['customer_id'].to_numpy(), size=N_TRANSACTIONS),
    'amount': rng.random(N_TRANSACTIONS) * 100,
    # Lowercase 'h' is the hourly alias; uppercase 'H' is deprecated in pandas >= 2.2.
    'transaction_time': pd.date_range('2023-01-01', periods=N_TRANSACTIONS, freq='h')
})

In [None]:
# Display the full customers table (five rows).
customers_df

Unnamed: 0,customer_id,joined,loves_ice_cream,favorite_number
0,1,2018-01-01,True,42
1,2,2018-01-02,False,28
2,3,2018-02-01,False,3
3,4,2018-02-02,True,8
4,5,2018-03-01,True,11


In [None]:
# Display the transactions table; pandas elides the middle rows of the 500-row frame.
transactions_df

Unnamed: 0,transaction_id,customer_id,amount,transaction_time
0,0,4,93.518231,2023-01-01 00:00:00
1,1,4,12.163922,2023-01-01 01:00:00
2,2,3,16.139716,2023-01-01 02:00:00
3,3,2,46.105018,2023-01-01 03:00:00
4,4,1,88.193475,2023-01-01 04:00:00
...,...,...,...,...
495,495,2,81.521830,2023-01-21 15:00:00
496,496,3,60.985661,2023-01-21 16:00:00
497,497,4,66.806382,2023-01-21 17:00:00
498,498,1,20.833348,2023-01-21 18:00:00


In [None]:
import featuretools as ft

# Build the EntitySet in one fluent chain: register both tables, then link
# them. Each add_* call returns the EntitySet, so the calls can be chained.
es = (
    ft.EntitySet(id='customer_data')
    # Parent table: one row per customer.
    .add_dataframe(
        dataframe_name='customers',
        dataframe=customers_df,
        index='customer_id',
    )
    # Child table: one row per transaction, ordered in time by transaction_time.
    .add_dataframe(
        dataframe_name='transactions',
        dataframe=transactions_df,
        index='transaction_id',
        time_index='transaction_time',
    )
    # One customer -> many transactions, joined on customer_id.
    .add_relationship(
        parent_dataframe_name='customers',
        parent_column_name='customer_id',
        child_dataframe_name='transactions',
        child_column_name='customer_id',
    )
)

In [None]:
# Run Deep Feature Synthesis with customers as the target table. The call
# returns the per-customer feature matrix and the list of feature definitions
# that produced its columns.
feature_matrix, feature_defs = ft.dfs(
    target_dataframe_name='customers',
    entityset=es,
)

  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)
  ).agg(to_agg)


In [None]:
# Preview the first rows of the generated feature matrix (indexed by customer_id).
feature_matrix.head()

Unnamed: 0_level_0,loves_ice_cream,favorite_number,COUNT(transactions),MAX(transactions.amount),MEAN(transactions.amount),MIN(transactions.amount),SKEW(transactions.amount),STD(transactions.amount),SUM(transactions.amount),DAY(joined),...,WEEKDAY(joined),YEAR(joined),MODE(transactions.DAY(transaction_time)),MODE(transactions.MONTH(transaction_time)),MODE(transactions.WEEKDAY(transaction_time)),MODE(transactions.YEAR(transaction_time)),NUM_UNIQUE(transactions.DAY(transaction_time)),NUM_UNIQUE(transactions.MONTH(transaction_time)),NUM_UNIQUE(transactions.WEEKDAY(transaction_time)),NUM_UNIQUE(transactions.YEAR(transaction_time))
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,True,42,105,98.818875,46.56733,3.510532,0.201058,27.531262,4889.569632,1,...,0,2018,13,1,6,2023,20,1,7,1
2,False,28,121,98.791077,46.131805,0.18275,0.232723,28.262803,5581.948353,2,...,1,2018,5,1,0,2023,21,1,7,1
3,False,3,96,98.914812,50.559248,0.3966,0.029818,28.294532,4853.687795,1,...,3,2018,3,1,1,2023,21,1,7,1
4,True,8,86,99.235374,48.741681,1.651393,0.111845,26.797198,4191.784563,2,...,4,2018,18,1,5,2023,21,1,7,1
5,True,11,92,99.298725,52.280196,0.00428,-0.095157,29.418096,4809.778059,1,...,3,2018,2,1,0,2023,20,1,7,1


In [None]:
# List every generated feature name; the .head() preview elides the middle columns.
feature_matrix.columns

Index(['loves_ice_cream', 'favorite_number', 'COUNT(transactions)',
       'MAX(transactions.amount)', 'MEAN(transactions.amount)',
       'MIN(transactions.amount)', 'SKEW(transactions.amount)',
       'STD(transactions.amount)', 'SUM(transactions.amount)', 'DAY(joined)',
       'MONTH(joined)', 'WEEKDAY(joined)', 'YEAR(joined)',
       'MODE(transactions.DAY(transaction_time))',
       'MODE(transactions.MONTH(transaction_time))',
       'MODE(transactions.WEEKDAY(transaction_time))',
       'MODE(transactions.YEAR(transaction_time))',
       'NUM_UNIQUE(transactions.DAY(transaction_time))',
       'NUM_UNIQUE(transactions.MONTH(transaction_time))',
       'NUM_UNIQUE(transactions.WEEKDAY(transaction_time))',
       'NUM_UNIQUE(transactions.YEAR(transaction_time))'],
      dtype='object')