In [1]:
pip install featuretools

Note: you may need to restart the kernel to use updated packages.


In [1]:
import featuretools as ft
import pandas as pd

In [2]:
# Toy e-commerce dataset: four small tables stored as column-name -> values mappings.
# All date columns are plain 'YYYY-MM-DD' strings at this stage.

# One row per customer, keyed by CustomerID.
customers_data = dict(
    CustomerID=[101, 102, 103],
    Name=['John Doe', 'Jane Smith', 'Mike Jordan'],
    Email=['john.doe@example.com', 'jane.smith@example.com', 'mike.jordan@example.com'],
    SignupDate=['2023-01-10', '2023-01-15', '2023-01-20'],
)

# One row per product, keyed by ProductID.
products_data = dict(
    ProductID=[201, 202, 203],
    Name=['Laptop', 'Tablet', 'Smartphone'],
    Category=['Electronics', 'Electronics', 'Electronics'],
    Price=[1000, 500, 800],
)

# One row per order, keyed by OrderID; CustomerID refers to customers_data.
orders_data = dict(
    OrderID=[301, 302, 303],
    CustomerID=[101, 102, 103],
    OrderDate=['2023-02-01', '2023-02-05', '2023-02-10'],
    ShipDate=['2023-02-03', '2023-02-07', '2023-02-12'],
)

# One row per order line; OrderID / ProductID refer to the orders and products tables.
# This table has no key column of its own.
order_details_data = dict(
    OrderID=[301, 302, 303],
    ProductID=[201, 202, 203],
    Quantity=[1, 2, 1],
    Discount=[0, 0.1, 0],
)

In [3]:
# Build one DataFrame per table.
# The date columns are supplied as 'YYYY-MM-DD' strings; parse them to datetime64 here
# (strict format, so a malformed value raises immediately) instead of leaving them as
# object columns and relying on downstream type inference to recognise them as dates.
date_format = '%Y-%m-%d'

customers_df = pd.DataFrame(customers_data).assign(
    SignupDate=lambda df: pd.to_datetime(df['SignupDate'], format=date_format),
)
products_df = pd.DataFrame(products_data)
orders_df = pd.DataFrame(orders_data).assign(
    OrderDate=lambda df: pd.to_datetime(df['OrderDate'], format=date_format),
    ShipDate=lambda df: pd.to_datetime(df['ShipDate'], format=date_format),
)
order_details_df = pd.DataFrame(order_details_data)

In [4]:
# Start from an empty EntitySet; the tables and their links are attached in later cells.
entityset_id = 'ecommerce_data'
es = ft.EntitySet(id=entityset_id)

In [5]:
# Register every table with the EntitySet.
# customers, products and orders use an existing column as their index.
# order_details has no key column of its own, so make_index=True asks featuretools
# to create one named 'details_index'.
dataframe_specs = [
    dict(dataframe_name='customers', dataframe=customers_df, index='CustomerID'),
    dict(dataframe_name='products', dataframe=products_df, index='ProductID'),
    dict(dataframe_name='orders', dataframe=orders_df, index='OrderID'),
    dict(dataframe_name='order_details', dataframe=order_details_df,
         make_index=True, index='details_index'),
]
for spec in dataframe_specs:
    es = es.add_dataframe(**spec)

In [6]:
# Link the tables. Each tuple is
# (parent dataframe, parent column, child dataframe, child column):
# a customer has many orders, an order has many detail lines,
# and a product appears on many detail lines.
relationships = [
    ('customers', 'CustomerID', 'orders', 'CustomerID'),
    ('orders', 'OrderID', 'order_details', 'OrderID'),
    ('products', 'ProductID', 'order_details', 'ProductID'),
]
for parent_name, parent_col, child_name, child_col in relationships:
    es = es.add_relationship(parent_name, parent_col, child_name, child_col)

# Show the EntitySet summary (dataframes and relationships) as the cell output.
es

Entityset: ecommerce_data
  DataFrames:
    customers [Rows: 3, Columns: 4]
    products [Rows: 3, Columns: 4]
    orders [Rows: 3, Columns: 4]
    order_details [Rows: 3, Columns: 5]
  Relationships:
    orders.CustomerID -> customers.CustomerID
    order_details.OrderID -> orders.OrderID
    order_details.ProductID -> products.ProductID

In [7]:
# Deep Feature Synthesis: generate one feature row per order.
# Aggregation primitives summarise related child rows (e.g. an order's detail lines);
# transform primitives are applied to individual columns (here: the date columns).
aggregations = ["sum", "mean", "count"]
transforms = ["month", "day"]

feature_matrix, feature_defs = ft.dfs(
    entityset=es,
    target_dataframe_name='orders',
    agg_primitives=aggregations,
    trans_primitives=transforms,
    max_depth=2,  # allow primitives to be stacked up to two levels deep
)

# Display the generated features.
feature_matrix

Unnamed: 0_level_0,CustomerID,COUNT(order_details),MEAN(order_details.Discount),MEAN(order_details.Quantity),SUM(order_details.Discount),SUM(order_details.Quantity),DAY(OrderDate),DAY(ShipDate),MONTH(OrderDate),MONTH(ShipDate),MEAN(order_details.products.Price),SUM(order_details.products.Price),customers.COUNT(orders),customers.COUNT(order_details),customers.MEAN(order_details.Discount),customers.MEAN(order_details.Quantity),customers.SUM(order_details.Discount),customers.SUM(order_details.Quantity),customers.DAY(SignupDate),customers.MONTH(SignupDate)
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
301,101,1,0.0,1.0,0.0,1.0,1,3,2,2,1000.0,1000.0,1,1,0.0,1.0,0.0,1.0,10,1
302,102,1,0.1,2.0,0.1,2.0,5,7,2,2,500.0,500.0,1,1,0.1,2.0,0.1,2.0,15,1
303,103,1,0.0,1.0,0.0,1.0,10,12,2,2,800.0,800.0,1,1,0.0,1.0,0.0,1.0,20,1
