# Segmentation 1 - Offline

The goal of this notebook is to build features that encode the business logic of sending promocodes to customers.

## Setup environment

In [1]:
from IPython.display import HTML, display

import pandas as pd

# Show DataFrames in full: no cap on the number of columns or rows,
# and no truncation of long cell values.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [2]:
import featuremesh
%reload_ext featuremesh

from featuremesh import generate_access_token, decode_token

from libs.helpers.utils import get_featuremesh_config

# Connection settings (service hosts and identity token) come from the project helper.
fm_config = get_featuremesh_config()

# Point the library at the registry and access services named in the config.
featuremesh.set_default('registry.host', fm_config['registry.host'])
featuremesh.set_default('access.host', fm_config['access.host'])

# Derive an access token for the 'default' project from the configured identity token.
__YOUR_IDENTITY_TOKEN__ = fm_config['identity_token']
__YOUR_ACCESS_TOKEN__ = generate_access_token(
    project='default',
    identity_token=__YOUR_IDENTITY_TOKEN__,
)

# Debugging aid: uncomment to inspect the generated access token.
# decode_token(__YOUR_ACCESS_TOKEN__)

In [3]:
from libs.helpers.utils_db import query_duckdb as query_duckdb_direct

def query_duckdb(sql: str) -> pd.DataFrame:
    """Execute ``sql`` against the DuckDB file ``/files/local1.db``.

    Thin wrapper around ``query_duckdb_direct`` that pins the storage path,
    so it can be handed to FeatureMesh as a single-argument SQL executor.

    Note: interrupt the kernel of any other notebook that uses the same
    database connection before running queries from this one.

    Args:
        sql: SQL text to execute.

    Returns:
        The query result as a ``pandas.DataFrame``.
    """
    result = query_duckdb_direct(sql, storage_path="/files/local1.db")
    return result

# Offline client that runs its SQL through the local DuckDB wrapper defined above.
client_duckdb = featuremesh.OfflineClient(
    sql_executor=query_duckdb,
    backend=featuremesh.Backend.DUCKDB,
    access_token=__YOUR_ACCESS_TOKEN__,
)

# Register the client as the default so that the %%featureql cell magic
# can be used without specifying a client explicitly.
featuremesh.set_default('client', client_duckdb)

## Using the TPC-H dataset in DuckDB

In [4]:
# Preview 10 rows of the TPC-H CUSTOMER table to show the available columns.
query_duckdb("SELECT * FROM TPCH.CUSTOMER LIMIT 10")

Unnamed: 0,c_custkey,c_name,c_address,c_nationkey,c_phone,c_acctbal,c_mktsegment,c_comment
0,1,Customer#000000001,j5JsirBM9PsCy0O1m,15,25-989-741-2988,711.56,BUILDING,y final requests wake slyly quickly special accounts. blithely
1,2,Customer#000000002,487LW1dovn6Q4dMVymKwwLE9OKf3QG,13,23-768-687-3665,121.65,AUTOMOBILE,y carefully regular foxes. slyly regular requests about the bli
2,3,Customer#000000003,fkRGN8nY4pkE,1,11-719-748-3364,7498.12,AUTOMOBILE,fully. carefully silent instructions sleep alongside of the slyly regular asymptotes. quickly regular
3,4,Customer#000000004,4u58h fqkyE,4,14-128-190-5944,2866.83,MACHINERY,sublate. fluffily even instructions are about th
4,5,Customer#000000005,hwBtxkoBF qSW4KrIk5U 2B1AU7H,3,13-750-942-6364,794.47,HOUSEHOLD,equests haggle furiously against the pending packa
5,6,Customer#000000006,"g1s,pzDenUEBW3O,2 pxu0f9n2g64rJrt5E",20,30-114-968-4951,7638.57,AUTOMOBILE,quickly silent asymptotes are slyly regular excuses. instructions wake furiously? quickly bold courts p
6,7,Customer#000000007,8OkMVLQ1dK6Mbu6WG9 w4pLGQ n7MQ,18,28-190-982-9759,9561.95,AUTOMOBILE,"ounts. ironic, regular accounts sleep. final requests haggle quickly after the"
7,8,Customer#000000008,"j,pZ,Qp,qtFEo0r0c 92qobZtlhSuOqbE4JGV",17,27-147-574-9335,6819.74,BUILDING,riously final excuses sublate quickly among the fluffily even foxes. quickly final packages haggle furiously furi
8,9,Customer#000000009,vgIql8H6zoyuLMFNdAMLyE7 H9,8,18-338-906-3675,8324.07,FURNITURE,ss pinto beans believe slyly quiet deposits-- doggedly bold packages boost. quickly ironic de
9,10,Customer#000000010,"Vf mQ6Ug9Ucf5OKGYq fsaX AtfsO7,rwY",5,15-741-346-9870,2753.54,HOUSEHOLD,g quickly after the evenly bold


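We are also going to use the CUSTOMER_HISTORY table created in the "Init DuckDB" notebook. It holds one row per customer with the list of their orders, the dates of their first and last orders, and their total order value.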

In [5]:
# Preview 3 rows of the per-customer order history (h_listorders is a nested list of orders).
query_duckdb("SELECT * FROM TPCH.CUSTOMER_HISTORY LIMIT 3")

Unnamed: 0,h_custkey,h_numorders,h_listorders,h_datefirstorder,h_datelastorder,h_totalprice
0,1,9,"[{'o_orderkey': 579908, 'o_orderdate': 1996-12-09, 'o_totalprice': 45744.09}, {'o_orderkey': 36422, 'o_orderdate': 1997-03-04, 'o_totalprice': 268835.44}, {'o_orderkey': 135943, 'o_orderdate': 1993-06-22, 'o_totalprice': 263247.54}, {'o_orderkey': 164711, 'o_orderdate': 1992-04-26, 'o_totalprice': 283261.47}, {'o_orderkey': 224167, 'o_orderdate': 1996-05-08, 'o_totalprice': 81485.84}, {'o_orderkey': 287619, 'o_orderdate': 1996-12-26, 'o_totalprice': 11925.85}, {'o_orderkey': 385825, 'o_orderdate': 1995-11-01, 'o_totalprice': 235155.22}, {'o_orderkey': 430243, 'o_orderdate': 1994-12-24, 'o_totalprice': 35523.05}, {'o_orderkey': 454791, 'o_orderdate': 1992-04-19, 'o_totalprice': 83779.26}]",1992-04-19,1997-03-04,1308957.76
1,2,11,"[{'o_orderkey': 491620, 'o_orderdate': 1998-05-22, 'o_totalprice': 122500.55}, {'o_orderkey': 9154, 'o_orderdate': 1997-06-23, 'o_totalprice': 299326.4}, {'o_orderkey': 52263, 'o_orderdate': 1994-05-08, 'o_totalprice': 36433.77}, {'o_orderkey': 90019, 'o_orderdate': 1993-10-28, 'o_totalprice': 96852.91}, {'o_orderkey': 100064, 'o_orderdate': 1996-04-10, 'o_totalprice': 51599.57}, {'o_orderkey': 120160, 'o_orderdate': 1995-04-09, 'o_totalprice': 209272.43}, {'o_orderkey': 212870, 'o_orderdate': 1996-10-30, 'o_totalprice': 168931.8}, {'o_orderkey': 269922, 'o_orderdate': 1996-03-19, 'o_totalprice': 108967.23}, {'o_orderkey': 306439, 'o_orderdate': 1997-05-17, 'o_totalprice': 222236.47}, {'o_orderkey': 360067, 'o_orderdate': 1992-12-07, 'o_totalprice': 195693.26}, {'o_orderkey': 374723, 'o_orderdate': 1996-11-20, 'o_totalprice': 233181.71}]",1992-12-07,1998-05-22,1744996.1
2,4,20,"[{'o_orderkey': 512195, 'o_orderdate': 1996-08-13, 'o_totalprice': 44791.42}, {'o_orderkey': 529350, 'o_orderdate': 1996-03-03, 'o_totalprice': 33563.9}, {'o_orderkey': 545218, 'o_orderdate': 1992-07-16, 'o_totalprice': 206615.3}, {'o_orderkey': 554115, 'o_orderdate': 1992-10-11, 'o_totalprice': 222977.75}, {'o_orderkey': 576263, 'o_orderdate': 1994-03-02, 'o_totalprice': 274992.65}, {'o_orderkey': 24322, 'o_orderdate': 1997-01-29, 'o_totalprice': 268534.86}, {'o_orderkey': 43879, 'o_orderdate': 1993-08-13, 'o_totalprice': 80130.69}, {'o_orderkey': 53283, 'o_orderdate': 1995-10-29, 'o_totalprice': 162955.31}, {'o_orderkey': 70819, 'o_orderdate': 1996-11-20, 'o_totalprice': 240814.11}, {'o_orderkey': 83684, 'o_orderdate': 1998-03-19, 'o_totalprice': 71483.64}, {'o_orderkey': 160516, 'o_orderdate': 1995-09-18, 'o_totalprice': 181789.2}, {'o_orderkey': 193030, 'o_orderdate': 1992-06-09, 'o_totalprice': 261208.46}, {'o_orderkey': 226818, 'o_orderdate': 1995-05-13, 'o_totalprice': 107127.51}, {'o_orderkey': 235779, 'o_orderdate': 1994-04-29, 'o_totalprice': 199636.4}, {'o_orderkey': 301350, 'o_orderdate': 1996-08-25, 'o_totalprice': 281282.72}, {'o_orderkey': 330404, 'o_orderdate': 1996-09-22, 'o_totalprice': 280809.61}, {'o_orderkey': 345858, 'o_orderdate': 1998-06-15, 'o_totalprice': 21012.53}, {'o_orderkey': 346693, 'o_orderdate': 1993-11-13, 'o_totalprice': 66417.98}, {'o_orderkey': 358886, 'o_orderdate': 1995-08-28, 'o_totalprice': 282207.37}, {'o_orderkey': 446499, 'o_orderdate': 1997-03-09, 'o_totalprice': 15671.83}]",1992-06-09,1998-06-15,3304023.24


## [Offline use-case] Send promocodes by email

### Prototyping

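Business logic (as implemented below, relative to the fixed reference date 1998-08-02):
- Registered more than 6 months ago (the date of the first order is used as the registration date)
- No order in the last 60 days
- Lifetime order total > 1,000,000 USD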

In [6]:
%%featureql

WITH
    -- Prototype of the promocode eligibility rules, evaluated for customers 1 to 4.
    -- All date arithmetic uses the fixed reference date 1998-08-02 rather than the current date.

    -- CUSTOMER ENTITY
    CUSTOMERS := ENTITY(),
    CUSTOMER_ID := INPUT(BIGINT#CUSTOMERS),
                         
    -- ORDER ENTITY
    ORDERS := ENTITY(),
    ORDER_ID := INPUT(BIGINT#ORDERS),

    -- Customer attributes read from tpch.customer, with c_custkey bound to CUSTOMER_ID.
    SOURCE_CUSTOMER := EXTERNAL_COLUMNS(
        c_custkey BIGINT#CUSTOMERS BOUND TO CUSTOMER_ID,
        c_mktsegment VARCHAR
        FROM TABLE(tpch.customer)
    ),
    CUSTOMER_MKTSEGMENT := SOURCE_CUSTOMER[c_mktsegment],

    -- Per-customer order history read from tpch.customer_history, with h_custkey bound to CUSTOMER_ID.
    -- h_listorders is an array of (order key, order date, total price) rows.
    SOURCE_CUSTOMER_HISTORY := EXTERNAL_COLUMNS(
        h_custkey BIGINT#CUSTOMERS BOUND TO CUSTOMER_ID,
        h_datefirstorder DATE, 
        h_listorders ARRAY(ROW(o_orderkey BIGINT#ORDERS, o_orderdate DATE, o_totalprice DOUBLE))
        FROM TABLE(tpch.customer_history)
    ),
    CUSTOMER_DATEFIRST := SOURCE_CUSTOMER_HISTORY[h_datefirstorder],
    CUSTOMER_ORDERS := SOURCE_CUSTOMER_HISTORY[h_listorders],

SELECT 
    CUSTOMER_ID                 := BIND_VALUES(ARRAY[1,2,3,4]),
    CUSTOMER_MKTSEGMENT,
    -- CUSTOMER_ORDERS,
    -- True when the first order is more than 6 months before the reference date.
    IS_OLD_CUSTOMER             := DATE_DIFF(CAST(CUSTOMER_DATEFIRST AS TIMESTAMP), TIMESTAMP '1998-08-02', 'MONTH') > 6,
    -- Sum of all order prices compared against 1,000,000.
    -- NOTE(review): the `>.5` operator spelling is kept verbatim; confirm it against the FeatureQL operator reference.
    LIFETIME_REVENUE_GT_1000000 := ARRAY_SUM(CUSTOMER_ORDERS[o_totalprice]) >.5 1000000e0,
    -- True when the most recent order is less than 60 days before the reference date.
    HAS_RECENT_ORDER            := DATE_DIFF(CAST(ARRAY_MAX(CUSTOMER_ORDERS[o_orderdate]) AS TIMESTAMP), TIMESTAMP '1998-08-02', 'DAY') < 60,
    -- Eligible when all three rules hold. The flags come out empty for customer 3 in the output,
    -- presumably because that customer has no row in tpch.customer_history (TODO confirm).
    SEND_PROMOCODE              := IS_OLD_CUSTOMER AND LIFETIME_REVENUE_GT_1000000 AND NOT HAS_RECENT_ORDER

Unnamed: 0,CUSTOMER_ID,CUSTOMER_MKTSEGMENT,IS_OLD_CUSTOMER,LIFETIME_REVENUE_GT_1000000,HAS_RECENT_ORDER,SEND_PROMOCODE
0,1,BUILDING,True,True,False,True
1,2,AUTOMOBILE,True,True,False,True
2,4,MACHINERY,True,True,True,False
3,3,AUTOMOBILE,,,,


### Persist features

You just need to put the features as prototyped in a namespace using `CREATE FEATURES IN`

In [7]:
%%featureql

-- Start from a clean slate: remove any features left in FM.DEMOS by a previous run.
DROP FEATURES IF EXISTS IN FM.DEMOS

Unnamed: 0,feature_name,status,message
0,FM.DEMOS.SEND_PROMOCODE,DELETED,Feature successfully deleted
1,FM.DEMOS.HAS_RECENT_ORDER,DELETED,Feature successfully deleted
2,FM.DEMOS.LIFETIME_REVENUE_GT_1000000,DELETED,Feature successfully deleted
3,FM.DEMOS.IS_OLD_CUSTOMER,DELETED,Feature successfully deleted
4,FM.DEMOS.CUSTOMER_TOTALPRICE,DELETED,Feature successfully deleted
5,FM.DEMOS.CUSTOMER_DATELAST,DELETED,Feature successfully deleted
6,FM.DEMOS.CUSTOMER_ORDERS,DELETED,Feature successfully deleted
7,FM.DEMOS.CUSTOMER_DATEFIRST,DELETED,Feature successfully deleted
8,FM.DEMOS.SOURCE_CUSTOMER_HISTORY,DELETED,Feature successfully deleted
9,FM.DEMOS.CUSTOMER_MKTSEGMENT,DELETED,Feature successfully deleted


In [8]:
%%featureql

CREATE OR REPLACE FEATURES IN FM.DEMOS AS
SELECT

    -- Entities and their input keys are declared in the FM.CORE namespace;
    -- every other feature below is created in FM.DEMOS.
    FM.CORE.CUSTOMERS := ENTITY(),
    FM.CORE.CUSTOMER_ID := INPUT(BIGINT#FM.CORE.CUSTOMERS),
                                 
    FM.CORE.ORDERS := ENTITY(),
    FM.CORE.ORDER_ID := INPUT(BIGINT#FM.CORE.ORDERS),

    -- Customer attributes read from tpch.customer, with c_custkey bound to FM.CORE.CUSTOMER_ID.
    SOURCE_CUSTOMER := EXTERNAL_COLUMNS(
        c_custkey BIGINT#FM.CORE.CUSTOMERS BOUND TO FM.CORE.CUSTOMER_ID,
        c_mktsegment VARCHAR
        FROM TABLE(tpch.customer)
    ),
    CUSTOMER_MKTSEGMENT := SOURCE_CUSTOMER[c_mktsegment],

    -- Per-customer order history read from tpch.customer_history.
    -- The order key is annotated with the fully qualified FM.CORE.ORDERS entity,
    -- consistent with the other entity references in this statement.
    SOURCE_CUSTOMER_HISTORY := EXTERNAL_COLUMNS(
        h_custkey BIGINT#FM.CORE.CUSTOMERS BOUND TO FM.CORE.CUSTOMER_ID,
        h_datefirstorder DATE, 
        h_listorders ARRAY(ROW(o_orderkey BIGINT#FM.CORE.ORDERS, o_orderdate DATE, o_totalprice DOUBLE))
        FROM TABLE(tpch.customer_history)
    ),
    CUSTOMER_DATEFIRST := SOURCE_CUSTOMER_HISTORY[h_datefirstorder],
    CUSTOMER_ORDERS := SOURCE_CUSTOMER_HISTORY[h_listorders],

    -- Intermediate aggregates over the order list, persisted so they can be reused on their own.
    CUSTOMER_DATELAST           := ARRAY_MAX(CUSTOMER_ORDERS[o_orderdate]),
    CUSTOMER_TOTALPRICE         := ARRAY_SUM(CUSTOMER_ORDERS[o_totalprice]),
    -- True when the first order is more than 6 months before the fixed reference date 1998-08-02
    -- (same definition as the prototype).
    IS_OLD_CUSTOMER             := DATE_DIFF(CAST(CUSTOMER_DATEFIRST AS TIMESTAMP), TIMESTAMP '1998-08-02', 'MONTH') > 6,
    -- Lifetime order total compared against 1,000,000.
    -- NOTE(review): the `>.5` operator spelling is kept verbatim; confirm it against the FeatureQL operator reference.
    LIFETIME_REVENUE_GT_1000000 := CUSTOMER_TOTALPRICE >.5 1000000e0,
    -- True when the most recent order is less than 60 days before the reference date.
    HAS_RECENT_ORDER            := DATE_DIFF(CAST(CUSTOMER_DATELAST AS TIMESTAMP), TIMESTAMP '1998-08-02', 'DAY') < 60,
    -- Eligible when the customer is old, high-revenue, and has no recent order.
    SEND_PROMOCODE              := IS_OLD_CUSTOMER AND LIFETIME_REVENUE_GT_1000000 AND NOT HAS_RECENT_ORDER

Unnamed: 0,feature_name,status,message
0,FM.CORE.CUSTOMERS,REPLACED,Feature was replaced
1,FM.CORE.CUSTOMER_ID,REPLACED,Feature was replaced
2,FM.CORE.ORDERS,REPLACED,Feature was replaced
3,FM.CORE.ORDER_ID,REPLACED,Feature was replaced
4,FM.DEMOS.SOURCE_CUSTOMER,CREATED,Feature created as not exists
5,FM.DEMOS.CUSTOMER_MKTSEGMENT,CREATED,Feature created as not exists
6,FM.DEMOS.SOURCE_CUSTOMER_HISTORY,CREATED,Feature created as not exists
7,FM.DEMOS.CUSTOMER_DATEFIRST,CREATED,Feature created as not exists
8,FM.DEMOS.CUSTOMER_ORDERS,CREATED,Feature created as not exists
9,FM.DEMOS.CUSTOMER_DATELAST,CREATED,Feature created as not exists


In [9]:
%%featureql

-- Inspect the registry entries of the first three FM.DEMOS features, ordered by name.
SHOW FEATURES IN FM.DEMOS ORDER BY NAME LIMIT 3

Unnamed: 0,NAME,DATATYPE,FUNCTION,INPUTS,FORMULA,RESTRICTIONS,STATUS,META,SIGNATURE,POSITION,DEPENDENCIES,CREATED_AT,CREATED_BY,UPDATED_AT,UPDATED_BY
0,FM.DEMOS.CUSTOMER_DATEFIRST,DATE,EXTRACT,"[""FM.CORE.CUSTOMER_ID""]",\n \n \n,"[""OFFLINE""]",DEV,{},EXTRACT.custom.DATE,50,"{""FM.CORE.CUSTOMER_ID"": 1, ""FM.DEMOS.CUSTOMER_DATEFIRST"": 3, ""FM.DEMOS.SOURCE_CUSTOMER_HISTORY"": 2}",2026-02-03 14:25:29.323000,143eb6eb-727d-409e-9d31-31e8657abbed,2026-02-03 14:25:29.323000,143eb6eb-727d-409e-9d31-31e8657abbed
1,FM.DEMOS.CUSTOMER_DATELAST,DATE,ARRAY_MAX,"[""FM.CORE.CUSTOMER_ID""]",ARRAY_MAX(\n \n \n ),[],DEV,{},ARRAY_MAX.ARRAYDATE.DATE,50,"{""FM.CORE.CUSTOMER_ID"": 1, ""UNNAMED_FEATURE_2EYDJC"": 4, ""FM.DEMOS.CUSTOMER_ORDERS"": 3, ""FM.DEMOS.CUSTOMER_DATELAST"": 5, ""FM.DEMOS.SOURCE_CUSTOMER_HISTORY"": 2}",2026-02-03 14:25:29.328000,143eb6eb-727d-409e-9d31-31e8657abbed,2026-02-03 14:25:29.328000,143eb6eb-727d-409e-9d31-31e8657abbed
2,FM.DEMOS.CUSTOMER_MKTSEGMENT,VARCHAR,EXTRACT,"[""FM.CORE.CUSTOMER_ID""]",\n \n \n,"[""OFFLINE""]",DEV,{},EXTRACT.custom.VARCHAR,50,"{""FM.CORE.CUSTOMER_ID"": 1, ""FM.DEMOS.SOURCE_CUSTOMER"": 2, ""FM.DEMOS.CUSTOMER_MKTSEGMENT"": 3}",2026-02-03 14:25:29.315000,143eb6eb-727d-409e-9d31-31e8657abbed,2026-02-03 14:25:29.315000,143eb6eb-727d-409e-9d31-31e8657abbed


In [10]:
%%featureql

SELECT 
    -- Evaluate the persisted FM.DEMOS features for five sample customers;
    -- the features are referenced by their qualified names and not redefined here.
    FM.CORE.CUSTOMER_ID := BIND_VALUES(ARRAY[1,2,3,4,5]),
    FM.DEMOS.IS_OLD_CUSTOMER,
    FM.DEMOS.LIFETIME_REVENUE_GT_1000000,
    FM.DEMOS.HAS_RECENT_ORDER,
    FM.DEMOS.SEND_PROMOCODE
ORDER BY FM.CORE.CUSTOMER_ID

Unnamed: 0,FM.CORE.CUSTOMER_ID,FM.DEMOS.IS_OLD_CUSTOMER,FM.DEMOS.LIFETIME_REVENUE_GT_1000000,FM.DEMOS.HAS_RECENT_ORDER,FM.DEMOS.SEND_PROMOCODE
0,1,True,True,False,True
1,2,True,True,False,True
2,3,,,,
3,4,True,True,True,False
4,5,True,True,True,False
