# Segmentation 2 - Online

The goal of this notebook is to reuse the features created in the offline notebook and call them from production code, so that we can show promocodes in the app.

## Setup environment

In [1]:
import json
from typing import Optional

import pandas as pd
from IPython.display import HTML, display

# Lift every pandas display limit so wide or long result frames are rendered in full.
for display_option in ('max_columns', 'max_rows', 'max_colwidth'):
    pd.set_option(f'display.{display_option}', None)

In [2]:
import featuremesh
%reload_ext featuremesh

from featuremesh import generate_access_token, decode_token

from libs.helpers.utils import get_featuremesh_config

# Load the FeatureMesh settings and register both service hosts as library defaults.
fm_config = get_featuremesh_config()
for host_setting in ('registry.host', 'access.host'):
    featuremesh.set_default(host_setting, fm_config[host_setting])

# Derive an access token for project 'default' from the configured identity token.
__YOUR_IDENTITY_TOKEN__ = fm_config['identity_token']
__YOUR_ACCESS_TOKEN__ = generate_access_token(
    identity_token=__YOUR_IDENTITY_TOKEN__,
    project='default',
)

# Debug aid: decode_token(__YOUR_ACCESS_TOKEN__)

In [3]:
from libs.helpers.utils_db import query_duckdb as query_duckdb_direct

def query_duckdb(sql: str) -> pd.DataFrame:
    """Run `sql` through `query_duckdb_direct` against the DuckDB file `/files/local1.db`.

    Other notebooks that hold a connection to the same file need their kernel
    interrupted before this call is made.
    """
    result = query_duckdb_direct(sql, storage_path="/files/local1.db")
    return result
    
# Online client authenticated with the access token generated during setup.
client_online = featuremesh.OnlineClient(access_token=__YOUR_ACCESS_TOKEN__)

# Registering it as the default lets the %%featureql cell magic run without naming a client.
featuremesh.set_default('client', client_online)

## Step 1: Use a low latency key-value store (redis)

In [4]:
from libs.helpers.utils import get_redis_connection_config

import redis
# Open the connection and ping once so a bad host or port is reported in this cell.
try:
    redis_config = get_redis_connection_config(port=6380)
    r = redis.Redis(**redis_config)
    r.ping()
except redis.exceptions.ConnectionError:
    print("Could not connect to Redis. Please check if Redis is running and the connection details are correct.")
except redis.exceptions.TimeoutError:
    print("Connection to Redis timed out. Please check your network settings and Redis server status.")
except Exception as e:
    # Anything unexpected is reported rather than raised, so the notebook keeps running.
    print(f"An error occurred: {str(e)}")
else:
    print("Connected to Redis successfully")

Connected to Redis successfully


In [5]:
# Start from a clean slate: FLUSHALL removes every key held by this Redis server.
r.flushall()

True

In [6]:
# Write one key through the client API and one through a raw command string.
r.set('my_key', 'Hello, Redis!')
r.execute_command('SET customer_id:1 orders_of_customer_1')

# Read both back. Values are returned as bytes, so decode when text is wanted.
print(r.get('my_key').decode('utf-8'))  # Output: Hello, Redis!
print(r.get('customer_id:1'))

Hello, Redis!
b'orders_of_customer_1'


In [7]:
# Remove the scratch keys written by the read/write demo before loading customer data.
r.flushall()

True

## Step 2: Ingest data from data lake to redis (mimicking reverse ETL)
We could also directly use transactional data through a Postgres data source.

In [8]:
# Reverse-ETL demo: copy the first 100 customer-history rows from DuckDB into Redis,
# one key per (attribute, customer) named "customer:<attribute>:<h_custkey>".

# Set to True to also store each customer's order list as a JSON document.
can_spend_money_on_redis = False

columns = ['h_custkey', 'h_datefirstorder', 'h_datelastorder', 'h_totalprice']
if can_spend_money_on_redis:
    # The order list has to be selected explicitly; without it record['h_listorders']
    # raises KeyError as soon as the flag is switched on.
    # NOTE(review): assumes tpch.customer_history exposes an h_listorders column - confirm.
    columns.append('h_listorders')

sql = f"SELECT {', '.join(columns)} FROM tpch.customer_history ORDER BY h_custkey LIMIT 100"
records = query_duckdb(sql).to_dict('records')

for record in records:
    custkey = record['h_custkey']

    # Values are written as plain text: dates as 'YYYY-MM-DD', the total through str().
    values_by_attribute = {
        'h_datefirstorder': record['h_datefirstorder'].strftime('%Y-%m-%d'),
        'h_datelastorder': record['h_datelastorder'].strftime('%Y-%m-%d'),
        'h_totalprice': str(record['h_totalprice']),
    }
    for attribute, value_txt in values_by_attribute.items():
        r.set(f"customer:{attribute}:{custkey}", value_txt)

    if can_spend_money_on_redis:
        # Convert every field to a JSON-serialisable builtin before dumping.
        list_orders = [
            {
                'o_orderkey': int(order['o_orderkey']),
                'o_orderdate': str(order['o_orderdate']),
                'o_totalprice': float(order['o_totalprice']),
            }
            for order in record['h_listorders']
        ]
        r.set(f"customer:h_listorders:{custkey}", json.dumps(list_orders))

### Check that values are in redis

In [9]:
# Count ingested customers by scanning one key family (one such key is written per customer).
num_customers = sum(1 for _key in r.scan_iter(match="customer:h_datefirstorder:*"))
num_customers

100

In [10]:
# Spot-check one stored value; Redis returns bytes, so decode it to text.
r.get('customer:h_datefirstorder:2').decode('utf-8')

'1992-12-07'

## Step 3: Create a feature source

In [11]:
%%featureql

CREATE OR REPLACE FEATURES AS 
SELECT 
    -- Register the Redis instance on port 6380 (500ms timeout) as a named source feature
    FM.DEMOS_RT.REDIS_CNX1 := SOURCE_REDIS('redis://host.docker.internal:6380' WITH (timeout='500ms')), 
;

Unnamed: 0,FEATURE_NAME,STATUS,MESSAGE
0,FM.DEMOS_RT.REDIS_CNX1,REPLACED,Feature was replaced


In [12]:
%%featureql --hook output

-- Inspect the stored definition of the Redis source, including its PROPERTIES column
SHOW FEATURES (INCLUDE ('PROPERTIES')) WHERE NAME='FM.DEMOS_RT.REDIS_CNX1'

INFO: The output has been stored as a dict in variable 'output'


Unnamed: 0,NAME,DATATYPE,FUNCTION,INPUTS,FORMULA,RESTRICTIONS,STATUS,META,SIGNATURE,POSITION,DEPENDENCIES,CREATED_AT,CREATED_BY,UPDATED_AT,UPDATED_BY,PROPERTIES
0,FM.DEMOS_RT.REDIS_CNX1,REDISSOURCE,SOURCE_REDIS,[],"SOURCE_REDIS( json_object( 'function', 'SOURCE_REDIS' , 'uri', 'redis://host.docker.internal:6380' , 'timeout', '500ms' ))","[""ONLINE""]",DEV,{},SOURCE_REDIS.custom.REDISSOURCE,50,"{""FM.DEMOS_RT.REDIS_CNX1"": 1}",2025-11-17 18:11:44.018000,-- None --,2025-11-17 18:11:44.023000,7102b2c7-cd29-4bdd-912e-b3cb6bd93668,"{""config_as_json"": ""{\""function\"":\""SOURCE_REDIS\"",\""uri\"":\""redis://host.docker.internal:6380\"",\""timeout\"":\""500ms\""}""}"


## Step 4: Prototype feature based on new source

By using composability, we can re-use business logic from the offline feature (FM.DEMOS.SEND_PROMOCODE) applied to the online case.

We hardcode mock values for the FM.DEMOS_RT.*_TXT features so that we can prototype the "pure" part of the transformation.

In [13]:
%%featureql

SELECT 
    -- Evaluate for four sample customer ids
    FM.CORE.CUSTOMER_ID := BIND_VALUES(ARRAY[1,2,3,4]),

    -- Mocked text values standing in for the Redis lookups (identical for every customer)
    FM.DEMOS_RT.CUSTOMER_DATEFIRST_TXT := '1992-12-07',
    FM.DEMOS_RT.CUSTOMER_DATELAST_TXT := '1998-05-22',
    FM.DEMOS_RT.CUSTOMER_TOTALPRICE_TXT := '1744996.1',

    -- Typed versions of the text values above
    FM.DEMOS_RT.CUSTOMER_DATEFIRST := CAST(FM.DEMOS_RT.CUSTOMER_DATEFIRST_TXT AS DATE),
    FM.DEMOS_RT.CUSTOMER_DATELAST := CAST(FM.DEMOS_RT.CUSTOMER_DATELAST_TXT AS DATE),
    FM.DEMOS_RT.CUSTOMER_TOTALPRICE := CAST(FM.DEMOS_RT.CUSTOMER_TOTALPRICE_TXT AS DOUBLE),
    
    -- Reuse the offline FM.DEMOS.SEND_PROMOCODE definition with its three inputs swapped for the FM.DEMOS_RT features
    FM.DEMOS_RT.SHOW_PROMOCODE := VARIANT(
        FM.DEMOS.SEND_PROMOCODE 
        REPLACING FEATURES 
            FM.DEMOS.CUSTOMER_DATEFIRST,
            FM.DEMOS.CUSTOMER_DATELAST,
            FM.DEMOS.CUSTOMER_TOTALPRICE
        WITH 
            FM.DEMOS_RT.CUSTOMER_DATEFIRST,
            FM.DEMOS_RT.CUSTOMER_DATELAST,
            FM.DEMOS_RT.CUSTOMER_TOTALPRICE
    )
;

Unnamed: 0,FM__CORE__CUSTOMER_ID,FM__DEMOS_RT__CUSTOMER_DATEFIRST_TXT,FM__DEMOS_RT__CUSTOMER_DATELAST_TXT,FM__DEMOS_RT__CUSTOMER_TOTALPRICE_TXT,FM__DEMOS_RT__CUSTOMER_DATEFIRST,FM__DEMOS_RT__CUSTOMER_DATELAST,FM__DEMOS_RT__CUSTOMER_TOTALPRICE,FM__DEMOS_RT__SHOW_PROMOCODE
0,1,1992-12-07,1998-05-22,1744996.1,1992-12-07,1998-05-22,1744996.1,True
1,2,1992-12-07,1998-05-22,1744996.1,1992-12-07,1998-05-22,1744996.1,True
2,3,1992-12-07,1998-05-22,1744996.1,1992-12-07,1998-05-22,1744996.1,True
3,4,1992-12-07,1998-05-22,1744996.1,1992-12-07,1998-05-22,1744996.1,True


## Step 5: Persist these features

Using the real redis sources instead of text literals

**TODO:** Bug when creating all the features in one go. For now, create one by one.

In [14]:
%%featureql

CREATE OR REPLACE FEATURES IN FM.DEMOS_RT AS
SELECT
    -- Raw text values read from Redis; each key is an attribute prefix followed by the customer id
    CUSTOMER_DATEFIRST_TXT := EXTERNAL_REDIS(KEY 'customer:h_datefirstorder:' || CAST(FM.CORE.CUSTOMER_ID AS VARCHAR) FROM REDIS_CNX1),
    CUSTOMER_DATELAST_TXT := EXTERNAL_REDIS(KEY 'customer:h_datelastorder:' || CAST(FM.CORE.CUSTOMER_ID AS VARCHAR) FROM REDIS_CNX1),
    CUSTOMER_TOTALPRICE_TXT := EXTERNAL_REDIS(KEY 'customer:h_totalprice:' || CAST(FM.CORE.CUSTOMER_ID AS VARCHAR) FROM REDIS_CNX1),

    -- Typed versions of the text values above
    CUSTOMER_DATEFIRST := CAST(CUSTOMER_DATEFIRST_TXT AS DATE),
    CUSTOMER_DATELAST := CAST(CUSTOMER_DATELAST_TXT AS DATE),
    CUSTOMER_TOTALPRICE := CAST(CUSTOMER_TOTALPRICE_TXT AS DOUBLE)

Unnamed: 0,FEATURE_NAME,STATUS,MESSAGE
0,FM.DEMOS_RT.CUSTOMER_DATEFIRST_TXT,REPLACED,Feature was replaced
1,FM.DEMOS_RT.CUSTOMER_DATELAST_TXT,REPLACED,Feature was replaced
2,FM.DEMOS_RT.CUSTOMER_TOTALPRICE_TXT,REPLACED,Feature was replaced
3,FM.DEMOS_RT.CUSTOMER_DATEFIRST,REPLACED,Feature was replaced
4,FM.DEMOS_RT.CUSTOMER_DATELAST,REPLACED,Feature was replaced
5,FM.DEMOS_RT.CUSTOMER_TOTALPRICE,REPLACED,Feature was replaced


In [15]:
# Check that features were correctly created
output = client_online.query("SHOW FEATURES IN FM.DEMOS_RT")
assert(len(output.dataframe)>=7)

### Create the transformation features reusing the definitions

**TODO:** Bug when creating all the features in one go. For now, create one by one.

In [16]:
%%featureql 

CREATE OR REPLACE FEATURES IN FM.DEMOS_RT AS
SELECT            
    -- 1998-08-02 is the fixed reference date used by both date comparisons below
    IS_OLD_CUSTOMER             := DATE_DIFF(CAST(CUSTOMER_DATEFIRST AS TIMESTAMP), TIMESTAMP '1998-08-02', 'DAY') > 180,
    -- NOTE(review): the `>.5` comparison operator is unusual - confirm it is intended rather than a plain `>`
    LIFETIME_REVENUE_GT_1000000 := CUSTOMER_TOTALPRICE >.5 1000000e0,
    HAS_RECENT_ORDER            := DATE_DIFF(CAST(CUSTOMER_DATELAST AS TIMESTAMP), TIMESTAMP '1998-08-02', 'DAY') < 60,
    -- Shown to old, high-revenue customers that have no recent order
    SHOW_PROMOCODE              := IS_OLD_CUSTOMER AND LIFETIME_REVENUE_GT_1000000 AND NOT HAS_RECENT_ORDER

Unnamed: 0,FEATURE_NAME,STATUS,MESSAGE
0,FM.DEMOS_RT.IS_OLD_CUSTOMER,REPLACED,Feature was replaced
1,FM.DEMOS_RT.LIFETIME_REVENUE_GT_1000000,REPLACED,Feature was replaced
2,FM.DEMOS_RT.HAS_RECENT_ORDER,REPLACED,Feature was replaced
3,FM.DEMOS_RT.SHOW_PROMOCODE,REPLACED,Feature was replaced


### Or better, use a VARIANT to reuse the definitions from analytics

In [17]:
%%featureql

CREATE OR REPLACE FEATURES AS
SELECT
    -- Persist a variant of the offline FM.DEMOS.SEND_PROMOCODE whose three inputs are replaced by the Redis-backed features
    FM.DEMOS_RT.SHOW_PROMOCODE_V := VARIANT(
        FM.DEMOS.SEND_PROMOCODE 
        REPLACING FEATURES 
            FM.DEMOS.CUSTOMER_DATEFIRST,
            FM.DEMOS.CUSTOMER_DATELAST,
            FM.DEMOS.CUSTOMER_TOTALPRICE
        WITH 
            FM.DEMOS_RT.CUSTOMER_DATEFIRST,
            FM.DEMOS_RT.CUSTOMER_DATELAST,
            FM.DEMOS_RT.CUSTOMER_TOTALPRICE
    )

Unnamed: 0,FEATURE_NAME,STATUS,MESSAGE
0,FM.DEMOS_RT.SHOW_PROMOCODE_V,REPLACED,Feature was replaced


### Check that we have all features registered

In [18]:
# Check that features were correctly created
output = client_online.query("SHOW FEATURES IN FM.DEMOS_RT")
assert(len(output.dataframe)>=12)

### Check we can evaluate using magic commands

In [19]:
%%featureql

SELECT 
    -- Evaluate one raw Redis value and the variant-based decision for four sample customer ids
    FM.CORE.CUSTOMER_ID := BIND_VALUES(ARRAY[1,2,3,4]),
    FM.DEMOS_RT.CUSTOMER_DATEFIRST_TXT,
    FM.DEMOS_RT.SHOW_PROMOCODE_V

Unnamed: 0,FM__CORE__CUSTOMER_ID,FM__DEMOS_RT__CUSTOMER_DATEFIRST_TXT,FM__DEMOS_RT__SHOW_PROMOCODE_V
0,1,1992-04-19,True
1,2,1992-12-07,True
2,3,,
3,4,1992-06-09,False


## Step 5: Call FeatureMesh APIs from "production code"

We do not need to develop a dedicated endpoint to serve these data, reducing system complexity.

This is what we call Feature as a Service (FaaS).

### Step 5a: Direct HTTP Call to FeatureMesh backend service

In [20]:
import requests

def call_api_show_promocode(customer_id: int) -> Optional[dict]:
    """Evaluate the promocode decision for one customer through the FeatureMesh HTTP API.

    Posts a free-form FeatureQL query to the `/v1/featureql` serving endpoint.

    Args:
        customer_id: Customer id bound as FM.CORE.CUSTOMER_ID.

    Returns:
        The decoded JSON body on HTTP 200; otherwise None, after printing the response.

    Raises:
        ValueError, TypeError: If `customer_id` cannot be converted to an integer.
    """
    # The id is interpolated into the query text, so force it to an integer first:
    # anything that is not a number is rejected instead of ending up inside the query.
    customer_id = int(customer_id)

    params_post = {
        "query": f"""
            SELECT 
                FM.CORE.CUSTOMER_ID := BIND_VALUE({customer_id}),
                FM.DEMOS_RT.CUSTOMER_DATELAST_TXT,
                FM.DEMOS_RT.SHOW_PROMOCODE_V
        """, 
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {__YOUR_ACCESS_TOKEN__}",
    }
    # NOTE(review): verify=False disables TLS certificate verification. Acceptable for a
    # local demo only; remove it before calling a real serving host.
    response = requests.post(f"{get_featuremesh_config()['serving.host']}/v1/featureql", json=params_post, headers=headers, timeout=30, verify=False)

    if response.status_code == 200:
        return response.json()

    # Best-effort demo behaviour: report the failure and hand back None to the caller.
    print(f'A non HTTP 200 response occured: {response=}: {response.text}')
    return None

In [21]:
# Show the promocode decision for customers 1 through 4.
for customer_id in range(1, 5):
    output = call_api_show_promocode(customer_id)
    rows = output['data']['rows']
    print(rows)

[{'fm__core__customer_id': 1, 'fm__demos_rt__customer_datelast_txt': '1997-03-04', 'fm__demos_rt__show_promocode_v': True}]


[{'fm__core__customer_id': 2, 'fm__demos_rt__customer_datelast_txt': '1998-05-22', 'fm__demos_rt__show_promocode_v': True}]


[{'fm__core__customer_id': 3, 'fm__demos_rt__customer_datelast_txt': None, 'fm__demos_rt__show_promocode_v': None}]


[{'fm__core__customer_id': 4, 'fm__demos_rt__customer_datelast_txt': '1998-06-15', 'fm__demos_rt__show_promocode_v': False}]


In [22]:
# Simulate a new order for customer 2
r.set('customer:h_datelastorder:2', '1998-08-01')

True

In [23]:
# Customer 2 now has a more recent last order, so its promocode decision has flipped to False.
for customer_id in range(1, 5):
    output = call_api_show_promocode(customer_id)
    rows = output['data']['rows']
    print(rows)

[{'fm__core__customer_id': 1, 'fm__demos_rt__customer_datelast_txt': '1997-03-04', 'fm__demos_rt__show_promocode_v': True}]


[{'fm__core__customer_id': 2, 'fm__demos_rt__customer_datelast_txt': '1998-08-01', 'fm__demos_rt__show_promocode_v': False}]


[{'fm__core__customer_id': 3, 'fm__demos_rt__customer_datelast_txt': None, 'fm__demos_rt__show_promocode_v': None}]


[{'fm__core__customer_id': 4, 'fm__demos_rt__customer_datelast_txt': '1998-06-15', 'fm__demos_rt__show_promocode_v': False}]


### Step 5b: Call FeatureMesh APIs using the python client

If you are calling the online service from Python, you can also use the Python client directly.

In [24]:
# Same query as the HTTP example, sent through the Python client for customers 1 through 4.
for customer_id in range(1, 5):
    query = f"""
        SELECT 
            FM.CORE.CUSTOMER_ID := BIND_VALUE({customer_id}),
            FM.DEMOS_RT.CUSTOMER_DATELAST_TXT,
            FM.DEMOS_RT.SHOW_PROMOCODE_V
    """
    # The client exposes the result as a DataFrame; print it as a list of row dicts.
    records = client_online.query(query).dataframe.to_dict(orient='records')
    print(records)

[{'FM__CORE__CUSTOMER_ID': 1, 'FM__DEMOS_RT__CUSTOMER_DATELAST_TXT': '1997-03-04', 'FM__DEMOS_RT__SHOW_PROMOCODE_V': True}]


[{'FM__CORE__CUSTOMER_ID': 2, 'FM__DEMOS_RT__CUSTOMER_DATELAST_TXT': '1998-08-01', 'FM__DEMOS_RT__SHOW_PROMOCODE_V': False}]


[{'FM__CORE__CUSTOMER_ID': 3, 'FM__DEMOS_RT__CUSTOMER_DATELAST_TXT': None, 'FM__DEMOS_RT__SHOW_PROMOCODE_V': None}]


[{'FM__CORE__CUSTOMER_ID': 4, 'FM__DEMOS_RT__CUSTOMER_DATELAST_TXT': '1998-06-15', 'FM__DEMOS_RT__SHOW_PROMOCODE_V': False}]


## Step 6: Using prepared statements
The problem with free-form queries is that they are slow and inefficient. In production, you should use a prepared statement instead.

- Step 1: Register a prepared-statement feature based on "FM.DEMOS_RT.SHOW_PROMOCODE_V" with input FM.CORE.CUSTOMER_ID
- Step 2: Call it using the serving endpoint

**TODO:** Repair PREPARED_STATEMENTS based on variants

In [25]:
%%featureql

CREATE OR REPLACE FEATURE FM.DEMOS_RT.SHOW_PROMOCODE_V_PS AS 
PREPARED_STATEMENT(
    -- Feature to evaluate, with the customer id as the only caller-supplied input
    FM.DEMOS_RT.SHOW_PROMOCODE_V
    USING INPUTS FM.CORE.CUSTOMER_ID
);

Unnamed: 0,FEATURE_NAME,STATUS,MESSAGE
0,FM.DEMOS_RT.SHOW_PROMOCODE_V_PS,REPLACED,Feature was replaced


In [26]:
def call_prepared_statement_1arg(feature_name: str, customer_id: int) -> Optional[dict]:
    """Call a one-input prepared statement through the FeatureMesh HTTP API.

    Posts to the `/v1/featureql` serving endpoint with the statement id and a single
    input row instead of a free-form query.

    Args:
        feature_name: Name of the prepared-statement feature, sent as the statement id.
        customer_id: Value for the statement's only input.

    Returns:
        The decoded JSON body on HTTP 200; otherwise None, after printing the response.
    """
    params_post = {
      "statement": {
        "id": feature_name,
        "inputs": {
            # One input table holding one row with one column.
            "input_table_1": [[customer_id]],
        }
      }
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {__YOUR_ACCESS_TOKEN__}",
    }
    # NOTE(review): verify=False disables TLS certificate verification. Acceptable for a
    # local demo only; remove it before calling a real serving host.
    response = requests.post(f"{get_featuremesh_config()['serving.host']}/v1/featureql", json=params_post, headers=headers, timeout=30, verify=False)

    if response.status_code == 200:
        return response.json()

    # Best-effort demo behaviour: report the failure and hand back None to the caller.
    print(f'A non HTTP 200 response occured: {response=}: {response.text}')
    return None

In [27]:
# Evaluate the prepared statement for customer 2 and display the full JSON response.
call_prepared_statement_1arg("FM.DEMOS_RT.SHOW_PROMOCODE_V_PS", 2)

{'data': {'schema': {'fields': [{'name': 'fm__core__customer_id',
     'data_type': 'Int64',
     'nullable': False,
     'dict_id': 0,
     'dict_is_ordered': False,
     'metadata': {}},
    {'name': 'fm__demos_rt__show_promocode_v_ps',
     'data_type': 'Boolean',
     'nullable': True,
     'dict_id': 0,
     'dict_is_ordered': False,
     'metadata': {}}],
   'metadata': {}},
  'rows': [{'fm__core__customer_id': 2,
    'fm__demos_rt__show_promocode_v_ps': False}]}}

### Calling for 100 customers

About 30ms per call.

In [28]:
%%time

# Call the prepared statement once per customer id, 1 through 100 inclusive.
# range(1, 100) stopped at 99, one call short of the 100 announced in the heading.
responses = [
    call_prepared_statement_1arg("FM.DEMOS_RT.SHOW_PROMOCODE_V_PS", customer_id)
    for customer_id in range(1, 101)
]

# Spot-check the second response (customer 2); missing 'data' or 'rows' yields an empty list.
print(responses[1].get('data', {}).get('rows', []))

[{'fm__core__customer_id': 2, 'fm__demos_rt__show_promocode_v_ps': False}]
CPU times: user 245 ms, sys: 72.1 ms, total: 317 ms
Wall time: 3.58 s
