# 07 Segmenting customers with advanced metrics

In [1]:
import json
import sqlalchemy
import pandas as pd

from pprint import pprint

In [2]:
# Open a SQLAlchemy connection to the churn database.
conn_string = "postgresql://postgres-db/churn?user=postgres&password=password"
engine = sqlalchemy.create_engine(
    conn_string,
    # session option: resolve unqualified table names in socialnet7, then public
    connect_args={"options": "-csearch_path=socialnet7,public"},
)
conn = engine.connect()  # reused by the query cells below; release with conn.close()

# Sanity check through pandas: list the tables visible on this connection.
tables = pd.read_sql_query(sql="SELECT * FROM information_schema.tables;", con=conn)
tables.head(3)

Unnamed: 0,table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action
0,churn,socialnet7,active_period,BASE TABLE,,,,,,YES,NO,
1,churn,socialnet7,event_type,BASE TABLE,,,,,,YES,NO,
2,churn,socialnet7,metric_name,BASE TABLE,,,,,,YES,NO,


In [3]:
import json 
from box import Box
# !pip install python-box

# Load the listing parameters and wrap them in a Box so nested sections can be
# read with attribute access (conf.chap7, conf.chap8, ...).
with open("/app/fightchurn/listings/conf/socialnet7_listings.json", "r") as f:
    conf = Box(json.load(f))

# The metric cells in this notebook read their parameters from `chap7`; it was
# previously never bound, so a fresh kernel failed with NameError.
chap7 = conf.chap7
# `chap8` is kept because a later cell pretty-prints it.
chap8 = conf.chap8

In [9]:
import sys
sys.path.append("/app")

from fightchurn.listings.chap5.listing_5_2_dataset_stats import dataset_stats
from fightchurn.listings.chap7.listing_7_5_fat_tail_scores import fat_tail_scores
from fightchurn.listings.chap6.listing_6_4_find_metric_groups import find_metric_groups
from fightchurn.listings.chap6.listing_6_3_apply_metric_groups import apply_metric_groups
from fightchurn.listings.chap6.listing_6_5_ordered_correlation_matrix import ordered_correlation_matrix

In [4]:
pprint(chap8)

{'defaults': {'data_set_path': 'socialnet7/socialnet7_dataset2.csv',
              'type': 'py'},
 'list0': {'name': 'dataset2',
           'params': {'%from_yyyy-mm-dd': '2020-03-01',
                      '%metric_interval': '7 day',
                      '%to_yyyy-mm-dd': '2020-05-10',
                      'mode': 'save',
                      'type': 'sql'}},
 'list1': {'name': 'prepare_data',
           'params': Box({'group_corr_thresh': 0.65}),
           'v2': Box({'data_set_path': 'socialnet7/socialnet7_dataset.csv'}),
           'v3': {'data_set_path': 'socialnet7/socialnet7_dataset3_nocat.csv'}},
 'list2': {'name': 'logistic_regression',
           'params': Box({}),
           'v1': Box({'as_retention': False})},
 'list3': {'name': 'dataset2_current',
           'params': {'%metric_interval': '7 day',
                      'mode': 'save',
                      'type': 'sql'}},
 'list4': Box({'name': 'rescore_metrics', 'params': {}}),
 'list5': {'name': 'churn_forecast',
  

## Dependent Data

In [4]:
# Last few rows of the metric_name table, ordered by id.
tmp = pd.read_sql_query(
    sql="SELECT * FROM metric_name ORDER BY metric_name_id",
    con=conn,
)
tmp.tail()

Unnamed: 0,metric_name_id,metric_name
4,4,dislike_per_month
5,5,unfriend_per_month
6,6,message_per_month
7,7,reply_per_month
8,8,account_tenure


In [5]:
# Random sample of 10,000 rows from the metric table (differs on every run).
tmp = pd.read_sql_query(
    sql="SELECT * FROM metric ORDER BY RANDOM() LIMIT 10000",
    con=conn,
)
tmp.head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value
0,8371,2020-02-09,4,10.0
1,10505,2020-05-10,8,98.0
2,10050,2020-03-08,3,5.0
3,6899,2020-03-22,8,77.0
4,1995,2020-03-01,4,3.0


In [116]:
# Random sample of 10,000 rows from the event table (differs on every run).
tmp = pd.read_sql_query(
    sql="SELECT * FROM event ORDER BY RANDOM() LIMIT 10000",
    con=conn,
)
tmp.head()

Unnamed: 0,account_id,event_time,event_type_id
0,1923,2020-02-14 12:09:11,2
1,883,2020-01-23 02:04:19,4
2,315,2020-04-09 06:33:00,2
3,6513,2020-05-02 06:10:41,3
4,6966,2020-05-27 22:31:57,0


## Metric Queries

In [10]:
# Date window read from the chapter 7 defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

### Listing 7.1 Ratio Metric

In [6]:
# Listing 7.1 configuration: `params` holds the base ratio metric and the
# `v2`..`v7` entries hold the additional versions.
tmp = chap7.list1.insert
_version_keys = [f"v{i}" for i in range(2, 8)]

new_metric_ids = [tmp.params["%new_metric_id"]] + [
    tmp[key]["%new_metric_id"] for key in _version_keys
]
new_metric_names = [tmp.params["%new_metric_name"]] + [
    tmp[key]["%new_metric_name"] for key in _version_keys
]

# (numerator, denominator) pairs; a version without "%den_metric" yields None
ratio_metric_pairs = [(tmp.params["%num_metric"], tmp.params["%den_metric"])]
ratio_metric_pairs += [
    (tmp[key]["%num_metric"], tmp[key].get("%den_metric")) for key in _version_keys
]
ratio_metric_pairs

[('adview_per_month', 'post_per_month'),
 ('reply_per_month', 'message_per_month'),
 ('like_per_month', None),
 ('post_per_month', 'message_per_month'),
 ('unfriend_per_month', 'newfriend_per_month'),
 ('dislike_per_month', 'total_opinions'),
 ('unfriend_28day_avg_84day_obs_scaled', 'newfriend_per_month')]

In [15]:
# Parameters of the base ratio metric (Listing 7.1).
num_metric, den_metric, new_metric_id, new_metric_name = (
    chap7.list1.insert.params[key]
    for key in ("%num_metric", "%den_metric", "%new_metric_id", "%new_metric_name")
)

num_metric, den_metric, new_metric_id, new_metric_name

('adview_per_month', 'post_per_month', 21, 'adviw_per_post')

In [26]:
# insert_7_1_ratio_metric
# Preview of the ratio metric. The "INSERT INTO metric" / trailing "ON CONFLICT"
# lines are commented out inside the SQL, so only the metric_name row is written;
# the ratios are returned together with num_value/den_value for inspection.
# den_metric is the left side of the join: every denominator row is kept, a
# missing numerator is treated as 0 (COALESCE) and a non-positive denominator
# yields 0 (CASE ... ELSE 0).

query = f"""\
INSERT into metric_name values ({new_metric_id}, concat('{new_metric_name}'))
ON CONFLICT DO NOTHING;

-- numerator metric
WITH num_metric AS (
            SELECT
                    account_id,
                    metric_time,
                    metric_value AS num_value
                FROM
                    metric AS m
    INNER JOIN metric_name AS n
                     ON n.metric_name_id = m.metric_name_id
                  AND n.metric_name = '{num_metric}'
                  AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
), -- denominator metric
den_metric AS (
            SELECT
                    account_id,
                    metric_time,
                    metric_value AS den_value
              FROM
                    metric AS m
    INNER JOIN metric_name AS n
                    ON n.metric_name_id=m.metric_name_id
               AND n.metric_name = '{den_metric}'
                 AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
)

-- INSERT INTO metric (account_id, metric_time, metric_name_id, metric_value)

         SELECT
             d.account_id,
             d.metric_time,
             {new_metric_id} AS metric_name_id,
      CASE WHEN den_value > 0
             THEN COALESCE(num_value,0.0)/den_value
             ELSE 0
              END AS metric_value,
             n.num_value, 
             d.den_value
           FROM den_metric AS d
LEFT OUTER JOIN num_metric AS n
             ON n.account_id = d.account_id
            AND n.metric_time = d.metric_time

-- ON CONFLICT DO NOTHING;
"""
# Run the preview and show the first rows.
res = pd.read_sql_query(query, conn)
res.head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,num_value,den_value
0,5830,2020-05-03,21,3.0,3.0,1.0
1,5573,2020-03-08,21,2.25,9.0,4.0
2,11421,2020-04-12,21,0.863636,19.0,22.0
3,3278,2020-04-12,21,0.902857,158.0,175.0
4,2117,2020-03-29,21,0.0,,1.0


In [28]:
# Rows where the numerator metric had no matching record in the LEFT JOIN.
res.loc[res["num_value"].isna()].head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,num_value,den_value
4,2117,2020-03-29,21,0.0,,1.0
46,7810,2020-04-05,21,0.0,,1.0
107,2928,2020-03-01,21,0.0,,7.0
194,12652,2020-04-12,21,0.0,,1.0
305,11738,2020-03-08,21,0.0,,1.0


In [29]:
# Check for rows with a missing denominator value.
res.loc[res["den_value"].isna()].head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,num_value,den_value


**Ratio metrics** divide one metric by another.

- if the numerator is missing (NaN), it is treated as 0
- this example divides `adview_per_month` by `post_per_month`, which makes some business sense
  - a larger value means the account viewed more ads relative to its posts

### Listing 7.3 Total Metrics

In [48]:
# Parameters of the total metric (Listing 7.3).
# The earlier read of chap7.list3.params["%metric_list"] was a dead store,
# immediately overwritten by the value below, so it has been removed.
metric_list = chap7.list3.insert.params["%metric_list"]
new_metric_id = chap7.list3.insert.params["%new_metric_id"]
new_metric_name = chap7.list3.insert.params["%new_metric_name"]
metric_list, new_metric_id, new_metric_name

(" 'like_per_month', 'dislike_per_month' ", 26, 'total_opinions')

In [52]:
# insert_7_3_total_metric
# Sum the values of all metrics in the given list, forming a new metric,
# e.g. like_per_month and dislike_per_month.
# Preview only: the "INSERT INTO metric" line is commented out inside the SQL.
# ARRAY_AGG exposes the individual values that make up each total.

query = f"""\

INSERT into metric_name values ({new_metric_id},'{new_metric_name}')
ON CONFLICT DO NOTHING;

-- INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        account_id, 
        metric_time, 
        {new_metric_id} AS metric_name_id, 
        SUM(metric_value) AS metric_total,
        ARRAY_AGG(metric_value) AS metric_values
      FROM 
        metric AS m 
INNER JOIN 
        metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name IN ({metric_list})
     WHERE metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
  GROUP BY metric_time, account_id

-- ON CONFLICT DO NOTHING;
"""

# Run the preview and show the first rows.
res = pd.read_sql_query(query, conn)
res.head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_total,metric_values
0,1,2020-03-01,26,4.0,"[1.0, 3.0]"
1,2,2020-03-01,26,169.0,"[108.0, 61.0]"
2,3,2020-03-01,26,2.0,[2.0]
3,4,2020-03-01,26,42.0,"[37.0, 5.0]"
4,6,2020-03-01,26,38.0,"[12.0, 26.0]"


**Total metrics** sum the values of multiple metrics over a given time period.

- this example sums the like and dislike values at each observation time for each account
- the metric describes the account's overall interaction with the product

### Listing 7.4 Percent Change Metrics

In [89]:
# Reset the date window to the chapter 7 defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

In [90]:
# Parameters of the percent-change metric (Listing 7.4).
new_metric_id, new_metric_name, metric2measure, period_weeks = (
    chap7.list4.insert.params[key]
    for key in ("%new_metric_id", "%new_metric_name", "%metric2measure", "%period_weeks")
)

new_metric_id, new_metric_name, metric2measure, period_weeks

(30, 'new_friends_pcnt_change', 'newfriend_per_month', 4)

In [79]:
# insert_7_4_percent_change_metric
# Calculate the percentage change of a given metric to form a new metric,
# e.g. newfriend_per_month to new_friends_pcnt_change (compared with the previous period).
# Preview only: the "INSERT INTO metric" line is commented out inside the SQL.
# start_metric covers the same window shifted back by `period_weeks` weeks; each
# start row is matched to the end row exactly `period_weeks` weeks later.
# A missing end value counts as 0 (COALESCE), giving a change of -1.0; rows with
# start_value <= 0 are excluded by the WHERE clause.

query = f"""\

INSERT into metric_name values ({new_metric_id},'{new_metric_name}')
ON CONFLICT DO NOTHING;

WITH end_metric AS (
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS end_value
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
), 
start_metric AS (
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS start_value
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time 
           BETWEEN ('{from_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
               AND ('{to_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
)

-- INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

          SELECT 
              s.account_id AS account_id,  
              s.metric_time + interval '{period_weeks} week' AS metric_time,  -- this is also the end time
              {new_metric_id} AS metric_name_id,
              COALESCE(end_value,0.0) / start_value - 1.0 AS metric_value,
              s.start_value,
              e.end_value
            FROM 
              start_metric AS s 
 LEFT OUTER JOIN end_metric AS e
              ON s.account_id = e.account_id
             AND e.metric_time = (s.metric_time + interval '{period_weeks} week')
             -- ending metric time equals to starting metric time plust the one month interval
           WHERE start_value > 0
 
-- ON CONFLICT DO NOTHING;
"""
# Run the preview and show the first rows.
res = pd.read_sql_query(query, conn)
res.head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,start_value,end_value
0,5651,2020-03-08,30,1.222222,9.0,20.0
1,8653,2020-03-08,30,2.0,2.0,6.0
2,9346,2020-03-15,30,-0.333333,9.0,6.0
3,1834,2020-03-08,30,0.5,2.0,3.0
4,5373,2020-03-01,30,4.5,2.0,11.0


**Percent change metrics** measure the relative change of a given metric between a start period and an end period.

- this example uses new friends per month; the change can be either positive or negative
- it indicates how the account's engagement, in terms of making friends on the product, is trending

In [71]:
# Keep observations whose value differs between the start and end period
# (rows with a missing end_value also satisfy `!=` and are kept).
_ordered = res.sort_values(by=["account_id", "metric_time"])
tmp = _ordered.query("start_value != end_value")
display(tmp.shape)
tmp.query("account_id == 2")

(90119, 6)

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,start_value,end_value
16925,2,2020-03-01,30,0.5,4.0,6.0
25167,2,2020-03-08,30,0.166667,6.0,7.0
16013,2,2020-03-15,30,-0.333333,9.0,6.0
20120,2,2020-03-22,30,-0.142857,7.0,6.0
69652,2,2020-03-29,30,-0.166667,6.0,5.0
60683,2,2020-04-05,30,-0.714286,7.0,2.0
78754,2,2020-04-12,30,-1.0,6.0,
34151,2,2020-04-19,30,-1.0,6.0,
90377,2,2020-04-26,30,-1.0,5.0,
39703,2,2020-05-03,30,-1.0,2.0,


In [77]:
# Inspect the start_metric side of the percent-change query on its own: the
# metric rows in the window shifted back by `period_weeks` weeks, with the
# shifted window bounds shown as between_start / between_end.
query = f"""\
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS start_value,
        '{from_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week' AS between_start, 
        '{to_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week' AS between_end 
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time 
           BETWEEN ('{from_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
               AND ('{to_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
"""

# Sort by time and show a single account so the weekly sequence is easy to follow.
tmp = pd.read_sql_query(query, conn).sort_values("metric_time")
tmp.query("account_id == 2")
# start_value is the value of `metric2measure` ("new friends per month" here) in the shifted window
# start_value is the value of "new friends per month" at the given interval

Unnamed: 0,account_id,metric_time,start_value,between_start,between_end
16925,2,2020-02-02,4.0,2020-02-02,2020-04-12
25167,2,2020-02-09,6.0,2020-02-02,2020-04-12
16013,2,2020-02-16,9.0,2020-02-02,2020-04-12
20120,2,2020-02-23,7.0,2020-02-02,2020-04-12
69652,2,2020-03-01,6.0,2020-02-02,2020-04-12
60683,2,2020-03-08,7.0,2020-02-02,2020-04-12
78754,2,2020-03-15,6.0,2020-02-02,2020-04-12
34151,2,2020-03-22,6.0,2020-02-02,2020-04-12
90377,2,2020-03-29,5.0,2020-02-02,2020-04-12
39703,2,2020-04-05,2.0,2020-02-02,2020-04-12


In [78]:
# Inspect the end_metric side of the percent-change query on its own: the
# metric rows inside the unshifted [from, to] window, with the bounds echoed
# back as between_start / between_end.
query = f"""\
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS end_value,
        '{from_yyyy_mm_dd}' AS between_start, 
        '{to_yyyy_mm_dd}' AS between_end
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
"""
# Sort by time and show a single account for comparison with the start side.
tmp = pd.read_sql_query(query, conn).sort_values("metric_time")
tmp.query("account_id == 2")

Unnamed: 0,account_id,metric_time,end_value,between_start,between_end
62481,2,2020-03-01,6.0,2020-03-01,2020-05-10
49799,2,2020-03-08,7.0,2020-03-01,2020-05-10
75328,2,2020-03-15,6.0,2020-03-01,2020-05-10
11911,2,2020-03-22,6.0,2020-03-01,2020-05-10
91821,2,2020-03-29,5.0,2020-03-01,2020-05-10
19868,2,2020-04-05,2.0,2020-03-01,2020-05-10


### Listing 7.6 Days Since Events Metrics

In [95]:
# Date window specific to Listing 7.6.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.list6.params[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-05-03', '2020-05-10')

In [94]:
# Parameters of the days-since-event metric (Listing 7.6).
event2measure, new_metric_id, new_metric_name = (
    chap7.list6.insert.params[key]
    for key in ("%event2measure", "%new_metric_id", "%new_metric_name")
)

event2measure, new_metric_id, new_metric_name

('newfriend', 31, 'days_since_newfriend')

In [102]:
# insert_7_6_days_since_event
# Calculate the number of days since the last event of a given type to form a
# new metric, e.g. days since the last "newfriend" event.
# Preview only: the "INSERT INTO metric" line is commented out inside the SQL.
# date_vals generates one metric date every 7 days across the window; for each
# account and metric date, last_event keeps the latest matching event on or
# before that date.
# Note: the metric name written to metric_name is built from `event2measure`,
# not from the `new_metric_name` variable.

query = f"""\
INSERT into metric_name values ({new_metric_id},concat('days_since_{event2measure}' ))
ON CONFLICT DO NOTHING;

WITH date_vals AS (
  SELECT 
      i::date AS metric_date
    FROM 
      GENERATE_SERIES('{from_yyyy_mm_dd}', '{to_yyyy_mm_dd}', '7 day'::interval) AS i
),
last_event AS (
      SELECT 
          account_id, 
          metric_date, 
          MAX(event_time)::date AS last_date
        FROM 
          event AS e 
  INNER JOIN date_vals AS d
          ON e.event_time::date <= metric_date
  INNER JOIN event_type AS t 
          ON t.event_type_id=e.event_type_id
       WHERE t.event_type_name='{event2measure}'
    GROUP BY account_id, metric_date
    ORDER BY account_id, metric_date
)

-- INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        account_id, 
        metric_date AS metric_time, 
        {new_metric_id} AS metric_name_id,
        metric_date - last_date AS days_since_event, -- metric_value
        metric_date, 
        last_date
      FROM 
        last_event

-- ON CONFLICT DO NOTHING;
"""
# Run the preview, ordered per account and metric time.
res = pd.read_sql_query(query, conn).sort_values(["account_id", "metric_time"])
res.head()

Unnamed: 0,account_id,metric_time,metric_name_id,days_since_event,metric_date,last_date
0,1,2020-05-03,31,17,2020-05-03,2020-04-16
1,1,2020-05-10,31,24,2020-05-10,2020-04-16
2,2,2020-05-03,31,54,2020-05-03,2020-03-10
3,2,2020-05-10,31,61,2020-05-10,2020-03-10
4,4,2020-05-03,31,2,2020-05-03,2020-05-01


**Days since last event**

- this example uses the "new friend" event
- for a given observation date of an account, find its most recent "new friend" event on or before that date, then take the difference in days
- the value indicates the account's engagement in terms of adding new friends


### Listing 7.7 Scaled Events per Month Metrics

In [106]:
# Reset the date window to the chapter 7 defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

In [115]:
# Parameters of the scaled events-per-month metric (Listing 7.7).
# NOTE(review): obs_period is read from list7.params while the other values come
# from list7.insert.params — confirm this is intentional.
desc_period, event2measure, new_metric_id = (
    chap7.list7.insert.params[key]
    for key in ("%desc_period", "%event2measure", "%new_metric_id")
)
obs_period = chap7.list7.params["%obs_period"]

desc_period, obs_period, event2measure, new_metric_id

(28, 84, 'unfriend', 33)

In [118]:
# insert_7_7_scaled_events_per_month
# unfriend event example: unfriend 28 day average with 84 days observation
# Preview only: the "INSERT INTO metric" line is commented out inside the SQL.
# For each metric date (every 7 days in the window) the events falling in
# (metric_date - obs_period days, metric_date] are counted per account and the
# count is multiplied by desc_period / obs_period.

query = f"""\

INSERT into metric_name values ({new_metric_id},'{event2measure}_{desc_period}day_avg_{obs_period}day_obs')
ON CONFLICT DO NOTHING;

WITH date_vals AS (
    SELECT 
        i::timestamp AS metric_date 
      FROM 
        GENERATE_SERIES('{from_yyyy_mm_dd}', '{to_yyyy_mm_dd}', '7 day'::interval) AS i
)

-- INSERT INTO metric (account_id, metric_time, metric_name_id, metric_value)

    SELECT 
        account_id,
        metric_date,
        {new_metric_id} AS metric_name_id,
        (({desc_period})::float / ({obs_period})::float) * COUNT(*) AS metric_value,
        COUNT(*) AS event_count -- number of events within the given range, then multiple with 1/3
        -- desc_period is a month, and obs_period is 3 months
        -- so one-third of the counts as an scaled (average)
      FROM 
        event AS e 
INNER JOIN 
        date_vals AS d
        ON e.event_time <= metric_date 
       AND e.event_time > metric_date - interval '{obs_period} days'
       -- event time "before the metric observation date" and "before the metric minus the observation periods (84 days)"
       -- i.e. within 3 months before metric date
INNER JOIN 
        event_type AS t 
        ON t.event_type_id = e.event_type_id
     WHERE t.event_type_name='{event2measure}'
  GROUP BY account_id, metric_date
  ORDER BY metric_date, account_id

-- ON CONFLICT DO NOTHING;
"""
# Run the preview, ordered per account and metric date.
res = pd.read_sql_query(query, conn).sort_values(["account_id", "metric_date"])
res.head()

Unnamed: 0,account_id,metric_date,metric_name_id,metric_value,event_count
0,1,2020-03-01,33,0.666667,2
4055,1,2020-03-08,33,0.666667,2
8593,1,2020-03-15,33,0.666667,2
13560,1,2020-03-22,33,1.0,3
18994,1,2020-03-29,33,1.333333,4


**Scaled Events per Month**

- this example looks at unfriend events
- unfriend events are counted over the 84-day observation period (about 3 months) and scaled to a 28-day (monthly) average
- the value indicates the user's engagement in terms of unfriend activity

### Listing 7.8 Tenure Scaled Events per Month Metrics

In [121]:
# Date window specific to Listing 7.8.
# NOTE(review): the Listing 7.8 query in this notebook does not interpolate these
# dates — confirm whether a date filter was intended there.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.list8.params[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-05-10', '2020-05-10')

In [122]:
# Parameters of the tenure-scaled events-per-month metric (Listing 7.8).
desc_period, obs_period, min_tenure, event2measure, new_metric_id = (
    chap7.list8.insert.params[key]
    for key in (
        "%desc_period",
        "%obs_period",
        "%min_tenure",
        "%event2measure",
        "%new_metric_id",
    )
)

desc_period, obs_period, min_tenure, event2measure, new_metric_id

(28, 84, 14, 'unfriend', 34)

In [124]:
# insert_7_8_tenure_scaled_events_per_month
# Similar to 7.7, but adjusted for new accounts (i.e. shorter tenure): the
# scaling divisor is LEAST(obs_period, account_tenure) instead of obs_period.
# Preview only: the "INSERT INTO metric" line is commented out inside the SQL.
# The metric dates come from the existing account_tenure metric rows; no
# from/to date filter is applied, and rows with tenure below min_tenure are
# excluded.

query = f"""\

-- scaled on tenure, slightly different to Listing 7.8
INSERT INTO metric_name VALUES ({new_metric_id},'{event2measure}_{desc_period}day_avg_{obs_period}day_obs_scaled')
ON CONFLICT DO NOTHING;

-- INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        m.account_id, 
        metric_time, 
        {new_metric_id} AS metric_name_id,
        ({desc_period} / LEAST({obs_period}, m.metric_value)) * COUNT(*) AS metric_value,
        -- choosing the least between parameterised "obs_period" and "account's metric obseration date "
        COUNT(*) AS event_count
      FROM 
        event AS e 
INNER JOIN 
        metric AS m
        ON m.account_id = e.account_id
       AND event_time <= metric_time
       AND event_time >  metric_time-interval '{obs_period} days'
INNER JOIN 
        event_type AS t 
        ON t.event_type_id=e.event_type_id
INNER JOIN 
        metric_name AS n 
        ON m.metric_name_id = n.metric_name_id
     WHERE t.event_type_name='{event2measure}'
       AND n.metric_name='account_tenure'
       AND metric_value >= {min_tenure}
  GROUP BY m.account_id, metric_time, metric_value    
  ORDER BY m.account_id, metric_time, metric_value

-- ON CONFLICT DO NOTHING;
"""

# Run the preview, ordered per account and metric time.
res = pd.read_sql_query(query, conn).sort_values(["account_id", "metric_time"])
res.head()

Unnamed: 0,account_id,metric_time,metric_name_id,metric_value,event_count
0,1,2020-02-23,34,1.076923,1
1,1,2020-03-01,34,1.69697,2
2,1,2020-03-08,34,1.4,2
3,1,2020-03-15,34,1.191489,2
4,1,2020-03-22,34,1.555556,3


### Listing 7.2 New Observations

In [166]:
# Reset the date window to the chapter 7 defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

In [167]:
# Interval string (e.g. '7 day') that the dataset query uses as its metric period.
metric_interval = chap7.list2.params["%metric_interval"]
metric_interval

'7 day'

In [170]:
# listing_7_2_dataset2
# Creates a new observation dataset.
# The new metrics are expected to have been populated by the previous insert queries.
# Each SUM(CASE ...) turns one hard-coded metric_name_id into its own column,
# with 0 where an observation has no value for that metric (id 26,
# total_opinions, is not pivoted). A metric row is attached to an observation
# when its metric_time falls in (observation_date - metric_period, observation_date].

query = f"""\

WITH observation_params AS (
    SELECT
        INTERVAL '{metric_interval}' AS metric_period,
        '{from_yyyy_mm_dd}'::timestamp AS obs_start,
        '{to_yyyy_mm_dd}'::timestamp AS obs_end
)

    SELECT 
        m.account_id, 
        o.observation_date, 
        is_churn,
        SUM(CASE WHEN metric_name_id=0 THEN metric_value ELSE 0 END) AS like_per_month,
        SUM(CASE WHEN metric_name_id=1 THEN metric_value ELSE 0 END) AS newfriend_per_month,
        SUM(CASE WHEN metric_name_id=2 THEN metric_value ELSE 0 END) AS post_per_month,
        SUM(CASE WHEN metric_name_id=3 THEN metric_value ELSE 0 END) AS adview_per_month,
        SUM(CASE WHEN metric_name_id=4 THEN metric_value ELSE 0 END) AS dislike_per_month,
        SUM(CASE WHEN metric_name_id=5 THEN metric_value ELSE 0 END) AS unfriend_per_month,
        SUM(CASE WHEN metric_name_id=6 THEN metric_value ELSE 0 END) AS message_per_month,
        SUM(CASE WHEN metric_name_id=7 THEN metric_value ELSE 0 END) AS reply_per_month,
        SUM(CASE WHEN metric_name_id=8 THEN metric_value ELSE 0 END) AS account_tenure,
        SUM(CASE WHEN metric_name_id=21 THEN metric_value ELSE 0 END) AS adview_per_post,
        SUM(CASE WHEN metric_name_id=22 THEN metric_value ELSE 0 END) AS reply_per_message,
        SUM(CASE WHEN metric_name_id=23 THEN metric_value ELSE 0 END) AS like_per_post,
        SUM(CASE WHEN metric_name_id=24 THEN metric_value ELSE 0 END) AS post_per_message,
        SUM(CASE WHEN metric_name_id=25 THEN metric_value ELSE 0 END) AS unfriend_per_newfriend,
        SUM(CASE WHEN metric_name_id=27 THEN metric_value ELSE 0 END) AS dislike_pcnt,
        SUM(CASE WHEN metric_name_id=28 THEN metric_value ELSE 0 END) AS unfriend_per_newfriend_scaled,
        SUM(CASE WHEN metric_name_id=30 THEN metric_value ELSE 0 END) AS newfriend_pcnt_chng,
        SUM(CASE WHEN metric_name_id=31 THEN metric_value ELSE 0 END) AS days_since_newfriend,
        SUM(CASE WHEN metric_name_id=33 THEN metric_value ELSE 0 END) AS unfriend_28day_avg_84day_obs,
        SUM(CASE WHEN metric_name_id=34 THEN metric_value ELSE 0 END) AS unfriend_28day_avg_84day_obs_scaled
      FROM 
        metric AS m 
INNER JOIN 
        observation_params
        ON metric_time BETWEEN obs_start AND obs_end
INNER JOIN 
        observation AS o 
        ON m.account_id = o.account_id
       AND m.metric_time > (o.observation_date - metric_period)::timestamp
       AND m.metric_time <= o.observation_date::timestamp
  GROUP BY 
        m.account_id, metric_time, observation_date, is_churn
  ORDER BY 
        observation_date,m.account_id
"""

# Build the dataset, ordered per account and observation date.
res = pd.read_sql_query(query, conn).sort_values(["account_id", "observation_date"])
res.head()

Unnamed: 0,account_id,observation_date,is_churn,like_per_month,newfriend_per_month,post_per_month,adview_per_month,dislike_per_month,unfriend_per_month,message_per_month,...,reply_per_message,like_per_post,post_per_message,unfriend_per_newfriend,dislike_pcnt,unfriend_per_newfriend_scaled,newfriend_pcnt_chng,days_since_newfriend,unfriend_28day_avg_84day_obs,unfriend_28day_avg_84day_obs_scaled
7221,1,2020-03-23,False,2.0,1.0,6.0,2.0,2.0,2.0,7.0,...,0.571429,0.0,0.857143,2.0,0.0,0.0,0.0,0.0,1.0,1.555556
17738,1,2020-04-23,False,1.0,1.0,2.0,4.0,4.0,1.0,7.0,...,0.428571,0.0,0.285714,1.0,0.0,0.0,0.0,0.0,1.333333,1.365854
2295,2,2020-03-08,True,67.0,7.0,49.0,124.0,120.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0
7542,4,2020-03-24,False,37.0,8.0,39.0,40.0,9.0,0.0,39.0,...,0.076923,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.333333,0.528302
18067,4,2020-04-24,False,30.0,6.0,21.0,32.0,4.0,0.0,27.0,...,0.074074,0.0,0.777778,0.0,0.0,0.0,-0.25,0.0,0.333333,0.345679


In [173]:
res.describe()

Unnamed: 0,account_id,like_per_month,newfriend_per_month,post_per_month,adview_per_month,dislike_per_month,unfriend_per_month,message_per_month,reply_per_month,account_tenure,...,reply_per_message,like_per_post,post_per_message,unfriend_per_newfriend,dislike_pcnt,unfriend_per_newfriend_scaled,newfriend_pcnt_chng,days_since_newfriend,unfriend_28day_avg_84day_obs,unfriend_28day_avg_84day_obs_scaled
count,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,...,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0,24450.0
mean,6094.09317,105.869611,7.363149,44.892638,42.641227,15.730429,0.303067,30.849162,8.122781,70.089407,...,0.256738,0.0,6.284334,0.081123,0.0,2.420859,0.179551,0.834683,0.237164,0.306416
std,3550.154896,211.5918,8.808748,81.584549,73.222626,22.407343,0.546136,68.562317,22.296798,26.736672,...,0.313804,0.0,16.943736,0.229067,0.0,12.628524,0.892838,4.706198,0.289446,0.392316
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0
25%,3005.5,18.0,2.0,9.0,9.0,4.0,0.0,5.0,1.0,52.0,...,0.063341,0.0,0.428571,0.0,0.0,0.0,-0.25,0.0,0.0,0.0
50%,6075.5,46.0,5.0,22.0,21.0,9.0,0.0,12.0,2.0,80.0,...,0.173913,0.0,1.52381,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,9133.75,109.0,9.0,50.0,47.0,19.0,1.0,29.0,6.0,85.0,...,0.342105,0.0,5.123162,0.0,0.0,0.0,0.333333,0.0,0.333333,0.528302
max,13310.0,5581.0,162.0,2384.0,1457.0,405.0,5.0,1511.0,579.0,116.0,...,8.0,0.0,423.6,3.0,0.0,348.0,12.0,113.0,2.333333,4.2


## Insert Queries

In [125]:
# Date window for the insert queries, read from the chapter 7 defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

### Listing 7.1 Ratio Metric

In [126]:
# Listing 7.1 configuration: `params` holds the base ratio metric and the
# `v2`..`v7` entries hold the additional versions.
tmp = chap7.list1.insert
_version_keys = [f"v{i}" for i in range(2, 8)]

new_metric_ids = [tmp.params["%new_metric_id"]] + [
    tmp[key]["%new_metric_id"] for key in _version_keys
]
new_metric_names = [tmp.params["%new_metric_name"]] + [
    tmp[key]["%new_metric_name"] for key in _version_keys
]

# (numerator, denominator) pairs; a version without "%den_metric" yields None
ratio_metric_pairs = [(tmp.params["%num_metric"], tmp.params["%den_metric"])]
ratio_metric_pairs += [
    (tmp[key]["%num_metric"], tmp[key].get("%den_metric")) for key in _version_keys
]
ratio_metric_pairs

[('adview_per_month', 'post_per_month'),
 ('reply_per_month', 'message_per_month'),
 ('like_per_month', None),
 ('post_per_month', 'message_per_month'),
 ('unfriend_per_month', 'newfriend_per_month'),
 ('dislike_per_month', 'total_opinions'),
 ('unfriend_28day_avg_84day_obs_scaled', 'newfriend_per_month')]

In [127]:
new_metric_ids, new_metric_names

([21, 22, 23, 24, 25, 27, 28],
 ['adviw_per_post',
  'reply_per_message',
  'like_per_post',
  'post_per_message',
  'unfriend_per_newfriend',
  'dislike_percent',
  'unfriend_per_newfriend_scaled'])

In [128]:
# insert_7_1_ratio_metric
def insert_ratio_metric_query(
    new_metric_id, new_metric_name, num_metric, den_metric, from_yyyy_mm_dd, to_yyyy_mm_dd
):
    """Build the SQL script that registers and populates a ratio metric.

    The returned text holds two statements:

    1. Insert ``(new_metric_id, new_metric_name)`` into ``metric_name``.
    2. Insert into ``metric`` one row for each denominator row in the date
       window, left-joined to the numerator on account and metric time. The
       value is numerator / denominator, with a missing numerator counted as 0
       and a non-positive denominator giving 0.

    Both statements end with ``ON CONFLICT DO NOTHING``.

    Every argument is interpolated into the SQL by the f-string without any
    quoting or escaping, so values must come from a trusted source. A ``None``
    argument is rendered as the text ``None``.

    Args:
        new_metric_id: id written to ``metric_name`` and to the new metric rows.
        new_metric_name: name registered for the new metric.
        num_metric: ``metric_name`` of the numerator metric.
        den_metric: ``metric_name`` of the denominator metric.
        from_yyyy_mm_dd: lower bound of the ``metric_time BETWEEN`` filter.
        to_yyyy_mm_dd: upper bound of the ``metric_time BETWEEN`` filter.

    Returns:
        The SQL script as a string; nothing is executed here.
    """

    query = f"""\
    INSERT into metric_name values ({new_metric_id}, concat('{new_metric_name}'))
    ON CONFLICT DO NOTHING;

    -- numerator metric
    WITH num_metric AS (
                SELECT
                        account_id,
                        metric_time,
                        metric_value AS num_value
                    FROM
                        metric AS m
        INNER JOIN metric_name AS n
                         ON n.metric_name_id = m.metric_name_id
                      AND n.metric_name = '{num_metric}'
                      AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
    ), -- denominator metric
    den_metric AS (
                SELECT
                        account_id,
                        metric_time,
                        metric_value AS den_value
                  FROM
                        metric AS m
        INNER JOIN metric_name AS n
                        ON n.metric_name_id=m.metric_name_id
                   AND n.metric_name = '{den_metric}'
                     AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
    )

    INSERT INTO metric (account_id, metric_time, metric_name_id, metric_value)

             SELECT
                 d.account_id,
                 d.metric_time,
                 {new_metric_id} AS metric_name_id,
          CASE WHEN den_value > 0
                 THEN COALESCE(num_value, 0.0) / den_value
                 ELSE 0
                  END AS metric_value
               FROM den_metric AS d
    LEFT OUTER JOIN num_metric AS n
                 ON n.account_id = d.account_id
                AND n.metric_time = d.metric_time

    ON CONFLICT DO NOTHING;
    """
    
    return query

In [139]:
%%time 

tmp = pd.read_sql_query("SELECT metric_name_id, COUNT(*) FROM metric GROUP BY metric_name_id", conn) 

results = []
for i, (num_metric, den_metric) in enumerate(ratio_metric_pairs):
    new_metric_id = new_metric_ids[i]
    new_metric_name = new_metric_names[i]
    
    if tmp.query(f"metric_name_id == {new_metric_id}").shape[0] != 0: 
        print(f"\"{new_metric_name}\" already created, skipping generating the metric")
        continue
    
    m_query = insert_ratio_metric_query(
        new_metric_id, new_metric_name, 
        num_metric, den_metric,
        from_yyyy_mm_dd, to_yyyy_mm_dd,
    )
    with engine.begin() as connection:
        result = connection.execute(m_query)
    results.append(result)

results

"adviw_per_post" already created, skipping generating the metric
"reply_per_message" already created, skipping generating the metric
"post_per_message" already created, skipping generating the metric
"unfriend_per_newfriend" already created, skipping generating the metric
"unfriend_per_newfriend_scaled" already created, skipping generating the metric
CPU times: user 21.8 ms, sys: 713 µs, total: 22.6 ms
Wall time: 334 ms


[<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb4978da130>,
 <sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb4977a26d0>]

### Listing 7.3 Total Metrics

In [48]:
# Parameters of the "total" metric, read from the insert section of listing 7.3.
# (The earlier read of chap7.list3.params["%metric_list"] was dropped: it was
# overwritten by the next statement before being used.)
total_metric_params = chap7.list3.insert.params
metric_list = total_metric_params["%metric_list"]
new_metric_id = total_metric_params["%new_metric_id"]
new_metric_name = total_metric_params["%new_metric_name"]
metric_list, new_metric_id, new_metric_name

(" 'like_per_month', 'dislike_per_month' ", 26, 'total_opinions')

In [148]:
# insert_7_3_total_metric
# Registers `new_metric_name` and inserts, per account and metric_time, the sum
# of the metrics listed in `metric_list` within the configured date window.
query = f"""\
INSERT into metric_name values ({new_metric_id},'{new_metric_name}')
ON CONFLICT DO NOTHING;

INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        account_id, 
        metric_time, 
        {new_metric_id} AS metric_name_id, 
        sum(metric_value) AS metric_total
      FROM 
        metric AS m 
INNER JOIN 
        metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name IN ({metric_list})
     WHERE metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
  GROUP BY metric_time, account_id

ON CONFLICT DO NOTHING;
"""

# Guard: refuse to regenerate a metric that already has rows. The id comes from
# the config instead of a hard-coded literal so the guard follows the insert.
existing_rows = pd.read_sql_query(f"""\
  SELECT metric_name_id, COUNT(*) FROM metric 
   WHERE metric_name_id = {new_metric_id}  GROUP BY metric_name_id""", conn)

assert existing_rows.shape[0] == 0, f"{new_metric_name} already created."
with engine.begin() as connection:
    result = connection.execute(query)
result

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb497b36cd0>

### Listing 7.4 Percent Change Metrics

In [149]:
# Date window for the percent-change metric: the chapter-level defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

In [150]:
# Parameters of the percent-change metric (insert section of listing 7.4).
pct_change_params = chap7.list4.insert.params
new_metric_id, new_metric_name, metric2measure, period_weeks = (
    pct_change_params[key]
    for key in ("%new_metric_id", "%new_metric_name", "%metric2measure", "%period_weeks")
)

new_metric_id, new_metric_name, metric2measure, period_weeks

(30, 'new_friends_pcnt_change', 'newfriend_per_month', 4)

In [151]:
# insert_7_4_percent_change_metric
# Registers `new_metric_name` and inserts, for every start-period row of
# `metric2measure` with a positive value, end_value / start_value - 1, where the
# end value is the same account's value `period_weeks` weeks later (0 if absent).
# NOTE: the second selected column is aliased `metric_name` in the SQL but fills
# metric_time; INSERT ... SELECT matches columns by position, not by alias.

query = f"""\
INSERT into metric_name values ({new_metric_id},'{new_metric_name}')
ON CONFLICT DO NOTHING;

WITH end_metric AS (
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS end_value
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time BETWEEN '{from_yyyy_mm_dd}' AND '{to_yyyy_mm_dd}'
), 
start_metric AS (
    SELECT 
        account_id, 
        metric_time, 
        metric_value AS start_value
      FROM metric AS m 
INNER JOIN metric_name AS n 
        ON n.metric_name_id = m.metric_name_id
       AND n.metric_name = '{metric2measure}'
       AND metric_time BETWEEN ('{from_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
       AND ('{to_yyyy_mm_dd}'::timestamp - interval '{period_weeks} week')
)

INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

          SELECT 
              s.account_id AS account_id,  
              s.metric_time + interval '{period_weeks} week' AS metric_name, 
              {new_metric_id} AS metric_name_id,
              COALESCE(end_value,0.0) / start_value - 1.0 AS metric_value
            FROM 
              start_metric AS s 
 LEFT OUTER JOIN end_metric AS e
              ON s.account_id = e.account_id
             AND e.metric_time = (s.metric_time + interval '{period_weeks} week')
           WHERE start_value > 0
 
ON CONFLICT DO NOTHING;
"""

# Guard: refuse to regenerate a metric that already has rows. The id comes from
# the config instead of a hard-coded literal so the guard follows the insert.
existing_rows = pd.read_sql_query(f"""\
  SELECT metric_name_id, COUNT(*) FROM metric 
   WHERE metric_name_id = {new_metric_id}  GROUP BY metric_name_id""", conn)

assert existing_rows.shape[0] == 0, f"{new_metric_name} already created."
with engine.begin() as connection:
    result = connection.execute(query)
result

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb497b85ac0>

### Listing 7.6 Days Since Events Metrics

In [152]:
# Listing 7.6 uses its own date window instead of the chapter defaults.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.list6.params[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-05-03', '2020-05-10')

In [153]:
# Parameters of the days-since-event metric (insert section of listing 7.6).
days_since_params = chap7.list6.insert.params
event2measure = days_since_params["%event2measure"]
new_metric_id = days_since_params["%new_metric_id"]
new_metric_name = days_since_params["%new_metric_name"]

event2measure, new_metric_id, new_metric_name

('newfriend', 31, 'days_since_newfriend')

In [155]:
# insert_7_6_days_since_event
# For each date in a 7-day series over the window, inserts per account the
# number of days between that date and the account's latest `event2measure`
# event on or before it.

query = f"""\
INSERT into metric_name values ({new_metric_id},concat('days_since_{event2measure}' ))
ON CONFLICT DO NOTHING;

WITH date_vals AS (
  SELECT 
      i::date AS metric_date
    FROM 
      GENERATE_SERIES('{from_yyyy_mm_dd}', '{to_yyyy_mm_dd}', '7 day'::interval) AS i
),
last_event AS (
      SELECT 
          account_id, 
          metric_date, 
          MAX(event_time)::date AS last_date
        FROM 
          event AS e 
  INNER JOIN date_vals AS d
          ON e.event_time::date <= metric_date
  INNER JOIN event_type AS t 
          ON t.event_type_id=e.event_type_id
       WHERE t.event_type_name='{event2measure}'
    GROUP BY account_id, metric_date
    ORDER BY account_id, metric_date
)

INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        account_id, 
        metric_date AS metric_time, 
        {new_metric_id} AS metric_name_id,
        metric_date - last_date AS days_since_event -- metic_value
      FROM 
        last_event

ON CONFLICT DO NOTHING;
"""

# Guard: refuse to regenerate a metric that already has rows. The id comes from
# the config instead of a hard-coded literal so the guard follows the insert.
existing_rows = pd.read_sql_query(f"""\
  SELECT metric_name_id, COUNT(*) FROM metric 
   WHERE metric_name_id = {new_metric_id}  GROUP BY metric_name_id""", conn)

assert existing_rows.shape[0] == 0, f"{new_metric_name} already created."
with engine.begin() as connection:
    result = connection.execute(query)
result


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb4975f85e0>

### Listing 7.7 Scaled Events per Month Metrics

In [156]:
# Back to the chapter-level default window (listing 7.6 used a narrower one).
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.defaults[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-03-01', '2020-05-10')

In [157]:
# Parameters of the scaled events-per-month metric (listing 7.7).
# NOTE(review): obs_period is read from list7.params while the other values come
# from list7.insert.params; confirm the different source is intentional.
scaled_insert_params = chap7.list7.insert.params
desc_period = scaled_insert_params["%desc_period"]
obs_period = chap7.list7.params["%obs_period"]
event2measure = scaled_insert_params["%event2measure"]
new_metric_id = scaled_insert_params["%new_metric_id"]

desc_period, obs_period, event2measure, new_metric_id

(28, 84, 'unfriend', 33)

In [160]:
# insert_7_7_scaled_events_per_month
# For each date in a 7-day series over the window, inserts per account the count
# of `event2measure` events in the trailing `obs_period` days, scaled by
# desc_period / obs_period.

# Name registered for this metric. It is built here because `new_metric_name`
# is not set by this listing's parameter cell.
scaled_metric_name = f"{event2measure}_{desc_period}day_avg_{obs_period}day_obs"

query = f"""\

INSERT into metric_name values ({new_metric_id},'{scaled_metric_name}')
ON CONFLICT DO NOTHING;

WITH date_vals AS (
    SELECT 
        i::timestamp AS metric_date 
      FROM 
        GENERATE_SERIES('{from_yyyy_mm_dd}', '{to_yyyy_mm_dd}', '7 day'::interval) AS i
)

INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        account_id, 
        metric_date, 
        {new_metric_id} AS metric_name_id,
        (({desc_period})::float / ({obs_period})::float) * COUNT(*) AS metric_value
      FROM 
        event AS e 
INNER JOIN 
        date_vals AS d
        ON e.event_time <= metric_date 
       AND e.event_time > metric_date - interval '{obs_period} days'
INNER JOIN 
        event_type AS t 
        ON t.event_type_id=e.event_type_id
     WHERE t.event_type_name='{event2measure}'
  GROUP BY account_id, metric_date
  ORDER BY metric_date, account_id

ON CONFLICT DO NOTHING;
"""

# Guard: refuse to regenerate a metric that already has rows. The id comes from
# the config instead of a hard-coded literal so the guard follows the insert.
existing_rows = pd.read_sql_query(f"""\
  SELECT metric_name_id, COUNT(*) FROM metric 
   WHERE metric_name_id = {new_metric_id}  GROUP BY metric_name_id""", conn)

assert existing_rows.shape[0] == 0, f"{scaled_metric_name} already created."
with engine.begin() as connection:
    result = connection.execute(query)
result


<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb4978dad60>

### Listing 7.8 Tenure Scaled Events per Month Metrics

In [161]:
# Listing 7.8 reads its own date window from its params section.
from_yyyy_mm_dd, to_yyyy_mm_dd = (
    chap7.list8.params[key] for key in ("%from_yyyy-mm-dd", "%to_yyyy-mm-dd")
)
from_yyyy_mm_dd, to_yyyy_mm_dd

('2020-05-10', '2020-05-10')

In [162]:
# Parameters of the tenure-scaled metric (insert section of listing 7.8).
tenure_scaled_params = chap7.list8.insert.params
desc_period, obs_period, min_tenure, event2measure, new_metric_id = (
    tenure_scaled_params[key]
    for key in ("%desc_period", "%obs_period", "%min_tenure", "%event2measure", "%new_metric_id")
)

desc_period, obs_period, min_tenure, event2measure, new_metric_id

(28, 84, 14, 'unfriend', 34)

In [165]:
# insert_7_8_tenure_scaled_events_per_month
# For every `account_tenure` metric row with value >= `min_tenure`, inserts the
# count of the account's `event2measure` events in the trailing `obs_period`
# days, scaled by desc_period / least(obs_period, tenure).
# NOTE(review): from_yyyy_mm_dd / to_yyyy_mm_dd are not referenced by this query.

# Name registered for this metric. It is built here because `new_metric_name`
# is not set by this listing's parameter cell.
scaled_metric_name = f"{event2measure}_{desc_period}day_avg_{obs_period}day_obs_scaled"

query = f"""\

INSERT INTO metric_name VALUES ({new_metric_id},'{scaled_metric_name}')
ON CONFLICT DO NOTHING;


INSERT INTO metric (account_id,metric_time,metric_name_id,metric_value)

    SELECT 
        m.account_id, 
        metric_time, 
        {new_metric_id} AS metric_name_id,
        ({desc_period} / least({obs_period}, m.metric_value)) * count(*) AS metric_value
      FROM 
        event AS e 
INNER JOIN 
        metric AS m
        ON m.account_id = e.account_id
       AND event_time <= metric_time
       AND event_time >  metric_time-interval '{obs_period} days'
INNER JOIN 
        event_type AS t 
        ON t.event_type_id=e.event_type_id
INNER JOIN 
        metric_name AS n 
        ON m.metric_name_id = n.metric_name_id
     WHERE t.event_type_name='{event2measure}'
       AND n.metric_name='account_tenure'
       AND metric_value >= {min_tenure}
  GROUP BY m.account_id, metric_time, metric_value    
  ORDER BY m.account_id, metric_time, metric_value

ON CONFLICT DO NOTHING;
"""

# Guard: refuse to regenerate a metric that already has rows. The id comes from
# the config instead of a hard-coded literal so the guard follows the insert.
existing_rows = pd.read_sql_query(f"""\
  SELECT metric_name_id, COUNT(*) FROM metric 
   WHERE metric_name_id = {new_metric_id}  GROUP BY metric_name_id""", conn)

assert existing_rows.shape[0] == 0, f"{scaled_metric_name} already created."
with engine.begin() as connection:
    result = connection.execute(query)
result

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb4978da1f0>