In [None]:
from odps_client import get_odps_sql_result_as_df

# Per-customer click activity pulled from the mall log table.
# For rows with ds >= '20240901' and envent_type = 'cl', the query returns
# one row per cust_id with: the number of distinct ds values, the count of
# rows whose bid matches the goods/sku pattern, and the count of rows whose
# bid or pid matches the '唤起购买' pattern.
# NOTE(review): the filter column is spelled `envent_type` — confirm this is
# the real column name in the table and not a typo for `event_type`.
# NOTE(review): there is no upper bound on ds, so results change from day to
# day — confirm that is intended.
user_click_sql = """
SELECT  cust_id
        ,COUNT(DISTINCT ds) AS 活跃天数
        ,COUNT(CASE    WHEN bid LIKE '%pid:goods,sku:%' THEN 1 END) 商品详情页点击次数
        ,COUNT(CASE    WHEN bid LIKE '%pid:唤起购买%' OR pid = '唤起购买' THEN 1 END) 购物车卡片点击次数
FROM    summerfarm_tech.dwd_log_mall_di a
WHERE   a.ds >= '20240901'
AND     envent_type = 'cl'
GROUP BY cust_id
;"""
user_click_df = get_odps_sql_result_as_df(user_click_sql)

# Preview the first two rows of the result.
user_click_df.head(2)

In [None]:
# Merchants from the latest partition of ods_merchant_df, restricted to
# size = '单店', register_time >= 2024-09-01, islock = 0 and
# operate_status in (0, 2, 3). Each row is labelled by invite code:
# 'seeegj' -> '自主注册', anything else -> 'BD邀请'.
user_register_sql = """
SELECT  m_id AS cust_id
        ,CASE   WHEN invitecode = 'seeegj' THEN '自主注册'
                ELSE 'BD邀请'
        END 是否自主注册
        ,register_time 注册日期
FROM    summerfarm_tech.ods_merchant_df
WHERE   ds = MAX_PT("summerfarm_tech.ods_merchant_df")
AND     size = '单店'
AND     register_time >= '2024-09-01 00:00:00'
AND     islock = 0
AND     operate_status IN (0,2,3) -- 0:已审核通过，2:待提交信息，3:已提交信息
;"""
user_register_df = get_odps_sql_result_as_df(user_register_sql)

# Reduce the registration timestamp to its date part: stringify the value,
# then keep only the text before the first space.
user_register_df['注册日期'] = (
    user_register_df['注册日期'].astype(str).str.split(' ').str[0]
)

# Preview the first two rows of the result.
user_register_df.head(2)

In [None]:
# Registration cohort joined to orders.
# Subquery `a`: merchants from the latest ods_merchant_df partition with
# size = '单店', islock = 0, operate_status in (0, 2, 3) and register_time in
# [2024-09-14, 2024-09-29). Each is labelled by invite code and by whether
# register_time is strictly after 2024-09-21 00:00:00.
# LEFT JOIN: keeps every merchant in `a`; orders from the latest
# ods_orders_df partition are attached only when order_time is no later than
# register_time + 14 days. Merchants with no matching order therefore have
# NULL order columns.
# NOTE(review): the join sets no lower bound on order_time, so orders placed
# before registration are attached too — downstream code must filter them.
user_orders_sql = """
SELECT  a.*
        ,b.total_price
        ,b.order_no
        ,b.order_time
FROM    (
            SELECT  m_id AS cust_id
                    ,CASE   WHEN invitecode = 'seeegj' THEN '自主注册'
                            ELSE 'BD邀请'
                    END 是否自主注册
                    ,CASE   WHEN register_time > '2024-09-21 00:00:00' THEN '优化后注册'
                            ELSE '优化前'
                    END 是否优化后注册
                    ,register_time
            FROM    summerfarm_tech.ods_merchant_df
            WHERE   ds = MAX_PT("summerfarm_tech.ods_merchant_df")
            AND     size = '单店'
            AND     register_time >= '2024-09-14 00:00:00'
            AND     register_time < '2024-09-29 00:00:00'
            AND     islock = 0
            AND     operate_status IN (0,2,3) -- 0:已审核通过，2:待提交信息，3:已提交信息
        ) a
LEFT JOIN summerfarm_tech.ods_orders_df b
ON      b.ds = MAX_PT("summerfarm_tech.ods_orders_df")
AND     a.cust_id = b.m_id
AND     b.order_time <= DATEADD(a.register_time,14,'dd') -- 注册后14天内的行为
;
"""
user_orders_df = get_odps_sql_result_as_df(user_orders_sql)

# Preview the first two rows of the result.
user_orders_df.head(2)

In [None]:
import pandasql

# Normalise total_price: rows without a matching order get 0.0, and the
# column is cast to float.
# The result is assigned back instead of calling
# `user_orders_df['total_price'].fillna(..., inplace=True)`: that form is a
# chained in-place call on a column selection, which raises a FutureWarning
# on pandas 2.x and does not update the DataFrame under Copy-on-Write.
user_orders_df['total_price'] = (
    user_orders_df['total_price'].fillna(0.0).astype(float)
)

# Reusable SQL fragments (SQLite dialect, executed by pandasql).
# An order counts only when it was placed strictly after registration and at
# most 14 days later.
in_window_sql = (
    "order_time > register_time "
    "AND julianday(order_time) - julianday(register_time) <= 14"
)
# Total price of in-window orders; rows outside the window contribute 0.
gmv_sql = f"SUM(CASE WHEN {in_window_sql} THEN total_price ELSE 0 END)"
# Distinct customers with at least one in-window order.
buyers_sql = (
    "COUNT(DISTINCT CASE WHEN order_no IS NOT NULL "
    f"AND {in_window_sql} THEN cust_id END)"
)
# Distinct in-window orders.
orders_sql = f"COUNT(DISTINCT CASE WHEN {in_window_sql} THEN order_no END)"

# One summary row per (是否自主注册, 是否优化后注册) cohort.
# NULLIF(..., 0) turns a zero denominator into NULL so the per-buyer and
# per-order averages come back as NULL rather than dividing by zero.
summary_sql = f"""
SELECT 是否自主注册, 是否优化后注册
    ,COUNT(DISTINCT cust_id) AS 总人数
    ,{buyers_sql} AS 总下单人数
    ,{orders_sql} AS 总下单笔数
    ,{gmv_sql} AS 总GMV
    ,{gmv_sql} / COUNT(DISTINCT cust_id) AS 人均GMV
    ,{gmv_sql} / NULLIF({buyers_sql}, 0) AS 下单人群人均GMV
    ,{gmv_sql} / NULLIF({orders_sql}, 0) AS 订单均价
    ,MIN(CASE WHEN {in_window_sql} THEN order_time END) || '~' ||
     MAX(CASE WHEN {in_window_sql} THEN order_time END) AS 下单日期范围
    ,CAST({buyers_sql} AS FLOAT) / COUNT(DISTINCT cust_id) AS 下单转化率
FROM user_orders_df
GROUP BY 是否自主注册, 是否优化后注册
ORDER BY 是否自主注册, 是否优化后注册
"""

# No env is passed, so pandasql looks up `user_orders_df` in the calling
# namespace, as the original call did.
df = pandasql.sqldf(summary_sql)

df

In [None]:
# describe() output for rows whose 是否自主注册 label is '自主注册'.
user_orders_df.loc[user_orders_df['是否自主注册'].eq('自主注册')].describe()

In [None]:
# describe() output for rows labelled '自主注册' and '优化前'.
user_orders_df.loc[
    user_orders_df['是否自主注册'].eq('自主注册')
    & user_orders_df['是否优化后注册'].eq('优化前')
].describe()

In [None]:
# describe() output for rows labelled '自主注册' and '优化后注册'.
user_orders_df.loc[
    user_orders_df['是否自主注册'].eq('自主注册')
    & user_orders_df['是否优化后注册'].eq('优化后注册')
].describe()