In [None]:
# ! pip3 install bytedtqs 

In [1]:
import os
import random
from datetime import date, timedelta
from itertools import combinations

import numpy as np
import pandas as pd
from IPython.display import display, HTML
from scipy.special import factorial, comb

import bytedtqs

## 分析配置

生成默认日期，注意如果要分析 Dx 转化，日期需要自己手动配置。

In [2]:
# Reference dates, all rendered as 'YYYY-MM-DD' strings relative to today.
def _days_ago(n_days):
    """Return the date `n_days` before today formatted as 'YYYY-MM-DD'."""
    return (date.today() - timedelta(days=n_days)).strftime('%Y-%m-%d')


# Un-shifted reference points: 1, 2 and 8 days before today.
YESTERDAY = _days_ago(1)
DOD = _days_ago(2)
WOW = _days_ago(8)

# The same three reference points shifted a further 3 days back.
YESTERDAY_3 = _days_ago(1 + 3)
DOD_3 = _days_ago(2 + 3)
WOW_3 = _days_ago(8 + 3)

# ... shifted a further 6 days back.
YESTERDAY_6 = _days_ago(1 + 6)
DOD_6 = _days_ago(2 + 6)
WOW_6 = _days_ago(8 + 6)

# ... shifted a further 8 days back.
YESTERDAY_8 = _days_ago(1 + 8)
DOD_8 = _days_ago(2 + 8)
WOW_8 = _days_ago(8 + 8)

# Single-day [start, end] ranges built from the reference dates above.
wow, dod, ytd = [WOW, WOW], [DOD, DOD], [YESTERDAY, YESTERDAY]
wow_3, dod_3, ytd_3 = [WOW_3, WOW_3], [DOD_3, DOD_3], [YESTERDAY_3, YESTERDAY_3]
wow_6, dod_6, ytd_6 = [WOW_6, WOW_6], [DOD_6, DOD_6], [YESTERDAY_6, YESTERDAY_6]
wow_8, dod_8, ytd_8 = [WOW_8, WOW_8], [DOD_8, DOD_8], [YESTERDAY_8, YESTERDAY_8]

## 数据源配置

**提前为分析场景配置的，一般不需要修改。**

默认一次加载过去 DAYS_BACK 天的数据，减少 DB 读取，后续都通过 Python 处理。

In [3]:
# Window of install dates loaded from the warehouse in a single query.
DAYS_BACK = 90

# The window runs from DAYS_BACK days before today up to yesterday (inclusive).
data_date_start, data_date_end = (
    date.today() - timedelta(days=DAYS_BACK),
    date.today() - timedelta(days=1),
)

# 'YYYY-MM-DD' strings substituted into the SQL template.
start_date = data_date_start.isoformat()
end_date = data_date_end.isoformat()

每日分维度的基础指标的 SQL。**注意：维度如果包含 NULL 值，pandas 聚合结果就不准确。**（pandas `groupby` 默认会丢弃分组键为 NULL/NaN 的行，因此 SQL 中统一用 `coalesce(..., 'unknown')` 处理。）

In [4]:
# SQL template for the raw daily data.
# - Reads dm_ky.app_newer_roi_df, restricted to the partition whose `date` is
#   yesterday (formatted yyyyMMdd) and to install dates between the
#   {start_date} and {end_date} placeholders (filled in later via str.format).
# - Produces one row per install_date and dimension combination; every NULL
#   dimension is coalesced to 'unknown' and channel_user_name is bucketed
#   into a coarser `channel` label by the CASE expression.
# - Sums the d0/d3/d5/d6/d8 funnel counters; revenue columns are divided by
#   100 before summing (presumably a minor-unit to major-unit currency
#   conversion -- TODO confirm).
# - The GROUP BY list repeats the SELECT dimension expressions verbatim, so
#   the two lists must be kept in sync when a dimension is changed.
sql = """
set tqs.query.engine.type=sparkcli;

select
  install_date
  ,coalesce(app_name,'unknown') as app_name
  ,coalesce(os,'unknown') as os
  ,case when channel_user_name in ('toutiao_promote','toutiaodsp_new') then '内广'     
    when channel_user_name in ('store_appstore','AppStore') then 'Apple Store'  
    when channel_user_name in ('googleadwords_int','store_google','google','Facebook') then 'Google-FB'     
    when channel_user_name in ('huawei_id','huaweipps_id','huaweiywjj_id','xiaomi_id') then '华为小米'      
    when channel_user_name = 'oppo_id' then 'Oppo'
    when channel_user_name = 'vivo_id' then 'Vivo'
    when channel_user_name = 'guangdiantong' then '广点通' 
    else '其他' end as channel
  ,coalesce(occupation,'unknown') as occupation
  ,coalesce(first_level_name,'unknown') as first_level_name
  ,coalesce(first_milestone_name,'unknown') as first_milestone_name
  ,coalesce(edu,'unknown') as edu
  ,coalesce(age,'unknown') as age
  ,coalesce(city_level,'unknown') as city_level
  ,coalesce(career,'unknown') as career
  ,sum(dnu_did) as dnu
  ,sum(d0_copy_wx) as d0_copy_wx
  ,sum(d3_copy_wx) as d3_copy_wx
  ,sum(d5_copy_wx) as d5_copy_wx
  ,sum(d6_copy_wx) as d6_copy_wx
  ,sum(d8_copy_wx) as d8_copy_wx
  ,sum(d0_enter_camp) as d0_enter_camp
  ,sum(d3_enter_camp) as d3_enter_camp
  ,sum(d5_enter_camp) as d5_enter_camp
  ,sum(d6_enter_camp) as d6_enter_camp
  ,sum(d8_enter_camp) as d8_enter_camp
  ,sum(d0_entered_wechat_group) as d0_enter_group
  ,sum(d3_entered_wechat_group) as d3_enter_group
  ,sum(d5_entered_wechat_group) as d5_enter_group
  ,sum(d6_entered_wechat_group) as d6_enter_group
  ,sum(d8_entered_wechat_group) as d8_enter_group
  ,sum(d0_study_10_mins) as d0_study
  ,sum(d3_study_10_mins) as d3_study
  ,sum(d5_study_10_mins) as d5_study
  ,sum(d6_study_10_mins) as d6_study
  ,sum(d8_study_10_mins) as d8_study
  ,sum(d0_in_wechat_group_total_cnt) as d0_group_order
  ,sum(d3_in_wechat_group_total_cnt) as d3_group_order
  ,sum(d5_in_wechat_group_total_cnt) as d5_group_order
  ,sum(d6_in_wechat_group_total_cnt) as d6_group_order
  ,sum(d8_in_wechat_group_total_cnt) as d8_group_order
  ,sum(d0_in_wechat_group_order_revenue/100) as d0_group_revenue
  ,sum(d3_in_wechat_group_order_revenue/100) as d3_group_revenue
  ,sum(d5_in_wechat_group_order_revenue/100) as d5_group_revenue
  ,sum(d6_in_wechat_group_order_revenue/100) as d6_group_revenue
  ,sum(d8_in_wechat_group_order_revenue/100) as d8_group_revenue
  ,sum(d0_out_wechat_group_total_cnt) as d0_out_group_order
  ,sum(d3_out_wechat_group_total_cnt) as d3_out_group_order
  ,sum(d5_out_wechat_group_total_cnt) as d5_out_group_order
  ,sum(d6_out_wechat_group_total_cnt) as d6_out_group_order
  ,sum(d8_out_wechat_group_total_cnt) as d8_out_group_order
  ,sum(d0_out_wechat_group_order_revenue/100) as d0_out_group_revenue
  ,sum(d3_out_wechat_group_order_revenue/100) as d3_out_group_revenue
  ,sum(d5_out_wechat_group_order_revenue/100) as d5_out_group_revenue
  ,sum(d6_out_wechat_group_order_revenue/100) as d6_out_group_revenue
  ,sum(d8_out_wechat_group_order_revenue/100) as d8_out_group_revenue
from dm_ky.app_newer_roi_df
where date = date_format(date_add(current_date,-1),'yyyyMMdd')
  and install_date between '{start_date}' and '{end_date}'
group by install_date
  ,coalesce(app_name,'unknown')
  ,coalesce(os,'unknown')
  ,case when channel_user_name in ('toutiao_promote','toutiaodsp_new') then '内广'     
    when channel_user_name in ('store_appstore','AppStore') then 'Apple Store'  
    when channel_user_name in ('googleadwords_int','store_google','google','Facebook') then 'Google-FB'     
    when channel_user_name in ('huawei_id','huaweipps_id','huaweiywjj_id','xiaomi_id') then '华为小米'      
    when channel_user_name = 'oppo_id' then 'Oppo'
    when channel_user_name = 'vivo_id' then 'Vivo'
    when channel_user_name = 'guangdiantong' then '广点通' 
    else '其他' end
  ,coalesce(occupation,'unknown')
  ,coalesce(first_level_name,'unknown')
  ,coalesce(first_milestone_name,'unknown')
  ,coalesce(edu,'unknown')
  ,coalesce(age,'unknown')
  ,coalesce(city_level,'unknown')
  ,coalesce(career,'unknown')
"""

后续按不同日期和维度在 Python 内聚合，提前写好聚合函数。

In [5]:
def custom_aggregate(x):
    """Collapse one group of raw daily rows into totals and funnel ratios.

    Intended to be used with ``DataFrame.groupby(...).apply``.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of the raw data. Must contain ``dnu`` and, for every day prefix
        in d0/d3/d5/d6/d8, the columns ``<day>_copy_wx``, ``<day>_enter_camp``,
        ``<day>_enter_group``, ``<day>_study``, ``<day>_group_order``,
        ``<day>_group_revenue``, ``<day>_out_group_order`` and
        ``<day>_out_group_revenue``.

    Returns
    -------
    pandas.Series
        ``新增`` (summed ``dnu``), the summed counters per day prefix, and the
        derived ratios per day prefix. Every ratio is 0 when its denominator
        is 0.

    Notes
    -----
    Two fixes relative to the previous literal dict:

    * ``d6_报名到入群`` and ``d6_群内人均营收`` were written under the ``d5_`` keys,
      which silently replaced the d5 values with d6 data and never produced
      the d6 entries. Both day prefixes are now emitted correctly.
    * Ratios whose denominator is the number of new users now return 0 when
      that denominator is 0, like every other ratio here, instead of
      producing NaN/inf.
    """
    days = ('d0', 'd3', 'd5', 'd6', 'd8')

    # (output label, raw column suffix) -- order defines the output order.
    count_specs = (
        ('复制微信', 'copy_wx'),
        ('报名', 'enter_camp'),
        ('入群', 'enter_group'),
        ('学习', 'study'),
        ('群内订单', 'group_order'),
        ('群内销售', 'group_revenue'),
        ('群外订单', 'out_group_order'),
        ('群外销售', 'out_group_revenue'),
    )

    # (output label, numerator suffix, denominator). The denominator is a raw
    # column suffix, 'dnu' (all new users) or 'non_group' (new users minus
    # users who entered the group on that day prefix).
    ratio_specs = (
        ('新增到复制', 'copy_wx', 'dnu'),
        ('新增到报名', 'enter_camp', 'dnu'),
        ('新增到入群', 'enter_group', 'dnu'),
        ('报名到复制', 'copy_wx', 'enter_camp'),
        ('复制到入群', 'enter_group', 'copy_wx'),
        ('报名到入群', 'enter_group', 'enter_camp'),
        ('群内人均营收', 'group_revenue', 'enter_group'),
        ('新增人均营收', 'group_revenue', 'dnu'),
        ('群内转化率', 'group_order', 'enter_group'),
        ('新增转化率', 'group_order', 'dnu'),
        ('群外转化率', 'out_group_order', 'non_group'),
        ('群外人均营收', 'out_group_revenue', 'non_group'),
        ('学习率', 'study', 'dnu'),
    )

    # Sum every raw column once instead of once per use.
    dnu = x['dnu'].sum()
    totals = {
        (day, suffix): x[f'{day}_{suffix}'].sum()
        for day in days
        for _, suffix in count_specs
    }

    def denominator(day, kind):
        if kind == 'dnu':
            return dnu
        if kind == 'non_group':
            return dnu - totals[(day, 'enter_group')]
        return totals[(day, kind)]

    s = {'新增': dnu}

    for label, suffix in count_specs:
        for day in days:
            s[f'{day}_{label}'] = totals[(day, suffix)]

    for label, numerator, kind in ratio_specs:
        for day in days:
            den = denominator(day, kind)
            s[f'{day}_{label}'] = 0 if den == 0 else totals[(day, numerator)] / den

    return pd.Series(s, index=s.keys())


def custom_results(df, dim_str_list, date_range_list):
    """Aggregate the raw data by the given dimensions within a date range.

    Rows whose ``install_date`` lies between the two entries of
    ``date_range_list`` are grouped by ``dim_str_list`` and reduced with
    ``custom_aggregate``. Grand totals over all groups and each group's share
    of those totals are then appended as extra columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing ``install_date`` plus the columns needed by
        ``custom_aggregate``.
    dim_str_list : str or list of str
        Column name(s) to group by.
    date_range_list : sequence
        Two-element ``[start, end]`` range unpacked into ``Series.between``.

    Returns
    -------
    pandas.DataFrame
        One row per dimension combination.
    """
    day_prefixes = ('d0', 'd3', 'd5', 'd6', 'd8')
    funnel_steps = ('复制微信', '报名', '入群')

    in_range = df['install_date'].between(*date_range_list)
    result = df[in_range].groupby(dim_str_list).apply(custom_aggregate).reset_index()

    # Grand totals, broadcast to every row.
    result['总新增'] = result['新增'].sum()
    for step in funnel_steps:
        for day in day_prefixes:
            result[f'{day}_总{step}'] = result[f'{day}_{step}'].sum()

    # Share of each group in the grand totals (0 when the total is 0).
    result['新增占比'] = result['新增'] / result['总新增']
    for step in funnel_steps:
        for day in day_prefixes:
            total = result[f'{day}_总{step}']
            result[f'{day}_{step}占比'] = np.where(total == 0, 0, result[f'{day}_{step}'] / total)

    return result

In [6]:
# Fill the install-date window into the SQL template.
sql_w_param = sql.format(start_date=start_date, end_date=end_date)

# TQS credentials are read from the environment instead of being committed
# with the notebook.
# NOTE(review): the app id/key that used to be hard-coded here should be
# treated as leaked and rotated.
app_id = os.environ.get('TQS_APP_ID')
app_key = os.environ.get('TQS_APP_KEY')
if not app_id or not app_key:
    raise RuntimeError(
        'Set the TQS_APP_ID and TQS_APP_KEY environment variables before running this cell.'
    )
# The submitting user can be overridden; the previous value stays the default.
user_name = os.environ.get('TQS_USER_NAME', 'wufei.97')

# Submit the query.
client = bytedtqs.TQSClient(app_id=app_id, app_key=app_key)
hive_job = client.execute_query(user_name=user_name, query=sql_w_param)

# URL of the result file produced by the job.
hive_results = hive_job.get_result().result_url

[2020-11-26 18:33:24,070] - [INFO] - job submitted, job_id: 158734248
[2020-11-26 18:33:24,098] - [INFO] - job_id: 158734248, engine_type: Hive, status: Created
[2020-11-26 18:33:26,132] - [INFO] - job_id: 158734248, engine_type: Hive, status: Created
[2020-11-26 18:33:28,161] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing
[2020-11-26 18:33:30,199] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing
[2020-11-26 18:33:32,229] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing
[2020-11-26 18:33:34,255] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing
[2020-11-26 18:33:36,848] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing
[2020-11-26 18:33:38,885] - [INFO] - job_id: 158734248, engine_type: SparkCli, status: Processing, tracking_urls: http://n11-043-024.byted.org:8060/proxy/application_1606292494524_121534/
[2020-11-26 18:33:40,918] - [INFO] - job_id: 158734248, engine_type: S

In [7]:
# Load the query result file (CSV at the result URL) into a DataFrame.
# Note: no app_name filtering is applied here; all rows are kept.
df_raw = pd.read_csv(hive_results)

Shapley Value 分析算法与结果展示。

In [8]:
class ShapAnalysis:
    """Attribute the change of an overall metric between two periods to
    (dimension value, variable) pairs with sampled Shapley values.

    Typical call order: ``process_data()`` -> ``analysis()`` ->
    ``process_results()`` -> ``display_contribution()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data handed to ``custom_results``.
    date_base, date_curr : sequence
        ``[start, end]`` date ranges of the base and the current period.
    dim : list of str
        Dimension column names to break the metric down by.
    var : list of str
        Columns of the ``custom_results`` output that act as the variables
        whose change is being attributed.
    metric : object
        Stored on the instance; not read by any method of this class.
    weight : str
        Column of the ``custom_results`` output shown as the group weight
        (``群体比重``) in the per-dimension tables.
    func : callable
        Takes a prepared frame (the ``var`` columns plus the combined
        dimension column) and returns the overall metric as a scalar.
    """

    def __init__(self, df, date_base, date_curr, dim, var, metric, weight, func):
        self.df = df
        self.date_base = date_base
        self.date_curr = date_curr
        self.dim = dim
        self.var = var
        self.metric = metric
        self.weight = weight
        self.func = func
        # placeholders, filled in by process_data()
        self.metric_base = 0
        self.metric_curr = 0
        self.metric_delta = 0

    @staticmethod
    def _hide_index(styler):
        """Hide the index of a Styler on both old and new pandas versions."""
        # Styler.hide_index() no longer exists in pandas 2.0; Styler.hide()
        # is its replacement but is missing on older releases.
        if hasattr(styler, 'hide'):
            return styler.hide(axis='index')
        return styler.hide_index()

    def _dim_mask(self, frame, dim_value):
        """Boolean array marking the rows of `frame` whose combined dimension equals `dim_value`."""
        # Compare element by element in Python so a tuple is always treated
        # as a single value rather than as an array-like operand.
        return frame[self.new_dim_col].map(lambda t: t == dim_value).to_numpy(dtype=bool)

    def _prepare_frame(self, date_range):
        """Aggregate one period and reduce it to the `var` columns plus the combined dimension column."""
        frame = custom_results(self.df, self.dim, date_range)

        # fill NA
        frame[self.dim] = frame[self.dim].astype(str).fillna('_')
        frame[self.var] = frame[self.var].fillna(0)

        # Copy the column subset so the assignment below does not write into
        # a slice of the aggregated frame.
        frame = frame[self.dim + self.var].copy()

        # combine dimensions into a tuple, then drop the separate dim columns
        frame[self.new_dim_col] = frame[self.dim].apply(tuple, axis=1)
        return frame.drop(self.dim, axis=1)

    def _with_missing_dims(self, frame):
        """Return `frame` extended with all-zero rows for dimension values it lacks."""
        present = set(frame[self.new_dim_col])
        missing = [d for d in self.dim_uniq if d not in present]
        if not missing:
            return frame
        filler = pd.DataFrame(
            [{**{v: 0 for v in self.var}, self.new_dim_col: d} for d in missing]
        )
        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        return pd.concat([frame, filler[list(frame.columns)]], ignore_index=True)

    def process_data(self):
        """Build the base/current frames and compute the overall metric change."""
        self.new_dim_col = '_dim'
        df_base = self._prepare_frame(self.date_base)
        df_curr = self._prepare_frame(self.date_curr)

        # find the set of all dim values
        self.dim_uniq = pd.concat([df_base[self.new_dim_col], df_curr[self.new_dim_col]]).unique()

        # make sure both dataframes have records for all dim values
        self.df_base = self._with_missing_dims(df_base)
        self.df_curr = self._with_missing_dims(df_curr)

        # calc overall metrics
        self.metric_base = self.func(self.df_base)
        self.metric_curr = self.func(self.df_curr)
        self.metric_delta = self.metric_curr - self.metric_base

    def analysis(self, sample_size=2):
        """Accumulate marginal contributions over randomly ordered player sequences.

        A player is one (dimension value, variable) pair. For each sampled
        ordering the base frame is switched to the current values one player
        at a time, and the change of ``func`` caused by each switch is added
        to ``self.phi`` for that player.

        Parameters
        ----------
        sample_size : int, default 2
            Number of random orderings to evaluate, capped at N! where N is
            the number of players.
        """
        # players: dim x variable
        players = [(i, j) for i in range(len(self.dim_uniq)) for j in range(len(self.var))]

        N = len(players)
        # exact=True yields a Python int; the default float result made
        # range() fail whenever N! was smaller than sample_size.
        sample_size = int(min(sample_size, factorial(N, exact=True)))

        # Fixed seed so repeated runs draw the same orderings.
        random.seed(666)
        seq_list = list()
        for _ in range(sample_size):
            seq = list(range(N))
            random.shuffle(seq)
            seq_list.append(seq)
        self.n_sequences = len(seq_list)

        # Row masks and replacement values do not depend on the ordering, so
        # compute them once up front.
        base_masks = [self._dim_mask(self.df_base, d) for d in self.dim_uniq]
        curr_values = dict()
        for i, d in enumerate(self.dim_uniq):
            curr_mask = self._dim_mask(self.df_curr, d)
            for j, v in enumerate(self.var):
                curr_values[(i, j)] = self.df_curr.loc[curr_mask, v].values

        self.phi = dict()

        # reuse the same set of sequences for all players
        for seq in seq_list:
            # start every ordering from an untouched copy of the base frame
            df_s = self.df_base.copy()
            v_current = self.func(df_s)

            for position in seq:
                p = players[position]
                v = self.var[p[1]]
                # switch this player's cell from base to current
                df_s.loc[base_masks[p[0]], v] = curr_values[p]
                # marginal utility of the switch
                v_si = self.func(df_s)
                self.phi[p] = self.phi.get(p, 0) + (v_si - v_current)
                v_current = v_si

    def process_results(self):
        """Scale the accumulated contributions so they add up to ``metric_delta``.

        Stores a list of dicts (``维度``, ``变量``, ``贡献``) in ``self.con``.
        """
        total = sum(self.phi.values())
        if total == 0:
            # Nothing to rescale against (e.g. the metric did not move):
            # report the per-ordering average instead of dividing by zero.
            n_seq = max(getattr(self, 'n_sequences', 0), 1)
            phi_std = {k: 1.0 * v / n_seq for k, v in self.phi.items()}
        else:
            # standardize (because of sampling)
            phi_std = {k: 1.0 * self.metric_delta * v / total for k, v in self.phi.items()}

        # save contribution of each player
        self.con = [
            {
                '维度': self.dim_uniq[k[0]],
                '变量': self.var[k[1]],
                '贡献': v,
            }
            for k, v in phi_std.items()
        ]

    def display_contribution(self):
        """Render the contribution per variable and per dimension as styled tables."""
        # contribution by dim combination
        con_by_dim = pd.DataFrame(self.con)

        # contribution by var
        con_by_var = con_by_dim.groupby('变量')['贡献'].sum().reset_index()
        con_by_var['贡献权重'] = con_by_var['贡献'] / self.metric_delta
        print("每个变量的整体贡献:")
        display(
            self._hide_index(con_by_var.style)
            .background_gradient(
                subset=pd.IndexSlice[:, ['贡献权重']],
                cmap='viridis',
            )
            .format({
                '贡献': '{:.4f}',
                '贡献权重': '{:.2%}',
            })
        )

        # split dim tuple into separate dims
        df_con_split = pd.concat(
            [
                pd.DataFrame(con_by_dim['维度'].tolist(), columns=self.dim),
                con_by_dim,
            ],
            axis=1,
        )
        print("每个变量贡献，按不同维度分解:")

        for d in self.dim:
            print(f"\n{'-'*10} 维度: {d}")
            # aggregate by dimension to provide metrics
            df_base_d = custom_results(self.df, d, self.date_base)
            df_curr_d = custom_results(self.df, d, self.date_curr)
            # The contribution keys were cast to str in process_data; cast
            # the merge keys the same way so both sides compare as strings.
            df_base_d[d] = df_base_d[d].astype(str)
            df_curr_d[d] = df_curr_d[d].astype(str)
            df_con_by_var = list()

            for v in self.var:
                df_con_grouped = (
                    df_con_split[df_con_split['变量'] == v]
                    .groupby(d)[['贡献']].sum()
                    .merge(df_base_d[[d, self.weight]], on=[d]).rename(columns={self.weight: '群体比重'})
                    .merge(df_base_d[[d, v]], on=[d]).rename(columns={v: '基期'})
                    .merge(df_curr_d[[d, v]], on=[d]).rename(columns={v: '现期'})
                )
                # turn the raw weight into a share of the dimension total
                df_con_grouped['群体比重'] = df_con_grouped['群体比重'] / df_con_grouped['群体比重'].sum()
                df_con_grouped['指标'] = v
                df_con_by_var.append(df_con_grouped)

            # Combine all vars; largest contribution in the direction of the
            # overall change comes first.
            df_con_all_vars = (
                pd.concat(df_con_by_var, axis=0)
                .reset_index(drop=True)
                .sort_values(by='贡献', ascending=bool(self.metric_delta < 0))
            )
            # add percentage metrics
            df_con_all_vars['贡献比重'] = df_con_all_vars['贡献'] / self.metric_delta
            # contribution per unit of group share, reported only for groups
            # holding at least 5% of the weight
            df_con_all_vars['单位贡献'] = np.abs(df_con_all_vars['贡献']) / df_con_all_vars['群体比重']
            df_con_all_vars['单位贡献'] = np.where(
                df_con_all_vars['群体比重'] >= 0.05, df_con_all_vars['单位贡献'], 0
            )
            # sort columns manually
            df_con_all_vars = df_con_all_vars[[d, '指标', '基期', '现期', '贡献', '贡献比重', '群体比重', '单位贡献']]

            display(
                self._hide_index(df_con_all_vars.style)
                .background_gradient(
                    subset=pd.IndexSlice[:, ['贡献比重', '群体比重', '单位贡献']],
                    cmap='viridis',
                )
                .format({
                    '基期': '{:.4f}',
                    '现期': '{:.4f}',
                    '贡献': '{:.4f}',
                    '贡献比重': '{:.2%}',
                    '群体比重': '{:.2%}',
                    '单位贡献': '{:.4f}',
                })
            )

In [91]:
# class ShapAnalysis:
    
#     def __init__(self, df, date_base, date_curr, dim, var, metric, weight, func):
#         self.df = df
#         self.date_base = date_base
#         self.date_curr = date_curr
#         self.dim = dim
#         self.var = var
#         self.metric = metric
#         self.weight = weight
#         self.func = func
#         # placeholders
#         self.metric_base = 0
#         self.metric_curr = 0
#         self.metric_delta = 0
    
#     def process_data(self):
#         # generate df
#         df_base = custom_results(self.df, self.dim, self.date_base)
#         df_curr = custom_results(self.df, self.dim, self.date_curr)

#         # fill NA
#         df_base[self.dim] = df_base[self.dim].astype(str).fillna('_')
#         df_curr[self.dim] = df_curr[self.dim].astype(str).fillna('_')
#         df_base[self.var] = df_base[self.var].fillna(0)
#         df_curr[self.var] = df_curr[self.var].fillna(0)

#         # select required columns
#         df_base = df_base[self.dim + self.var]
#         df_curr = df_curr[self.dim + self.var]

#         # combine dimensions into a tuple
#         self.new_dim_col = '_dim'
#         df_base[self.new_dim_col] = df_base[self.dim].apply(tuple, axis=1)
#         df_curr[self.new_dim_col] = df_curr[self.dim].apply(tuple, axis=1)

#         # drop old dim cols
#         df_base, df_curr = df_base.drop(self.dim, axis=1), df_curr.drop(self.dim, axis=1)

#         # find the set of all dim values
#         self.dim_uniq = pd.concat([df_base[self.new_dim_col], df_curr[self.new_dim_col]]).unique()

#         # make sure both dataframes have records for all dim values
#         for d in self.dim_uniq:
#             new_row = dict()
#             new_row[self.new_dim_col] = d
#             for v in self.var:
#                 new_row[v] = 0
#             # tuple in set
#             if d not in set(df_base[self.new_dim_col].values):
#                 df_base = df_base.append(new_row, ignore_index=True)
#             if d not in set(df_curr[self.new_dim_col].values):
#                 df_curr = df_curr.append(new_row, ignore_index=True)
        
#         self.df_base = df_base
#         self.df_curr = df_curr
        
#         # calc overall metrics
#         self.metric_base, self.metric_curr = \
#             self.func(self.df_base), self.func(self.df_curr)
#         self.metric_delta = self.metric_curr - self.metric_base

        
#     def analysis(self, sample_size=2):
#         # players: dim x variable
#         players = [(i, j) for i in range(len(self.dim_uniq)) for j in range(len(self.var))]

#         # sample
#         N = len(players)
#         sample_size = min(sample_size, factorial(N))
#         seq_list = list()
#         random.seed(666)
        
#         for _ in range(sample_size):
#             seq = list(range(N))
#             random.shuffle(seq)
#             seq_list.append(seq)
            
#         self.phi = dict()
        
#         # reuse the same set of sequences for all players        
#         for seq in seq_list:
#             # make of copy of ctl
#             df_s = self.df_base.copy()
#             # current utility
#             v_current = self.func(df_s)

#             for i in range(N):
#                 # select player p
#                 p = players[seq[i]]
#                 # select dim and variable
#                 d, v = self.dim_uniq[p[0]], self.var[p[1]]
#                 # update df_s
#                 df_s.loc[lambda x: x[self.new_dim_col]==d, v] = \
#                     self.df_curr.loc[lambda x: x[self.new_dim_col]==d, v].values
#                 # calculate marginal utility
#                 v_si = self.func(df_s)
#                 phi_i = v_si - v_current
#                 # update current utility
#                 v_current = v_si

#                 # add utility for player p
#                 if p in self.phi:
#                     self.phi[p] += phi_i
#                 else:
#                     self.phi[p] = phi_i
        
    
#     def process_results(self):
#         # standardize (because of sampling)
#         phi_std = {k:1.0*self.metric_delta*v/sum(self.phi.values()) for k, v in self.phi.items()}

#         # save contribution of each player
#         self.con = \
#         [{'维度':self.dim_uniq[k[0]], 
#           '变量':self.var[k[1]],
#           '贡献': v,
#          } for k, v in phi_std.items()]

            
#     def display_contribution(self):
#         # contribution by dim combination
#         con_by_dim = pd.DataFrame(self.con)
        
#         # contribution by var
#         con_by_var = con_by_dim.\
#             groupby('变量')['贡献'].sum().reset_index()
#         con_by_var['贡献权重'] = con_by_var['贡献']/self.metric_delta
#         print("每个变量的整体贡献:")
#         display(
#             con_by_var.style.hide_index().\
#             background_gradient(
#                 subset=pd.IndexSlice[:,['贡献权重']],
#                 # cmap='plasma',
#                 cmap='viridis',
#                     ).\
#             format({
#                     '贡献':'{:.4f}',
#                     '贡献权重':'{:.2%}',
#                 })
#         )
        
#         # split dim tuple into separate dims
#         df_con_split = pd.concat(
#             [
#                 pd.DataFrame(
#                     con_by_dim['维度'].tolist(), 
#                     columns=self.dim
#                 ), 
#                 con_by_dim
#             ], 
#             axis=1,
#         )
#         print("每个变量贡献，按不同维度分解:")
        
#         for d in self.dim:
#             print(f"\n{'-'*10} 维度: {d}")
#             # aggregate by dimension to provide metrics
#             df_base_d = custom_results(self.df, d, self.date_base)
#             df_curr_d = custom_results(self.df, d, self.date_curr)
#             df_con_by_var = list()
            
#             for v in self.var:
#                 # print(f"{'-'*2} 指标: {v}. 累计贡献 {con_by_var[lambda x: x['变量']==v]['贡献'].values[0] :.4f}")
#                 df_con_grouped = \
#                     df_con_split[lambda x: x['变量']==v].\
#                     groupby(d)[['贡献']].sum().\
#                     merge(df_base_d[[d, self.weight]], on=[d]).rename(columns={self.weight: '群体比重'}).\
#                     merge(df_base_d[[d, v]], on=[d]).rename(columns={v: '基期'}).\
#                     merge(df_curr_d[[d, v]], on=[d]).rename(columns={v: '现期'})
#                 # df_con_grouped['贡献权重'] = df_con_grouped['贡献']/df_con_grouped['贡献'].sum()
#                 df_con_grouped['群体比重'] = df_con_grouped['群体比重']/df_con_grouped['群体比重'].sum()
#                 # df_con_grouped['重要度'] = np.abs(df_con_grouped['贡献权重'])/df_con_grouped['群体权重']
#                 df_con_grouped['指标'] = v
#                 # append to the list
#                 df_con_by_var.append(df_con_grouped)
            
#             # combine df for all vars and sort by contribution
#             df_con_all_vars = pd.concat(df_con_by_var, axis=0).\
#                 reset_index(drop=True).\
#                 sort_values(by='贡献',ascending=True if self.metric_delta<0 else False)
#             # add percentage metrics
#             df_con_all_vars['贡献比重'] = df_con_all_vars['贡献']/self.metric_delta
#             # add contribution per percent
#             df_con_all_vars['单位贡献'] = np.abs(df_con_all_vars['贡献'])/df_con_all_vars['群体比重']
#             df_con_all_vars['单位贡献'] = np.where(df_con_all_vars['群体比重'] >= 0.05, df_con_all_vars['单位贡献'], 0)
#             # sort columns manually
#             df_con_all_vars = df_con_all_vars[[d,'指标','基期','现期','贡献','贡献比重','群体比重','单位贡献']]
#             # print(df_con_all_vars)
            
#             # display
#             display(
#                 df_con_all_vars.style.hide_index().\
#                     background_gradient(
#                         subset=pd.IndexSlice[:,['贡献比重','群体比重','单位贡献']],
#                         cmap='viridis',
#                 ).\
#                     format({
#                     '基期':'{:.4f}',
#                     '现期':'{:.4f}',
#                     '贡献':'{:.4f}',
#                     '贡献比重':'{:.2%}',
#                     '群体比重':'{:.2%}',
#                     '单位贡献':'{:.4f}'
#                 })
#             )

## 分析过程

每项配置对应一个分析任务。配置说明：

- date_base 参照数据起止日期
- date_curr 当前数据起止日期
- dim 分析维度
- var 用于计算指标的变量
- metric 指标名称，可以随便取
- weight 权重变量，通常选取用户数或用户占比
- func 从 var 计算 metric 的 Python 函数

配置可自行增改。

In [13]:
# config
#
# Every task below is a dict with the keys date_base, date_curr, dim, var,
# metric, weight and func (in that order). In all tasks, func is the sum of
# the element-wise product of the task's `var` columns, so the dicts are
# produced by small factories instead of being spelled out one by one.

# Column and dimension sets shared by several tasks.
_D0_FUNNEL_VARS = ('新增占比', 'd0_新增到报名', 'd0_报名到复制', 'd0_复制到入群')
_GROUP_DIMS = ('occupation', 'channel', 'city_level', 'first_level_name')


def _product_sum(columns):
    """Return a function computing sum(x[c0] * x[c1] * ...) over `columns`.

    The columns are multiplied left to right, in the order given.
    """
    first, *rest = tuple(columns)

    def func(x):
        product = x[first]
        for col in rest:
            product = product * x[col]
        return sum(product)

    return func


def _make_conf(date_base, date_curr, dim, var, metric, weight):
    """Assemble one analysis-task dict; `dim` and `var` are copied into new lists."""
    return {
        'date_base': date_base,
        'date_curr': date_curr,
        'dim': list(dim),
        'var': list(var),
        'metric': metric,
        'weight': weight,
        'func': _product_sum(var),
    }


def _d0_join_rate_conf(date_base, date_curr, dim):
    """Task dict for the 'd0_入群率' metric over the d0 funnel columns."""
    return _make_conf(
        date_base, date_curr, dim,
        var=_D0_FUNNEL_VARS,
        metric='d0_入群率',
        weight='新增占比',
    )


def _group_conversion_conf(share, rate, date_base, date_curr, dim=_GROUP_DIMS):
    """Task dict whose variables are [share, rate]; `rate` names the metric, `share` the weight."""
    return _make_conf(
        date_base, date_curr, dim,
        var=(share, rate),
        metric=rate,
        weight=share,
    )


# d0 group-join rate of new users
d0_入群率_cust = _d0_join_rate_conf(
    ['2020-11-11', '2020-11-17'],
    ['2020-11-18', '2020-11-24'],
    dim=['channel'],
)
d0_入群率_dod = _d0_join_rate_conf(dod, ytd, dim=['channel', 'city_level'])
d0_入群率_wow = _d0_join_rate_conf(wow, ytd, dim=['channel', 'city_level'])

# in-group conversion
d3_群内转化率_dod = _group_conversion_conf('d3_入群占比', 'd3_群内转化率', dod_3, ytd_3)
d3_群内转化率_wow = _group_conversion_conf('d3_入群占比', 'd3_群内转化率', wow_3, ytd_3)

# the d6 tasks additionally break down by 'age'
d6_群内转化率_dod = _group_conversion_conf(
    'd6_入群占比', 'd6_群内转化率', dod_6, ytd_6, dim=_GROUP_DIMS + ('age',),
)
d6_群内转化率_wow = _group_conversion_conf(
    'd6_入群占比', 'd6_群内转化率', wow_6, ytd_6, dim=_GROUP_DIMS + ('age',),
)

d8_群内转化率_dod = _group_conversion_conf('d8_入群占比', 'd8_群内转化率', dod_8, ytd_8)
d8_群内转化率_wow = _group_conversion_conf('d8_入群占比', 'd8_群内转化率', wow_8, ytd_8)

# learning rate
d3_学习率 = _make_conf(
    date_base=['2020-10-31','2020-11-02'],
    date_curr=['2020-11-03','2020-11-05'],
    dim=('os',) + _GROUP_DIMS,
    var=['新增占比', 'd3_学习率'],
    metric='d3_学习率',
    weight='新增',
)

In [14]:
# Sequential analysis: for every app, run each selected config in turn,
# print the metric movement, and only run the (more expensive) contribution
# analysis when the movement is large enough.

# Minimum absolute relative change of the metric required to run the analysis.
MIN_RELATIVE_CHANGE = 0.05
# Value forwarded to ShapAnalysis.analysis(sample_size=...).
ANALYSIS_SAMPLE_SIZE = 15

for app_name in ('eo', 'ez'):
    
    # Uncomment the configs to analyse; they are processed in list order.
    for conf in [
#         d0_入群率_dod,
#         d0_入群率_wow,
#         d3_群内转化率_dod,
#         d3_群内转化率_wow,
#         d8_群内转化率_dod,
#         d8_群内转化率_wow,
#         d6_群内转化率_dod,
#         d6_群内转化率_wow,
        d0_入群率_cust,
    ]:
        print(f"{'-'*30} {(app_name.upper())} 的 {conf['metric']} 指标 {'-'*30}")
        s = ShapAnalysis(df_raw[lambda x: x['app_name'] == app_name], **conf)
        s.process_data()

        if s.metric_base == 0:
            # A zero baseline makes the relative change undefined: dividing
            # would raise ZeroDivisionError for Python numbers or yield
            # inf/nan (with a RuntimeWarning) for NumPy floats. Report it as
            # N/A and analyse only if the metric actually moved.
            relative_change_text = 'N/A'
            is_significant = abs(s.metric_delta) > 0
        else:
            relative_change_text = f"{s.metric_delta/s.metric_base :.2%}"
            is_significant = abs(s.metric_curr/s.metric_base-1) >= MIN_RELATIVE_CHANGE

        print(
            f"日期从 {conf['date_base']} 到 {conf['date_curr']}\n指标从 {s.metric_base: .4f} 到 {s.metric_curr: .4f}，" 
            f"绝对变化: {s.metric_delta :.4f} 相对变化: {relative_change_text}"
        )
        
        # Only analyse when the relative change reaches the threshold.
        if is_significant:
            s.analysis(sample_size=ANALYSIS_SAMPLE_SIZE)
            s.process_results()
            s.display_contribution()

------------------------------ EO 的 d0_入群率 指标 ------------------------------
日期从 ['2020-11-11', '2020-11-17'] 到 ['2020-11-18', '2020-11-24']
指标从  0.1498 到  0.1399，绝对变化: -0.0099 相对变化: -6.63%
每个变量的整体贡献:


变量,贡献,贡献权重
d0_复制到入群,-0.0031,31.70%
d0_报名到复制,0.0004,-3.76%
d0_新增到报名,-0.0053,53.14%
新增占比,-0.0019,18.91%


每个变量贡献，按不同维度分解:

---------- 维度: channel


channel,指标,基期,现期,贡献,贡献比重,群体比重,单位贡献
内广,d0_新增到报名,0.5086,0.4875,-0.0042,42.47%,51.03%,0.0083
广点通,新增占比,0.0947,0.0727,-0.0042,41.88%,9.47%,0.0439
内广,d0_复制到入群,0.4371,0.4217,-0.0035,35.20%,51.03%,0.0068
内广,新增占比,0.5103,0.4985,-0.0023,22.93%,51.03%,0.0045
华为小米,d0_新增到报名,0.2892,0.2669,-0.0011,10.96%,13.22%,0.0082
广点通,d0_复制到入群,0.4577,0.4274,-0.001,10.54%,9.47%,0.0111
Apple Store,d0_新增到报名,0.2985,0.2574,-0.001,9.76%,4.96%,0.0
其他,d0_新增到报名,0.2795,0.1964,-0.001,9.74%,1.82%,0.0
Vivo,新增占比,0.1529,0.1317,-0.0008,7.99%,15.29%,0.0052
Apple Store,d0_报名到复制,0.934,0.8877,-0.0003,3.51%,4.96%,0.0


------------------------------ EZ 的 d0_入群率 指标 ------------------------------
日期从 ['2020-11-11', '2020-11-17'] 到 ['2020-11-18', '2020-11-24']
指标从  0.0963 到  0.1071，绝对变化: 0.0107 相对变化: 11.13%
每个变量的整体贡献:


变量,贡献,贡献权重
d0_复制到入群,0.002,19.03%
d0_报名到复制,-0.0029,-26.66%
d0_新增到报名,0.0072,67.16%
新增占比,0.0043,40.47%


每个变量贡献，按不同维度分解:

---------- 维度: channel


channel,指标,基期,现期,贡献,贡献比重,群体比重,单位贡献
其他,d0_新增到报名,0.1193,0.1828,0.0045,41.61%,20.67%,0.0216
Apple Store,新增占比,0.157,0.1801,0.0027,25.37%,15.70%,0.0173
Google-FB,新增占比,0.0649,0.0862,0.0022,20.94%,6.49%,0.0346
内广,新增占比,0.0315,0.04,0.0022,20.17%,3.15%,0.0
华为小米,新增占比,0.2856,0.2967,0.0014,13.18%,28.56%,0.0049
Apple Store,d0_新增到报名,0.2253,0.2398,0.0012,11.33%,15.70%,0.0077
华为小米,d0_复制到入群,0.6987,0.7204,0.0011,10.62%,28.56%,0.004
华为小米,d0_新增到报名,0.2844,0.2921,0.001,9.34%,28.56%,0.0035
Apple Store,d0_复制到入群,0.7256,0.7582,0.0009,8.32%,15.70%,0.0057
Vivo,新增占比,0.144,0.1598,0.0008,7.53%,14.40%,0.0056
