In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# 让画图内置
%matplotlib inline

# 忽略版本号之类的warnings
import warnings
warnings.filterwarnings('ignore')
import os
from datetime import date

# 数据导入

In [None]:
# 导入数据
# original_data = pd.read_csv('data\processed_data.csv')
# data = original_data.copy()
# data.shape

In [2]:
# 导入数据
data = pd.read_csv('data\processed_data.csv')
data.shape

(42413557, 11)

In [3]:
data['event_time'] = pd.to_datetime(data['event_time'])
data['category_code'] = data['category_code'].astype('category')
data['brand'] = data['brand'].astype('category')

data['event_day'] = data['event_time'].dt.date
data['event_hour'] = data['event_time'].dt.hour

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42413557 entries, 0 to 42413556
Data columns (total 13 columns):
 #   Column         Dtype         
---  ------         -----         
 0   event_time     datetime64[ns]
 1   event_type     object        
 2   product_id     int64         
 3   category_id    int64         
 4   category_code  category      
 5   brand          category      
 6   price          float64       
 7   user_id        int64         
 8   category       object        
 9   sub_category   object        
 10  product_name   object        
 11  event_day      object        
 12  event_hour     int32         
dtypes: category(2), datetime64[ns](1), float64(1), int32(1), int64(3), object(5)
memory usage: 3.4+ GB


In [36]:
data.head()

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,category,sub_category,product_name,event_day,event_hour,event_weekday
0,2019-10-01 00:00:00,view,44600062,2103807459595387724,,shiseido,35.79,541312140,,,,2019-10-01,0,1
1,2019-10-01 00:00:00,view,3900821,2053013552326770905,appliances.environment.water_heater,aqua,33.2,554748717,appliances,environment,water_heater,2019-10-01,0,1
2,2019-10-01 00:00:01,view,17200506,2053013559792632471,furniture.living_room.sofa,,543.1,519107250,furniture,living_room,sofa,2019-10-01,0,1
3,2019-10-01 00:00:01,view,1307067,2053013558920217191,computers.notebook,lenovo,251.74,550050854,computers,notebook,notebook,2019-10-01,0,1
4,2019-10-01 00:00:04,view,1004237,2053013555631882655,electronics.smartphone,apple,1081.98,535871217,electronics,smartphone,smartphone,2019-10-01,0,1


# 总体运营指标

In [None]:
# pv uv
data.query('event_type == "view"').groupby('event_type').agg(PV = ('user_id','count'),
              UV = ('user_id','nunique'))

Unnamed: 0_level_0,PV,UV
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1
view,40772341,3022130


In [None]:
# 订单数 gmv
data.query('event_type == "purchase"').groupby('event_type').agg(order_num = ('user_id','count'),
                                    gmv = ('price','sum'))

Unnamed: 0_level_0,order_num,order_user_num,ordered_product_num,gmv
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
purchase,742773,347118,42241,229933200.0


In [None]:
# 各行为计数
behavior_info = data.groupby('event_type').agg({'event_type': 'count'}).unstack().unstack()
behavior_info['conversion_rate'] = behavior_info['purchase'] / behavior_info['view']
behavior_info

event_type,cart,purchase,view,conversion_rate
event_type,898443,742773,40772341,0.018218


In [None]:
# 下单数,动销商品数,gmv,单均价
product_info = data.query('event_type == "purchase"').groupby('event_type').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})
product_info.columns = ['下单数','动销商品数','gmv','单均价']
product_info

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
purchase,742773,42241,229933200.0,309.560542


# 平台经营

## PV UV

In [71]:
view_info  = data.query('event_type == "view"')
puv = view_info.groupby('event_type').agg(PV = ('user_id','count'),
              UV = ('user_id','nunique'))
puv

Unnamed: 0_level_0,PV,UV
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1
view,40772341,3022130


### 分日

In [70]:
view_info.groupby(['event_day']).agg(PV = ('user_id','count'),
              UV = ('user_id','nunique'))

Unnamed: 0_level_0,PV,UV
event_day,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-10-01,1208165,190158
2019-10-02,1154436,184955
2019-10-03,1088577,170655
2019-10-04,1346162,209393
2019-10-05,1271176,194949
2019-10-06,1263876,193194
2019-10-07,1160929,186939
2019-10-08,1328893,221194
2019-10-09,1306089,214133
2019-10-10,1242891,205307


### 分时

In [42]:
view_info.groupby(['event_hour']).agg(PV = ('user_id','count'),
              UV = ('user_id','nunique'))


Unnamed: 0_level_0,PV,UV
event_hour,Unnamed: 1_level_1,Unnamed: 2_level_1
0,299740,59584
1,545513,116376
2,1036894,209100
3,1485498,290737
4,1826073,351996
5,2020202,387131
6,2154159,413004
7,2217399,426678
8,2267568,432146
9,2228945,421753


## 下单数 gmv 客单价

In [9]:
# 订单数 gmv
order_info  = data.query('event_type == "purchase"')
order_info.groupby('event_type').agg(order_num = ('user_id','count'),
                                    gmv = ('price','sum'),
                                    ATV = ('price','mean'))

Unnamed: 0_level_0,order_num,gmv
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1
purchase,742773,229933200.0


### 分日

In [10]:
order_info_byday= order_info.groupby('event_day').agg(order_num = ('user_id','count'),
                                    gmv = ('price','sum'))
order_info_byday

Unnamed: 0_level_0,order_num,gmv
event_day,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-10-01,19305,6275579.06
2019-10-02,19469,6213628.53
2019-10-03,19255,6233782.98
2019-10-04,27039,8623058.19
2019-10-05,23492,7341094.46
2019-10-06,22169,6737258.17
2019-10-07,21378,6348189.06
2019-10-08,23071,6819701.26
2019-10-09,22747,6855326.05
2019-10-10,21992,6665413.21


### 分时

In [12]:
order_info_byhour= order_info.groupby('event_hour').agg(order_num = ('user_id','count'),
                                    gmv = ('price','sum'))
order_info_byhour

Unnamed: 0_level_0,order_num,gmv
event_hour,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2949,980317.85
1,5526,1577532.87
2,13968,3873027.44
3,29632,8696837.45
4,41143,12064072.73
5,48068,14398553.24
6,52002,15808753.76
7,53404,16338607.12
8,55195,17058580.92
9,55182,17551669.73


# 销售转化
- 只考虑行为次数， 不考虑 价格/人数/商品数

## 行为数 下单转化率

In [82]:
# 各行为计数
behavior_info = data.groupby('event_type').agg({'event_type': 'count'}).unstack().unstack()
behavior_info['conversion_rate'] = behavior_info['purchase'] / behavior_info['view']
behavior_info

event_type,cart,purchase,view,conversion_rate
event_type,898443,742773,40772341,0.018218


### 分日 

In [5]:
behavior_info_byday = data.groupby(['event_day', 'event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_byday.columns = ['cart','purchase','view']
behavior_info_byday['conversion_rate'] = (behavior_info_byday['purchase'] / behavior_info_byday['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_byday

Unnamed: 0_level_0,cart,purchase,view,conversion_rate
event_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-10-01,16193,19305,1208165,1.59%
2019-10-02,16871,19469,1154436,1.68%
2019-10-03,18702,19255,1088577,1.76%
2019-10-04,42657,27039,1346162,2.00%
2019-10-05,34564,23492,1271176,1.84%
2019-10-06,30995,22169,1263876,1.75%
2019-10-07,17570,21378,1160929,1.84%
2019-10-08,17952,23071,1328893,1.73%
2019-10-09,17886,22747,1306089,1.74%
2019-10-10,18336,21992,1242891,1.76%


### 分时

In [6]:
behavior_info_byhour = data.groupby(['event_hour', 'event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_byhour.columns = ['cart','purchase','view']
behavior_info_byhour['conversion_rate'] = (behavior_info_byhour['purchase'] / behavior_info_byhour['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_byhour

Unnamed: 0_level_0,cart,purchase,view,conversion_rate
event_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,3869,2949,299740,0.98%
1,7525,5526,545513,1.01%
2,17279,13968,1036894,1.34%
3,33642,29632,1485498,1.99%
4,46636,41143,1826073,2.25%
5,55476,48068,2020202,2.37%
6,60607,52002,2154159,2.41%
7,62460,53404,2217399,2.40%
8,65016,55195,2267568,2.43%
9,65031,55182,2228945,2.47%


### 分品牌

In [21]:
behavior_info_bybrand = data.groupby(['brand', 'event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_bybrand.columns = ['cart','purchase','view']
behavior_info_bybrand['conversion_rate'] = (behavior_info_bybrand['purchase'] / behavior_info_bybrand['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_bybrand.query('purchase > 0').sort_values(by='conversion_rate', ascending=False)

Unnamed: 0_level_0,cart,purchase,view,conversion_rate
brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
faans,0,2,22,9.09%
alumet,0,1,11,9.09%
rimmel,0,1,11,9.09%
crusader,0,1,11,9.09%
unikum,0,3,36,8.33%
...,...,...,...,...
dometic,0,3,7954,0.03%
betsy,0,1,2815,0.03%
agl,0,2,7026,0.02%
ezetil,0,1,4353,0.02%


### 分类别

In [16]:
behavior_info_bycategory = data.groupby(['category', 'event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_bycategory.columns = ['cart','purchase','view']
behavior_info_bycategory['conversion_rate'] = (behavior_info_bycategory['purchase'] / behavior_info_bycategory['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_bycategory.query('purchase > 0').sort_values(by='conversion_rate', ascending=False)

Unnamed: 0_level_0,cart,purchase,view,conversion_rate
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
electronics,653158,422979,15035552,2.81%
medicine,514,310,13974,2.21%
stationery,106,134,7380,1.81%
appliances,89371,74988,4799522,1.56%
computers,26821,27853,2268655,1.22%
construction,10081,7801,712635,1.09%
kids,2534,5482,512260,1.07%
auto,8916,10619,993338,1.06%
sport,1015,1236,174292,0.70%
accessories,537,1587,236098,0.67%


### 分小类

In [18]:
behavior_info_bysubcat = data.groupby(['category', 'sub_category','event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_bysubcat.columns = ['cart','purchase','view']
behavior_info_bysubcat['conversion_rate'] = (behavior_info_bysubcat['purchase'] / behavior_info_bysubcat['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_bysubcat.query('purchase > 0').sort_values(by='conversion_rate', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,cart,purchase,view,conversion_rate
category,sub_category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
electronics,smartphone,532007.0,337979.0,10617327.0,3.18%
kids,fmcg,98.0,768.0,24199.0,3.17%
electronics,audio,51393.0,35590.0,1486132.0,2.39%
appliances,iron,3967.0,3652.0,157630.0,2.31%
medicine,tools,514.0,310.0,13974.0,2.21%
electronics,video,35174.0,21643.0,1068782.0,2.02%
appliances,ironing_board,350.0,664.0,34264.0,1.93%
electronics,tablet,8809.0,5602.0,301953.0,1.85%
stationery,cartrige,106.0,134.0,7380.0,1.81%
appliances,personal,2679.0,2541.0,144420.0,1.75%


### 分商品

In [19]:
behavior_info_bypname = data.groupby(['product_name','event_type']).agg({'event_type': 'count'}).unstack()
behavior_info_bypname.columns = ['cart','purchase','view']
behavior_info_bypname['conversion_rate'] = (behavior_info_bypname['purchase'] / behavior_info_bypname['view']*100).astype(str).apply(lambda x: x[:4]) + '%'
behavior_info_bypname.query('purchase > 0').sort_values(by='conversion_rate', ascending=False)

Unnamed: 0_level_0,cart,purchase,view,conversion_rate
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
smartphone,532007.0,337979.0,10617327.0,3.18%
diapers,98.0,768.0,24199.0,3.17%
headphone,49886.0,30501.0,1018333.0,2.99%
iron,3967.0,3652.0,157630.0,2.31%
microwave,4627.0,3708.0,164944.0,2.24%
...,...,...,...,...
soldering,,1.0,631.0,0.15%
slipons,,8.0,5677.0,0.14%
trousers,,22.0,16091.0,0.13%
jumper,,2.0,2662.0,0.07%


## 用户流量图

In [5]:
sub_data = data[['event_type','user_id','product_id']]
sub_data

Unnamed: 0,event_type,user_id,product_id
0,view,541312140,44600062
1,view,554748717,3900821
2,view,519107250,17200506
3,view,550050854,1307067
4,view,535871217,1004237
...,...,...,...
42413552,view,537931532,2300275
42413553,view,527322328,10800172
42413554,view,566280422,5701038
42413555,view,513118352,21407424


In [6]:
user_route = sub_data.groupby(['product_id','user_id','event_type']).agg({'event_type': 'nunique'}).unstack().fillna(0)
user_route

Unnamed: 0_level_0,Unnamed: 1_level_0,event_type,event_type,event_type
Unnamed: 0_level_1,event_type,cart,purchase,view
product_id,user_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1000978,489962790,0.0,0.0,1.0
1000978,490574809,0.0,0.0,1.0
1000978,493091524,0.0,0.0,1.0
1000978,502430435,0.0,0.0,1.0
1000978,503271412,0.0,0.0,1.0
...,...,...,...,...
60500009,562570257,0.0,0.0,1.0
60500009,565573952,0.0,0.0,1.0
60500010,513174842,0.0,0.0,1.0
60500010,544126884,0.0,0.0,1.0


In [7]:
user_route.columns   = ['cart','purchase','view']
user_route = user_route[['view','cart','purchase']]
user_route 

Unnamed: 0_level_0,Unnamed: 1_level_0,view,cart,purchase
product_id,user_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000978,489962790,1.0,0.0,0.0
1000978,490574809,1.0,0.0,0.0
1000978,493091524,1.0,0.0,0.0
1000978,502430435,1.0,0.0,0.0
1000978,503271412,1.0,0.0,0.0
...,...,...,...,...
60500009,562570257,1.0,0.0,0.0
60500009,565573952,1.0,0.0,0.0
60500010,513174842,1.0,0.0,0.0
60500010,544126884,1.0,0.0,0.0


In [8]:
user_route['rounte'] = user_route.astype(int).astype(str).agg('-'.join, axis=1)
user_route

Unnamed: 0_level_0,Unnamed: 1_level_0,view,cart,purchase,rounte
product_id,user_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1000978,489962790,1.0,0.0,0.0,1-0-0
1000978,490574809,1.0,0.0,0.0,1-0-0
1000978,493091524,1.0,0.0,0.0,1-0-0
1000978,502430435,1.0,0.0,0.0,1-0-0
1000978,503271412,1.0,0.0,0.0,1-0-0
...,...,...,...,...,...
60500009,562570257,1.0,0.0,0.0,1-0-0
60500009,565573952,1.0,0.0,0.0,1-0-0
60500010,513174842,1.0,0.0,0.0,1-0-0
60500010,544126884,1.0,0.0,0.0,1-0-0


In [13]:
# user_route.to_csv('data/user_route1.csv')

In [None]:
user_route['rounte'].value_counts()

rounte
1-0-0    22509736
1-0-1      298539
1-1-1      259544
1-1-0      239130
0-1-0         280
0-0-1         259
0-1-1         142
Name: count, dtype: int64

In [24]:
import plotly.graph_objects as go

# 创建流量图
fig = go.Figure(go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["PV", "Cart", "Purchase", "Drop-off"],
    ),
    link=dict(
        source=[0, 0, 0, 1, 1],  # PV -> Cart, pv -> Purchase, pv -> Drop-off, Cart -> Purchase, Cart -> Drop-off
        target=[3, 2, 1, 2, 3],
        value=[2250973,298539,498674,259544,239130]
    )
))

fig.update_layout(title_text="用户行为流向图", font_size=10)
fig.show()

In [None]:
# 创建流量图
fig = go.Figure(go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=["PV", "Cart", "Purchase", "Drop-off"],
    ),
    link=dict(
        source=[0, 0,  1, 1],  # PV -> Cart, pv -> Purchase, pv -> Drop-off, Cart -> Purchase, Cart -> Drop-off
        target=[1, 2,  2, 3],
        value=[498674,298539,259544,239130]
    )
))

fig.update_layout(title_text="用户行为流向图", font_size=10)
fig.show()

In [31]:
# pyecharts画流量图
from pyecharts import options as opts
from pyecharts.charts import Sankey
# 定义节点和链接数据
nodes = [
    {"name": "View"},
    {"name": "Cart"},
    {"name": "Purchase"},
    {"name": "Drop-off"},
]

links = [
    {"source": "View", "target": "Cart", "value": 498674},
    {"source": "View", "target": "Purchase", "value": 298539},
    {"source": "View", "target": "Drop-off", "value": 2250973},
    {"source": "Cart", "target": "Purchase", "value": 259544},
    {"source": "Cart", "target": "Drop-off", "value": 239130},

]
# 创建桑基图对象
sankey = Sankey()

# 添加数据
sankey.add("", nodes, links, linestyle_opt=opts.LineStyleOpts(opacity=0.2, curve=0.5, color="source"), label_opts=opts.LabelOpts(position="right"))

# 设置全局选项
sankey.set_global_opts(title_opts=opts.TitleOpts(title="桑基图示例"))

sankey.render("sankey_chart.html")


'c:\\Users\\29610\\Desktop\\REES46 ecommerce dataset\\sankey_chart.html'

# 商品价值
- '下单数','动销商品数','gmv','单均价'

In [106]:
# 商品价值
product_info = data.query('event_type == "purchase"').groupby('event_type').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})
product_info.columns = ['下单数','动销商品数','gmv','单均价']
product_info

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
purchase,742773,42241,229933200.0,309.560542


## 分类别

In [110]:
product_info_bycategory = data.query('event_type == "purchase"').groupby('category').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})
product_info_bycategory.columns = ['下单数','动销商品数','gmv','单均价']
product_info_bycategory

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
accessories,1587,554,68783.88,43.342079
apparel,8002,2512,624937.8,78.097694
appliances,74988,4838,13581760.0,181.11912
auto,10619,609,1273956.0,119.969461
computers,27853,2638,11377870.0,408.497139
construction,7801,939,932995.0,119.599413
country_yard,72,29,15695.45,217.992361
electronics,422979,4698,176445600.0,417.149735
furniture,8299,1634,1673243.0,201.619835
kids,5482,1007,678140.7,123.70315


## 分小类

In [112]:
product_info_bysubcate = data.query('event_type == "purchase"').groupby(['category','sub_category']).agg({'product_id': ['count','nunique'],'price': ['sum','mean']})

product_info_bysubcate.columns =  ['下单数','动销商品数','gmv','单均价']

product_info_bysubcate.query('下单数 > 0')

Unnamed: 0_level_0,Unnamed: 1_level_0,下单数,动销商品数,gmv,单均价
category,sub_category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
accessories,bag,1253,421,53155.53,42.42261
accessories,umbrella,24,8,611.52,25.48
accessories,wallet,310,125,15016.83,48.441387
apparel,belt,21,9,1222.4,58.209524
apparel,costume,489,90,46492.98,95.077669
apparel,dress,15,13,871.31,58.087333
apparel,jacket,1,1,43.23,43.23
apparel,jeans,140,64,6163.23,44.023071
apparel,jumper,2,1,61.26,30.63
apparel,scarf,10,8,241.93,24.193


## 分商品名 

In [114]:
product_info_bypname = data.query('event_type == "purchase"').groupby('product_name').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})

product_info_bypname.columns = ['下单数','动销商品数','gmv','单均价']

product_info_bypname.query('下单数 > 0')

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acoustic,888,171,205951.27,231.927106
air_conditioner,411,98,201596.51,490.502457
air_heater,2483,232,110755.87,44.605667
alarm,2441,76,335979.32,137.640033
bag,1253,421,53155.53,42.422610
...,...,...,...,...
wallet,310,125,15016.83,48.441387
washer,16146,342,4658223.46,288.506346
water_heater,2774,199,290985.54,104.897455
welding,1081,88,173856.12,160.828973


## 分品牌

In [115]:
product_info_bybrand = data.query('event_type == "purchase"').groupby('brand').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})

product_info_bybrand.columns = ['下单数','动销商品数','gmv','单均价']

product_info_bybrand.query('下单数 > 0')

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a-case,58,17,275.26,4.745862
a-derma,1,1,12.36,12.360000
a-mega,9,5,1031.47,114.607778
abk,6,1,69.36,11.560000
abtoys,7,1,324.17,46.310000
...,...,...,...,...
zoom,1,1,167.31,167.310000
zotac,4,2,279.43,69.857500
zte,173,3,13623.87,78.750694
zubr,14,8,659.05,47.075000


## 分商品id

In [23]:
product_info_bybrand = data.query('event_type == "purchase"').groupby('product_id').agg({'product_id': ['count','nunique'],'price': ['sum','mean']})

product_info_bybrand.columns = ['下单数','动销商品数','gmv','单均价']

product_info_bybrand.query('下单数 > 0')

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000978,12,1,3840.13,320.010833
1001588,12,1,1538.18,128.181667
1002042,3,1,231.42,77.140000
1002062,18,1,1723.47,95.748333
1002098,13,1,4818.32,370.640000
...,...,...,...,...
59000007,1,1,47.88,47.880000
60400006,1,1,332.05,332.050000
60500001,4,1,102.34,25.585000
60500002,4,1,170.82,42.705000


# 用户价值
