In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# 让画图内置
%matplotlib inline

# Suppress all Python warnings (e.g. version/deprecation notices) for the rest of the notebook.
# NOTE(review): this filter is global, so it also hides pandas FutureWarnings raised by later cells.
import warnings
warnings.filterwarnings('ignore')
import os
from datetime import date

# 数据导入

In [None]:
# 导入数据
# original_data = pd.read_csv('data\processed_data.csv')
# data = original_data.copy()
# data.shape

In [None]:
# Load the pre-processed event log.
# A forward slash is used instead of the literal 'data\processed_data.csv':
# '\p' is an invalid escape sequence (flagged with a warning by recent Python
# versions), and a backslash separator only resolves as a directory on Windows,
# whereas 'data/...' is accepted on Windows, Linux and macOS alike.
data = pd.read_csv('data/processed_data.csv')
data.shape

(42413557, 11)

In [None]:
# Parse the event timestamps into datetime64 values.
data['event_time'] = pd.to_datetime(data['event_time'])

# Store category_code and brand as pandas categoricals.
for _categorical_col in ('category_code', 'brand'):
    data[_categorical_col] = data[_categorical_col].astype('category')

# Derive the calendar date and the hour of day from each event timestamp.
_event_dt = data['event_time'].dt
data['event_day'] = _event_dt.date
data['event_hour'] = _event_dt.hour

In [None]:
# Print the frame's column dtypes and approximate memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42413557 entries, 0 to 42413556
Data columns (total 13 columns):
 #   Column         Dtype         
---  ------         -----         
 0   event_time     datetime64[ns]
 1   event_type     object        
 2   product_id     int64         
 3   category_id    int64         
 4   category_code  category      
 5   brand          category      
 6   price          float64       
 7   user_id        int64         
 8   category       object        
 9   sub_category   object        
 10  product_name   object        
 11  event_day      object        
 12  event_hour     int32         
dtypes: category(2), datetime64[ns](1), float64(1), int32(1), int64(3), object(5)
memory usage: 3.4+ GB


In [None]:
# Preview the first five rows of the event log.
data.head()

Unnamed: 0,event_time,event_type,product_id,category_id,category_code,brand,price,user_id,category,sub_category,product_name,event_day,event_hour
0,2019-10-01 00:00:00,view,44600062,2103807459595387724,,shiseido,35.79,541312140,,,,2019-10-01,0
1,2019-10-01 00:00:00,view,3900821,2053013552326770905,appliances.environment.water_heater,aqua,33.2,554748717,appliances,environment,water_heater,2019-10-01,0
2,2019-10-01 00:00:01,view,17200506,2053013559792632471,furniture.living_room.sofa,,543.1,519107250,furniture,living_room,sofa,2019-10-01,0
3,2019-10-01 00:00:01,view,1307067,2053013558920217191,computers.notebook,lenovo,251.74,550050854,computers,notebook,notebook,2019-10-01,0
4,2019-10-01 00:00:04,view,1004237,2053013555631882655,electronics.smartphone,apple,1081.98,535871217,electronics,smartphone,smartphone,2019-10-01,0


# 商品价值
- 指标说明：'下单数'（purchase 事件的条数）、'动销商品数'（有成交的去重 product_id 数）、'gmv'（成交记录的 price 之和）、'单均价'（每条成交记录的平均 price）

In [None]:
# Product value, overall: a single summary row covering every purchase event.
# Grouping by event_type after filtering to "purchase" yields exactly one group.
product_info = (
    data.query('event_type == "purchase"')
    .groupby('event_type')
    .agg(
        **{
            '下单数': ('product_id', 'count'),        # number of purchase rows
            '动销商品数': ('product_id', 'nunique'),   # distinct products purchased
            'gmv': ('price', 'sum'),                  # summed price of purchase rows
            '单均价': ('price', 'mean'),               # mean price per purchase row
        }
    )
)
product_info

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
event_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
purchase,742773,42241,229933200.0,309.560542


## 分类别

In [None]:
# Product value per top-level category: purchase count, distinct products
# purchased, summed price (gmv) and mean price per purchase row.
_is_purchase = data['event_type'] == 'purchase'
product_info_bycategory = data[_is_purchase].groupby('category').agg(
    下单数=pd.NamedAgg(column='product_id', aggfunc='count'),
    动销商品数=pd.NamedAgg(column='product_id', aggfunc='nunique'),
    gmv=pd.NamedAgg(column='price', aggfunc='sum'),
    单均价=pd.NamedAgg(column='price', aggfunc='mean'),
)
product_info_bycategory

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
accessories,1587,554,68783.88,43.342079
apparel,8002,2512,624937.8,78.097694
appliances,74988,4838,13581760.0,181.11912
auto,10619,609,1273956.0,119.969461
computers,27853,2638,11377870.0,408.497139
construction,7801,939,932995.0,119.599413
country_yard,72,29,15695.45,217.992361
electronics,422979,4698,176445600.0,417.149735
furniture,8299,1634,1673243.0,201.619835
kids,5482,1007,678140.7,123.70315


## 分小类

In [None]:
# Product value per (category, sub_category) pair, computed on purchase rows only.
product_info_bysubcate = (
    data.query('event_type == "purchase"')
    .groupby(['category', 'sub_category'])
    .agg(
        **{
            '下单数': ('product_id', 'count'),
            '动销商品数': ('product_id', 'nunique'),
            'gmv': ('price', 'sum'),
            '单均价': ('price', 'mean'),
        }
    )
)

# Display only the groups that have at least one purchase.
product_info_bysubcate[product_info_bysubcate['下单数'] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,下单数,动销商品数,gmv,单均价
category,sub_category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
accessories,bag,1253,421,53155.53,42.42261
accessories,umbrella,24,8,611.52,25.48
accessories,wallet,310,125,15016.83,48.441387
apparel,belt,21,9,1222.4,58.209524
apparel,costume,489,90,46492.98,95.077669
apparel,dress,15,13,871.31,58.087333
apparel,jacket,1,1,43.23,43.23
apparel,jeans,140,64,6163.23,44.023071
apparel,jumper,2,1,61.26,30.63
apparel,scarf,10,8,241.93,24.193


## 分商品名 

In [None]:
# Product value per product_name, computed on purchase rows only.
_purchase_rows = data[data['event_type'].eq('purchase')]
product_info_bypname = _purchase_rows.groupby('product_name').agg(
    下单数=('product_id', 'count'),
    动销商品数=('product_id', 'nunique'),
    gmv=('price', 'sum'),
    单均价=('price', 'mean'),
)

# Display only the product names that have at least one purchase.
product_info_bypname.loc[product_info_bypname['下单数'].gt(0)]

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
product_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
acoustic,888,171,205951.27,231.927106
air_conditioner,411,98,201596.51,490.502457
air_heater,2483,232,110755.87,44.605667
alarm,2441,76,335979.32,137.640033
bag,1253,421,53155.53,42.422610
...,...,...,...,...
wallet,310,125,15016.83,48.441387
washer,16146,342,4658223.46,288.506346
water_heater,2774,199,290985.54,104.897455
welding,1081,88,173856.12,160.828973


## 分品牌

In [None]:
# Product value per brand, computed on purchase rows only.
# `brand` was cast to a categorical earlier in the notebook, so grouping with the
# default observed=False builds a group for every brand level, including brands
# that never appear in a purchase row (count 0, NaN mean), and raises a
# FutureWarning on newer pandas. observed=True restricts the result to brands
# actually present among the purchases.
product_info_bybrand = (
    data.query('event_type == "purchase"')
    .groupby('brand', observed=True)
    .agg({'product_id': ['count', 'nunique'], 'price': ['sum', 'mean']})
)

# Flatten the two-level column index into the metric names used across the notebook.
product_info_bybrand.columns = ['下单数', '动销商品数', 'gmv', '单均价']

# Kept for parity with the sibling cells; with observed=True no zero-count rows remain.
product_info_bybrand.query('下单数 > 0')

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a-case,58,17,275.26,4.745862
a-derma,1,1,12.36,12.360000
a-mega,9,5,1031.47,114.607778
abk,6,1,69.36,11.560000
abtoys,7,1,324.17,46.310000
...,...,...,...,...
zoom,1,1,167.31,167.310000
zotac,4,2,279.43,69.857500
zte,173,3,13623.87,78.750694
zubr,14,8,659.05,47.075000


## 分商品id

In [None]:
# Product value per product_id, computed on purchase rows only.
# The result is stored under its own name: this cell used to assign to
# `product_info_bybrand`, which silently overwrote the per-brand table built in
# the brand section.
# NOTE(review): if any later cell read `product_info_bybrand` expecting
# product-level rows, point it at `product_info_byproductid` instead.
product_info_byproductid = (
    data.query('event_type == "purchase"')
    .groupby('product_id')
    .agg({'product_id': ['count', 'nunique'], 'price': ['sum', 'mean']})
)

# Flatten the two-level column index into the metric names used across the notebook.
# '动销商品数' is 1 on every row here, because each group holds a single product_id.
product_info_byproductid.columns = ['下单数', '动销商品数', 'gmv', '单均价']

# Display only the products that have at least one purchase.
product_info_byproductid.query('下单数 > 0')

Unnamed: 0_level_0,下单数,动销商品数,gmv,单均价
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1000978,12,1,3840.13,320.010833
1001588,12,1,1538.18,128.181667
1002042,3,1,231.42,77.140000
1002062,18,1,1723.47,95.748333
1002098,13,1,4818.32,370.640000
...,...,...,...,...
59000007,1,1,47.88,47.880000
60400006,1,1,332.05,332.050000
60500001,4,1,102.34,25.585000
60500002,4,1,170.82,42.705000


## 分下单方式
- 直接下单 / 加购后下单