In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the raw order export into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='tmall_order_report.csv')
df.head(n=5)

In [None]:
# Show the column labels exactly as they were read from the CSV header.
df.columns

In [None]:
# Normalise the column labels by stripping leading/trailing whitespace from
# every header, rather than listing the affected columns by hand. The raw
# export contains labels such as '收货地址 ' and '订单付款时间 ' with a trailing
# space; any other header with stray whitespace is cleaned up the same way.
df = df.rename(columns=str.strip)
df.columns

In [None]:
# Print each column's dtype and non-null count.
df.info()

In [None]:
# Parse both timestamp columns into datetime values, then re-check the dtypes.
for time_col in ('订单创建时间', '订单付款时间'):
    df[time_col] = pd.to_datetime(df[time_col])
df.info()

In [None]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()


In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Summary statistics for the frame's columns.
df.describe()

In [None]:
# Build an automated profiling report for the whole frame and show it as the
# cell output.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
import pandas_profiling as pp
report = pp.ProfileReport(df)
report

In [None]:
# Paid orders: rows whose payment timestamp is present.
df_payed = df.loc[df['订单付款时间'].notna()]
# Transacted orders: paid orders where the buyer's actual payment is non-zero.
df_trans = df_payed.loc[df_payed['买家实际支付金额'].ne(0)]
# Fully settled orders: paid orders with a refund amount of zero.
df_trans_full = df_payed.loc[df_payed['退款金额'].eq(0)]

In [None]:
import pyecharts.options as opts
from pyecharts.charts import Line

# Daily count of transacted orders, bucketed by order-creation time.
# Resampling with `on=` reads the timestamp column directly instead of
# rebinding df_trans to a re-indexed copy, so df_trans keeps its
# '订单创建时间' column and this cell can be re-run without a KeyError.
# (The series holds daily counts; the historical name is kept.)
se_trans_month = df_trans.resample('D', on='订单创建时间')['订单编号'].count()

# Line chart of the daily order counts, with the day of month on the x axis.
name = '成交订单数'

(
    Line()
    .add_xaxis(xaxis_data=list(se_trans_month.index.day.map(str)))
    .add_yaxis(
        series_name=name,
        # Hand pyecharts plain Python ints rather than a pandas Series.
        y_axis=se_trans_month.tolist(),
    )
    .set_global_opts(
        yaxis_opts=opts.AxisOpts(
            splitline_opts=opts.SplitLineOpts(is_show=True)
        )
    )
    .render_notebook()
)

In [None]:
# Number of transacted orders per shipping address, largest first.
orders_by_address = df_trans.groupby('收货地址')['收货地址']
se_trans_map = orders_by_address.count().sort_values(ascending=False)
# Helper that shortens autonomous-region names so they match the province
# names used by the geographic map chart below.
def strip_region(iterable):
    """Return a list of the names in ``iterable`` with '自治区' names shortened.

    '内蒙古自治区' is cut to its first three characters; any other name ending
    in '自治区' is cut to its first two characters. Names without that suffix
    are returned unchanged.
    """
    suffix = '自治区'
    shortened = []
    for region in iterable:
        if not region.endswith(suffix):
            shortened.append(region)
            continue
        keep = 3 if region == '内蒙古自治区' else 2
        shortened.append(region[:keep])
    return shortened
import pyecharts.options as opts
from pyecharts.charts import Map

# Shorten autonomous-region names, then remove any '省' characters from both
# ends of each label.
se_trans_map.index = pd.Index(strip_region(se_trans_map.index)).str.strip('省')

# Render the geographic distribution of orders.
name = '订单数'
region_counts = [[region, count] for region, count in se_trans_map.items()]
# Cap the colour scale at 60% of the largest regional count.
colour_scale = opts.VisualMapOpts(max_=max(se_trans_map) * 0.6)

(
    Map()
    .add(series_name=name, data_pair=region_counts)
    .set_global_opts(visualmap_opts=colour_scale)
    .render_notebook()
)

In [None]:
# Order counts at each stage of the purchase funnel.
dict_convs = dict()
dict_convs['总订单数'] = len(df)
# df_payed already contains only rows with a payment timestamp, so its length
# is the number of paid orders (the previous `.notnull()` call did not filter
# anything; it only produced a same-sized boolean frame).
dict_convs['订单付款数'] = len(df_payed)
# Rebuild df_trans as "paid orders with a non-zero payment", matching how it is
# first derived from df_payed. Filtering the unfiltered df instead could count
# unpaid rows and make this stage larger than the paid stage.
df_trans = df_payed[df_payed['买家实际支付金额'] != 0]
dict_convs['到款订单数'] = len(df_trans)
dict_convs['全额到款订单数'] = len(df_trans_full)

In [None]:
# Turn the funnel counts into a one-column DataFrame indexed by stage name.
df_convs = pd.DataFrame({'订单数': pd.Series(dict_convs)})
order_counts = df_convs['订单数']

# Overall conversion rate: each stage as a percentage of the total order count.
total_convs = order_counts.div(df_convs.loc['总订单数', '订单数']).mul(100)
df_convs['总体转化率'] = total_convs.round(0)

# Step conversion rate: each stage as a percentage of the previous stage.
# The first stage has no predecessor and is set to 100.
single_convs = order_counts.div(order_counts.shift()).mul(100).fillna(100)
df_convs['单一转化率'] = single_convs.round(0)
df_convs

In [None]:
from pyecharts.charts import Funnel
from pyecharts import options as opts


def build_conversion_funnel(rate_column, conversions=None):
    """Build a funnel chart for one conversion-rate column.

    Parameters
    ----------
    rate_column : str
        Column of ``conversions`` to plot; also used as the series name and
        the chart title.
    conversions : DataFrame, optional
        Frame indexed by funnel stage. Defaults to the notebook-level
        ``df_convs``, looked up when the function is called.

    Returns
    -------
    Funnel
        The configured chart; call ``.render_notebook()`` on it to display.
    """
    if conversions is None:
        conversions = df_convs
    stage_rates = [
        list(pair) for pair in zip(conversions.index, conversions[rate_column])
    ]
    chart = Funnel().add(
        series_name=rate_column,
        data_pair=stage_rates,
        is_selected=True,
        label_opts=opts.LabelOpts(position='inside'),
    )
    # Tooltip shows "<series><br/><stage>:<rate>%".
    chart.set_series_opts(
        tooltip_opts=opts.TooltipOpts(formatter='{a}<br/>{b}:{c}%')
    )
    chart.set_global_opts(title_opts=opts.TitleOpts(title=rate_column))
    return chart


# Funnel of the overall conversion rate (each stage relative to all orders).
name = '总体转化率'
funnel = build_conversion_funnel(name)
funnel.render_notebook()

In [None]:
# Funnel of the step conversion rate (each stage relative to the previous one).
name = '单一转化率'
stage_rates = [[stage, rate] for stage, rate in zip(df_convs.index, df_convs[name])]

funnel = (
    Funnel()
    .add(
        series_name=name,
        data_pair=stage_rates,
        is_selected=True,
        label_opts=opts.LabelOpts(position='inside'),
    )
    # Tooltip shows "<series><br/><stage>:<rate>%".
    .set_series_opts(tooltip_opts=opts.TooltipOpts(formatter='{a}<br/>{b}:{c}%'))
    .set_global_opts(title_opts=opts.TitleOpts(title=name))
)

funnel.render_notebook()