# 数据分析与可视化

导入包

In [1]:
import pandas as pd
from pyecharts import Bar, Pie, Line

读入数据并复制一份进行预处理，方便绘图

In [2]:
# Load the offline training set. `data` is kept as the raw frame, while the
# preprocessing and plotting cells below work on the independent copy `offline`.
data = pd.read_csv('data/ccf_offline_stage1_train.csv')
offline = data.copy(deep=True)
offline.head()

Unnamed: 0,User_id,Merchant_id,Coupon_id,Discount_rate,Distance,Date_received,Date
0,1439408,2632,,,0.0,,20160217.0
1,1439408,4663,11002.0,150:20,1.0,20160528.0,
2,1439408,2632,8591.0,20:1,0.0,20160217.0,
3,1439408,2632,1078.0,20:1,0.0,20160319.0,
4,1439408,2632,8591.0,20:1,0.0,20160613.0,


In [3]:
# Derive the helper columns that the charts below read from `offline`.


def _to_discount_rate(raw):
    """Convert one raw ``Discount_rate`` entry to a fractional rate.

    Entries written as ``'x:y'`` become ``(x - y) / x``; anything else is
    handed to ``float`` unchanged, so missing values stay NaN.
    """
    text = str(raw)
    if ':' not in text:
        return float(raw)
    parts = text.split(':')  # split once instead of three times
    threshold, reduction = float(parts[0]), float(parts[1])
    return (threshold - reduction) / threshold


# Missing distances get the sentinel -1.  Assign the result back instead of
# calling fillna(inplace=True) on the column selection: the chained in-place
# form is deprecated and leaves the frame unchanged under Copy-on-Write.
offline['Distance'] = offline['Distance'].fillna(-1)

# Parse the yyyymmdd values into timestamps; missing entries become NaT.
offline['date_received'] = pd.to_datetime(offline['Date_received'], format='%Y%m%d')
offline['date'] = pd.to_datetime(offline['Date'], format='%Y%m%d')

offline['discount_rate'] = offline['Discount_rate'].map(_to_discount_rate)

# 1 when the raw discount is written in 'x:y' form, else 0 (NaN -> 'nan' -> 0).
offline['isManjian'] = (
    offline['Discount_rate'].astype(str).str.contains(':', regex=False).astype(int)
)

# ISO weekday (Monday=1 .. Sunday=7) of the receive date; NaT rows become NaN.
offline['weekday_Receive'] = offline['date_received'].dt.dayofweek + 1

# label = 1 when `date` is at most 15 days after `date_received`.  Rows where
# either timestamp is NaT compare as False and therefore get 0.
elapsed_days = (offline['date'] - offline['date_received']).dt.total_seconds() / (60 * 60 * 24)
offline['label'] = (elapsed_days <= 15).astype(int)

offline.head(5)

Unnamed: 0,User_id,Merchant_id,Coupon_id,Discount_rate,Distance,Date_received,Date,date_received,date,discount_rate,isManjian,weekday_Receive,label
0,1439408,2632,,,0.0,,20160217.0,NaT,2016-02-17,,0,,0
1,1439408,4663,11002.0,150:20,1.0,20160528.0,,2016-05-28,NaT,0.866667,1,6.0,0
2,1439408,2632,8591.0,20:1,0.0,20160217.0,,2016-02-17,NaT,0.95,1,3.0,0
3,1439408,2632,1078.0,20:1,0.0,20160319.0,,2016-03-19,NaT,0.95,1,6.0,0
4,1439408,2632,8591.0,20:1,0.0,20160613.0,,2016-06-13,NaT,0.95,1,1.0,0


## 1.每天领券次数

In [4]:
# Keep only the rows that carry a receive date, then count the non-null
# Coupon_id values per receive date into a two-column frame.
df_1 = offline.loc[offline['Date_received'].notna()]
tmp = (
    df_1.groupby('Date_received')['Coupon_id']
    .count()
    .reset_index(name='count')
)
tmp.head()

Unnamed: 0,Date_received,count
0,20160101.0,554
1,20160102.0,542
2,20160103.0,536
3,20160104.0,577
4,20160105.0,691


In [5]:
# Bar chart of the per-date counts held in `tmp`, with a mark line at the
# maximum; written out as a standalone HTML file.
receive_dates = list(tmp['Date_received'])
daily_counts = list(tmp['count'])

bar_1 = Bar("每天被领券的数量", width=1500, height=600)
bar_1.add(
    "",
    receive_dates,
    daily_counts,
    xaxis_interval=1,
    xaxis_rotate=60,
    mark_line=['max'],
)
bar_1.render('imgs/bar_1.html')

/bin/sh: 1: Syntax error: word unexpected (expecting ")")


## 2.每月各类消费折线图

In [7]:
# Monthly line chart with three series: rows with label == 1 per receive
# month ("核销"), all rows per receive month ("领取"), and rows per `date`
# month ("消费").
months = list(range(1, 7))  # x-axis month numbers 1..6

# .dt.month is the vectorised form of apply(lambda x: x.month); NaT -> NaN.
offline['received_month'] = offline['date_received'].dt.month
offline['date_month'] = offline['date'].dt.month

# Put every series on the same month axis.  The values used to be matched to
# range(1, 7) purely by position, so a month missing from any one series
# would silently shift or shorten that line; reindexing fills such a month
# with 0 and keeps each value under its own month.
consume_coupon = (
    offline.loc[offline['label'] == 1, 'received_month']
    .value_counts()
    .reindex(months, fill_value=0)
)
received = offline['received_month'].value_counts().reindex(months, fill_value=0)
consume = offline['date_month'].value_counts().reindex(months, fill_value=0)

line_1 = Line("每月各类消费折线图")
line_1.add("核销", months, list(consume_coupon.values))
line_1.add("领取", months, list(received.values))
line_1.add("消费", months, list(consume.values))
line_1.render('imgs/line_1.html')

## 3.消费距离柱状图

In [8]:
# Bar chart of how many rows fall at each known distance value.

# Missing distances get the sentinel -1.  Assign the result back instead of
# calling fillna(inplace=True) on the column selection: the chained in-place
# form is deprecated and leaves the frame unchanged under Copy-on-Write.
offline['Distance'] = offline['Distance'].fillna(-1)

# Drop the sentinel rows, count rows per distance and order by distance.
dis = offline.loc[offline['Distance'] != -1, 'Distance'].value_counts().sort_index()

bar_2 = Bar("消费距离柱状图")
bar_2.add('', list(dis.index), list(dis.values))
bar_2.render('imgs/bar_2.html')

## 4.消费距离与核销率柱状图

In [12]:
# Bar chart of the share of rows with label == 1 at each distance 0..10.
distances = list(range(11))

# `label` only holds 0/1, so the mean per distance equals
# count(label == 1) / count(rows).  One grouped pass replaces two full-frame
# filters per distance, and a distance with rows but no label == 1 now gives
# 0.0 instead of raising KeyError on value_counts()[1].  A distance with no
# rows at all is left as NaN.
rate = list(offline.groupby('Distance')['label'].mean().reindex(distances))

bar_3 = Bar("消费距离与核销率柱状图")
bar_3.add('核销率', distances, list(rate))
bar_3.render('imgs/bar_3.html')

## 5.各类消费券数量占比饼图

In [13]:
# Pie chart of received coupons split by isManjian (0 or 1).
# The slice labels are fixed as [label for 0, label for 1], so the counts must
# be ordered by key explicitly: value_counts(sort=False) makes no promise that
# 0 comes before 1 (current pandas returns first-appearance order), which
# could attach each count to the wrong label.
received_type_counts = (
    offline.loc[offline['Date_received'].notna(), 'isManjian']
    .value_counts()
    .reindex([0, 1], fill_value=0)
)

pie_1 = Pie("各类消费券数量占比饼图")
pie_1.add('', ['折扣', '满减'], list(received_type_counts.values), is_label_show=True)
pie_1.render('imgs/pie_1.html')

## 6.核销优惠券的占比图

In [14]:
# Pie chart of rows with label == 1 split by isManjian (0 or 1).
# The slice labels are fixed as [label for 0, label for 1], so the counts must
# be ordered by key explicitly: value_counts(sort=False) makes no promise that
# 0 comes before 1 (current pandas returns first-appearance order), which
# could attach each count to the wrong label.
redeemed_type_counts = (
    offline.loc[offline['label'] == 1, 'isManjian']
    .value_counts()
    .reindex([0, 1], fill_value=0)
)

pie_2 = Pie("核销优惠券数量占比饼图")
pie_2.add('', ['折扣', '满减'], list(redeemed_type_counts.values), is_label_show=True)
pie_2.render('imgs/pie_2.html')

## 7.各种折扣率的优惠券领取与核销柱状图

In [16]:
# Grouped bar chart: per discount_rate value, how many rows exist ("领取")
# and how many of them have label == 1 ("核销").
bar_4 = Bar("各种折扣率的优惠券领取与核销柱状图")

received = offline['discount_rate'].value_counts().sort_index()

# Align the label == 1 counts to the received categories, filling rates that
# never occur with label == 1 by 0.  This replaces the hand-written
# `consume_coupon[0.975] = 0` patch, which covered exactly one rate and
# depended on a float literal matching the computed index value.
consume_coupon = (
    offline.loc[offline['label'] == 1, 'discount_rate']
    .value_counts()
    .reindex(received.index, fill_value=0)
)

# Round the category labels to four decimals for display.
received_labels = [float('%.4f' % x) for x in received.index]
consume_labels = [float('%.4f' % x) for x in consume_coupon.index]

bar_4.add('领取', received_labels, list(received.values), xaxis_rotate=50)
bar_4.add('核销', consume_labels, list(consume_coupon.values), xaxis_rotate=50)
bar_4.render('imgs/bar_4.html')

## 8.每周内领券数与核销数折线图

In [20]:
# Line chart per receive weekday (1..7): all rows ("领券") versus rows with
# label == 1 ("核销").
weekdays = list(range(1, 8))

# Reindex both series onto the weekday axis.  The values used to be matched
# to range(1, 8) purely by position, so a weekday missing from either series
# would silently shift or shorten that line; a missing weekday is now 0.
consume_coupon = (
    offline.loc[offline['label'] == 1, 'weekday_Receive']
    .value_counts()
    .reindex(weekdays, fill_value=0)
)
received = offline['weekday_Receive'].value_counts().reindex(weekdays, fill_value=0)

line_2 = Line("每周领券数与核销数折线图")
line_2.add('领券', weekdays, list(received.values), is_label_show=True)
line_2.add('核销', weekdays, list(consume_coupon.values), is_label_show=True)
line_2.render('imgs/line_2.html')

## 9.正负样本比例图

In [21]:
# Pie chart of label == 0 versus label == 1 row counts.
# value_counts() orders by descending count, so pairing it with fixed slice
# labels only works while label 0 is the majority.  Order by the label value
# itself so that the first slice is always label 0 and the second label 1.
label_counts = offline['label'].value_counts().reindex([0, 1], fill_value=0)

pie_3 = Pie("正-负比例饼图")
pie_3.add('', ['负', '正'], list(label_counts.values), is_label_show=True)
pie_3.render('imgs/pie_3.html')