# 金融风控页面案例

## 【实现】风控业务案例

### 案例背景介绍

- 通过对业务数据分析了解信贷业务状况
- 数据集说明
  - 从开源数据改造而来，基本反映真实业务数据
  - 销售，客服可以忽略
  - 账单周期：可视为放款日期（数据中没有单独的放款日期字段）
  - 账单金额-实收金额 = 未收金额
  - 应付日期为还款时间
  - 账期主要分成两种：60天和90天（数据中也存在其他取值，例如 0 天和 30 天）
  - 实际到账日为空白，说明没还钱

In [2]:
from pathlib import Path

import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts

# Load the raw business ledger; df1 is kept untouched as the pristine source.
df1 = pd.read_excel('./data/业务数据.xls')

# New indicators are derived from the raw data, so they are built on a
# separate copy and the original frame is never mutated.
df2 = df1.copy(deep=True)

# Preview the first five rows.
df2.head(5)

Unnamed: 0,销售,账单状态,账单周期,账单金额,开票金额,实收金额,未收金额,预计付款日,应付日期,商务催收日期,账期,实际到账日,开票日期,客服
0,s101,未确认,2019-05,29805.0,,,,2019-07-31,2019-07-31,2019-08-15,60,,,a201
1,s102,未确认,2019-05,1572.6,,,,2019-07-31,2019-07-31,2019-08-15,60,,,a202
2,s103,已确认,2019-04,487551.2,487551.2,,487551.2,2019-06-30,2019-06-30,2019-07-15,60,,05-16,a203
3,s104,已确认,2019-04,378835.0,378835.0,,378835.0,2019-07-31,2019-07-31,2019-08-15,90,,05-08,a204
4,s105,已确认,2019-04,326866.0,326866.0,,326866.0,2019-07-31,2019-07-31,2019-08-15,90,,05-10,a205


In [3]:
# Print each column's dtype and non-null count to see which columns have missing values.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5257 entries, 0 to 5256
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   销售      5257 non-null   object 
 1   账单状态    5257 non-null   object 
 2   账单周期    5257 non-null   object 
 3   账单金额    5257 non-null   float64
 4   开票金额    5010 non-null   float64
 5   实收金额    4470 non-null   float64
 6   未收金额    5010 non-null   float64
 7   预计付款日   5256 non-null   object 
 8   应付日期    5257 non-null   object 
 9   商务催收日期  5257 non-null   object 
 10  账期      5257 non-null   int64  
 11  实际到账日   4387 non-null   object 
 12  开票日期    4996 non-null   object 
 13  客服      5257 non-null   object 
dtypes: float64(4), int64(1), object(9)
memory usage: 575.1+ KB


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df2.describe()

Unnamed: 0,账单金额,开票金额,实收金额,未收金额,账期
count,5257.0,5010.0,4470.0,5010.0,5257.0
mean,40732.41,40968.96,40824.19,4684.636,64.539661
std,81761.72,80072.45,79706.28,28884.64,15.622765
min,0.0,25.0,0.0,0.0,0.0
25%,5103.0,5300.0,5112.25,0.0,60.0
50%,14365.0,14865.6,14340.0,0.0,60.0
75%,41780.0,42202.5,41707.5,0.0,75.0
max,1508796.0,1356215.0,1301665.0,1277098.0,90.0


In [5]:
# Parse the payment-arrival column once; rows without an arrival date become NaT.
paid_dates = pd.to_datetime(df2['实际到账日'])

# Use the latest recorded arrival date as the reference "current time".
# The max is taken over parsed datetimes (NaT is skipped) instead of over the
# raw values padded with a '0' placeholder, which only gives the right answer
# when every value is a zero-padded ISO string of the same type.
today_time = paid_dates.max()

# Missing amounts are treated as zero.
amount_columns = ['实收金额', '开票金额', '未收金额']
df2[amount_columns] = df2[amount_columns].fillna(0)

# Convert the date-like columns to datetime.
df2['账单周期'] = pd.to_datetime(df2['账单周期'])
df2['应付日期'] = pd.to_datetime(df2['应付日期'])

# Bills with no arrival date are stamped with the reference date.
df2['实际到账日'] = paid_dates.fillna(today_time)

In [6]:
# Whole days from each bill's due date to the reference date
# (negative when the due date is still in the future).
days_past_due = (today_time - df2['应付日期']).dt.days

# Whole days from the due date to the recorded arrival date
# (negative when the money arrived before the due date).
days_late_at_payment = (df2['实际到账日'] - df2['应付日期']).dt.days

# 1 unless the due date is strictly after the reference date.
df2['是否到期'] = (~(df2['应付日期'] > today_time)).astype('int64')

# 1 when the bill is at least 90 days past its due date.
df2['是否到期90天'] = (days_past_due >= 90).astype('int64')

# Outstanding amount recomputed from billed minus received.
df2['未收金额2'] = df2['账单金额'] - df2['实收金额']

# Fully settled bills (nothing outstanding) use the actual arrival date;
# every other bill is measured against the reference date.
df2['历史逾期天数'] = days_late_at_payment.where(df2['未收金额2'] == 0, days_past_due)

# Bills with nothing outstanding have zero current overdue days.
# NOTE(review): unpaid bills that are not yet due get a negative value here
# (days remaining until due) — confirm downstream consumers expect that.
df2['当前逾期天数'] = days_past_due.where(df2['未收金额2'] > 0, 0)

df2

Unnamed: 0,销售,账单状态,账单周期,账单金额,开票金额,实收金额,未收金额,预计付款日,应付日期,商务催收日期,账期,实际到账日,开票日期,客服,是否到期,是否到期90天,未收金额2,历史逾期天数,当前逾期天数
0,s101,未确认,2019-05-01,29805.0,0.0,0.0,0.0,2019-07-31,2019-07-31,2019-08-15,60,2019-05-17,,a201,0,0,29805.0,-75,-75
1,s102,未确认,2019-05-01,1572.6,0.0,0.0,0.0,2019-07-31,2019-07-31,2019-08-15,60,2019-05-17,,a202,0,0,1572.6,-75,-75
2,s103,已确认,2019-04-01,487551.2,487551.2,0.0,487551.2,2019-06-30,2019-06-30,2019-07-15,60,2019-05-17,05-16,a203,0,0,487551.2,-44,-44
3,s104,已确认,2019-04-01,378835.0,378835.0,0.0,378835.0,2019-07-31,2019-07-31,2019-08-15,90,2019-05-17,05-08,a204,0,0,378835.0,-75,-75
4,s105,已确认,2019-04-01,326866.0,326866.0,0.0,326866.0,2019-07-31,2019-07-31,2019-08-15,90,2019-05-17,05-10,a205,0,0,326866.0,-75,-75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5252,s5353,已确认,2017-07-01,22800.0,0.0,0.0,0.0,2017-07-31,2017-08-31,2017-09-15,30,2019-05-17,,a5453,1,1,22800.0,624,624
5253,s5354,已确认,2017-07-01,6483.0,0.0,0.0,0.0,2017-09-30,2017-09-30,2017-10-15,60,2019-05-17,,a5454,1,1,6483.0,594,594
5254,s5355,已核销,2017-06-01,418795.0,418795.0,418795.0,0.0,2017-08-14,2017-08-31,2017-09-15,60,2017-08-31,11-02,a5455,1,1,0.0,0,0
5255,s5356,已核销,2017-06-01,86337.0,86337.0,86337.0,0.0,2017-08-31,2017-08-31,2017-09-15,60,2017-08-22,11-02,a5456,1,1,0.0,-9,0


In [7]:
df3 = df2.copy()

# Derive the billing quarter from the billing-cycle date using the
# vectorised .dt accessor rather than a per-element lambda.
df3['账单季度'] = df3['账单周期'].dt.to_period('Q')

# Keep bills from 2017 Q3 through 2018 Q4 (inclusive on both ends).
# Explicit Period bounds avoid relying on implicit string-to-Period coercion.
first_quarter = pd.Period('2017Q3', freq='Q')
last_quarter = pd.Period('2018Q4', freq='Q')
in_window = (df3['账单季度'] >= first_quarter) & (df3['账单季度'] <= last_quarter)

# .copy() makes df3 an independent frame so later column assignments
# do not trigger SettingWithCopyWarning.
df3 = df3[in_window].copy()

df3


Unnamed: 0,销售,账单状态,账单周期,账单金额,开票金额,实收金额,未收金额,预计付款日,应付日期,商务催收日期,账期,实际到账日,开票日期,客服,是否到期,是否到期90天,未收金额2,历史逾期天数,当前逾期天数,账单季度
1398,s1499,已确认,2018-12-01,158695.0,158695.0,0.0,158695.0,2019-05-16,2019-02-28,2019-03-15,60,2019-05-17,03-21,a1599,1,0,158695.0,78,78,2018Q4
1399,s1500,已确认,2018-12-01,132310.0,132310.0,0.0,132310.0,2019-04-05,2019-03-31,2019-04-15,90,2019-05-17,01-17,a1600,1,0,132310.0,47,47,2018Q4
1400,s1501,已确认,2018-12-01,103570.0,103570.0,0.0,103570.0,2019-05-17,2019-02-28,2019-03-15,60,2019-05-17,01-25,a1601,1,0,103570.0,78,78,2018Q4
1401,s1502,已确认,2018-12-01,55333.0,55333.0,0.0,55333.0,2019-05-10,2019-03-31,2019-04-15,90,2019-05-17,03-01,a1602,1,0,55333.0,47,47,2018Q4
1402,s1503,部分核销,2018-12-01,42710.0,42710.0,0.0,42710.0,2019-05-16,2019-02-28,2019-03-15,60,2019-04-09,01-30,a1603,1,0,42710.0,78,78,2018Q4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5249,s5350,已核销,2017-07-01,12450.0,12450.0,12450.0,0.0,2017-09-30,2017-09-30,2017-10-15,60,2017-10-11,11-07,a5450,1,1,0.0,11,0,2017Q3
5250,s5351,已确认,2017-07-01,1900.0,0.0,0.0,0.0,2017-10-31,2017-10-31,2017-11-15,90,2019-05-17,,a5451,1,1,1900.0,563,563,2017Q3
5251,s5352,已确认,2017-07-01,11500.0,0.0,0.0,0.0,2017-09-30,2017-09-30,2017-10-15,60,2019-05-17,,a5452,1,1,11500.0,594,594,2017Q3
5252,s5353,已确认,2017-07-01,22800.0,0.0,0.0,0.0,2017-07-31,2017-08-31,2017-09-15,30,2019-05-17,,a5453,1,1,22800.0,624,624,2017Q3


In [8]:
# Total billed amount per billing quarter.
quarterly_billed = df3.groupby('账单季度')[['账单金额']].sum()
fn1 = quarterly_billed.set_axis(['账单金额'], axis=1)
fn1

Unnamed: 0_level_0,账单金额
账单季度,Unnamed: 1_level_1
2017Q3,8247952.62
2017Q4,11643604.99
2018Q1,17149674.79
2018Q2,31097661.29
2018Q3,38292071.12
2018Q4,51963089.64


In [9]:
# Billed amount of bills flagged as 90+ days past due, per billing quarter.
df4 = df3.loc[df3['是否到期90天'] == 1]

fn2 = (
    df4.groupby('账单季度')[['账单金额']]
    .sum()
    .rename(columns={'账单金额': '到期金额'})
)
fn2

Unnamed: 0_level_0,到期金额
账单季度,Unnamed: 1_level_1
2017Q3,8247952.62
2017Q4,11643604.99
2018Q1,17149674.79
2018Q2,31097661.29
2018Q3,38292071.12
2018Q4,28265677.59


In [10]:
# Outstanding amount on bills flagged as 90+ days past due, per billing quarter.
df4 = df3.loc[df3['是否到期90天'] == 1]
fn3 = (
    df4.groupby('账单季度')[['未收金额2']]
    .sum()
    .rename(columns={'未收金额2': '当前逾期90+金额'})
)
fn3

Unnamed: 0_level_0,当前逾期90+金额
账单季度,Unnamed: 1_level_1
2017Q3,63883.0
2017Q4,57380.0
2018Q1,64283.0
2018Q2,106930.0
2018Q3,412920.1
2018Q4,304183.0


In [11]:
# Place the three per-quarter summaries side by side, aligned on the quarter index.
dfs = [fn1, fn2, fn3]
final1 = pd.concat(dfs, axis='columns')
final1

Unnamed: 0_level_0,账单金额,到期金额,当前逾期90+金额
账单季度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017Q3,8247952.62,8247952.62,63883.0
2017Q4,11643604.99,11643604.99,57380.0
2018Q1,17149674.79,17149674.79,64283.0
2018Q2,31097661.29,31097661.29,106930.0
2018Q3,38292071.12,38292071.12,412920.1
2018Q4,51963089.64,28265677.59,304183.0


In [12]:
# 90+ net bad-debt rate: outstanding 90+ amount divided by the 90-day-due
# amount, rounded to three decimals.
bad_debt_rate = final1['当前逾期90+金额'].div(final1['到期金额'])
final1['90+净坏账率'] = bad_debt_rate.round(3)
final1

Unnamed: 0_level_0,账单金额,到期金额,当前逾期90+金额,90+净坏账率
账单季度,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017Q3,8247952.62,8247952.62,63883.0,0.008
2017Q4,11643604.99,11643604.99,57380.0,0.005
2018Q1,17149674.79,17149674.79,64283.0,0.004
2018Q2,31097661.29,31097661.29,106930.0,0.003
2018Q3,38292071.12,38292071.12,412920.1,0.011
2018Q4,51963089.64,28265677.59,304183.0,0.011


In [13]:
# Shared inputs for both series, computed once.
quarter_labels = list(final1.index.values.astype(str))
bad_debt_rates = list(final1['90+净坏账率'])

# Upper bound of the right-hand axis: about 25% headroom above the highest
# rate, so the line is never clipped when the data changes. Falls back to
# automatic scaling (None) when there is no positive rate to scale against.
peak_rate = float(max(bad_debt_rates)) if bad_debt_rates else 0.0
rate_axis_max = round(peak_rate * 1.25, 3) if peak_rate > 0 else None

# Bars: billed amount per quarter on the left (primary) axis.
bar = (
    Bar()
    .add_xaxis(quarter_labels)
    .add_yaxis(
        "账单金额",
        list(final1.账单金额),
        yaxis_index=0,
        color="#5793f3",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="90+净坏账率"),
    )
    # Secondary axis on the right for the bad-debt rate.
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="90+净坏账率",
            type_="value",
            min_=0,
            max_=rate_axis_max,
            position="right",
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="#d14a61")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value}"),
        )
    )
)

# Line: bad-debt rate per quarter, bound to the secondary axis (index 1).
line = (
    Line()
    .add_xaxis(quarter_labels)
    .add_yaxis(
        "90+净坏账率",
        bad_debt_rates,
        yaxis_index=1,
        color="#675bba",
        label_opts=opts.LabelOpts(is_show=False),
    )
)

# Make sure the output directory exists so rendering works on a fresh checkout,
# then write the combined chart to ./chart/90+净坏账率.html.
Path('./chart').mkdir(parents=True, exist_ok=True)
bar.overlap(line).render('./chart/90+净坏账率.html')

'd:\\workspace\\machine-learning\\financial-risk-control\\chart\\90+净坏账率.html'