In [None]:
'''sh
pip install -r requirements.txt
cd code
'''


In [1]:
import os
import random

import numpy as np
import pandas as pd
import tensorflow as tf
SEED = 42


def seed_everything(seed=0):
    """Seed the random number generators used by this notebook.

    Args:
        seed: Value passed, in turn, to Python's ``random`` module, NumPy's
            global generator and TensorFlow's global generator. Defaults to 0.
    """
    # One seeding entry point per library, applied in a fixed order.
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)


# Seed once, before any data loading or model construction.
seed_everything(SEED)

### 第一问
银行根据中小微企业的实力、信誉对其信贷风险做出评估，然后依据信贷风险等因素来确定是否放贷及贷款额度、利率和期限等信贷策略。

某银行对确定要放贷企业的贷款额度为10~100万元；年利率为4%~15%；贷款期限为1年。附件1~3分别给出了123家有信贷记录企业的相关数据、302家无信贷记录企业的相关数据和贷款利率与客户流失率关系的2019年统计数据。

对附件1中123家企业的信贷风险进行量化分析，给出该银行在年度信贷总额固定时对这些企业的信贷策略。

##### 数据处理

##### 特征工程

In [2]:
import load

# Fetch the num=123 dataset, pass it through load.re_agg, discard the two
# refund columns and replace missing values with a tiny positive constant.
data = (
    load.re_agg(load.get_data(num=123))
    .drop(['refund_in', 'refund_out'], axis=1)
    .fillna(1e-8)
)


##### 数据集整理

In [3]:
# Windowing / batching parameters handed to load.get_tesors.
input_length, label_length, total_length = 12, 1, 36
batch_size = 123

# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept because the training cells below refer to it.
input, test, label = load.get_tesors(
    data,
    input_length=input_length,
    label_length=label_length,
    total_length=total_length,
    batch_size=batch_size,
)

##### 训练神经网络模型，预测2020年的信用等级和违约概率

In [4]:
CONV_WIDTH = 3
OUT_STEPS = 1
# Width of the last axis of the label tensor.
num_labels = label.shape[-1]

# Convolutional forecaster, assembled layer by layer.
multi_conv_model = tf.keras.Sequential()
# Keep only the trailing CONV_WIDTH time steps:
# [batch, time, features] => [batch, CONV_WIDTH, features]
multi_conv_model.add(tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]))
# => [batch, 1, conv_units]
multi_conv_model.add(
    tf.keras.layers.Conv1D(256, kernel_size=CONV_WIDTH, activation='relu'))
# => [batch, 1, out_steps*features]; the kernel starts at zero.
multi_conv_model.add(
    tf.keras.layers.Dense(OUT_STEPS * num_labels,
                          kernel_initializer=tf.initializers.zeros()))
# => [batch, out_steps, features]
multi_conv_model.add(tf.keras.layers.Reshape([OUT_STEPS, num_labels]))

# Stop once val_loss has not improved for 4 epochs and roll back to the best
# weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=4,
    restore_best_weights=True,
)

# NOTE(review): this schedule is created but never passed to fit() below.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))

multi_conv_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# The remaining fit() arguments are left at their Keras defaults.
history = multi_conv_model.fit(
    x=input,
    y=label,
    batch_size=123,
    epochs=100,
    verbose=1,
    validation_split=0.3,
    shuffle=True,
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100


In [5]:
# Stacked LSTM regressor: four sequence-returning LSTM layers of growing
# width, a final LSTM that returns only its last output, a 2-unit Dense head
# and a fixed x1000 scaling of the output.
# (An expand_dims Lambda front-end existed in the original cell but was
# commented out; it is not part of the model.)
lstm_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.LSTM(
            units=width,
            activation="tanh",
            recurrent_activation="sigmoid",
            return_sequences=True,
        )
        for width in (4, 16, 32, 64)
    ]
    + [
        tf.keras.layers.LSTM(
            units=128,
            activation="tanh",
            recurrent_activation="sigmoid",
        ),
        tf.keras.layers.Dense(2),
        tf.keras.layers.Lambda(lambda x: x * 1000.0),
    ]
)

# Stop once val_loss has not improved for 10 epochs and roll back to the best
# weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)

# NOTE(review): this schedule is created but never passed to fit() below.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 0.1 * 10**(-epoch / 20))

lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# The remaining fit() arguments are left at their Keras defaults.
history = lstm_model.fit(
    x=input,
    y=label,
    batch_size=25,
    epochs=100,
    verbose=1,
    validation_split=0.3,
    shuffle=True,
    callbacks=[early_stopping],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


In [6]:
import tfm

# Build the model via the project's tfm.tsf_model helper. The per-sample input
# shape is taken from the training tensor (everything after the first axis).
# mlp_dropout and dropout are not passed here.
tfm_model = tfm.tsf_model(
    input_shape=input.shape[1:],
    head_size=64,
    num_heads=8,
    ff_dim=16,
    num_transformer_blocks=4,
    mlp_units=[123],
)

tfm_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        mode='min',
        patience=5,
        restore_best_weights=True,
    )
]

history = tfm_model.fit(
    x=input,
    y=label,
    batch_size=8,
    epochs=200,
    validation_split=0.3,
    callbacks=callbacks,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200


##### 测试结果

In [7]:
# Run each trained model on `test` through load.get_output and persist one
# CSV per model.
#
# Make sure the output folder exists first: to_csv writes into ./results and
# (assuming these are pandas objects) does not create missing parent
# directories, so a fresh checkout without that folder would fail here.
os.makedirs('./results', exist_ok=True)

cnn_result = load.get_output(test, model=multi_conv_model)
cnn_result.to_csv('./results/cnn.csv')

lstm_result = load.get_output(test, model=lstm_model)
lstm_result.to_csv('./results/lstm.csv')

tfm_result = load.get_output(test, model=tfm_model)
tfm_result.to_csv('./results/tfm.csv')

##### 预测后302家企业的信用评级与违约概率

In [10]:
import load

# Same preparation as for the num=123 set, now with num=302. This rebinds
# `data`, `input`, `test`, `label` and `batch_size`.
data = (
    load.re_agg(load.get_data(num=302))
    .drop(['refund_in', 'refund_out'], axis=1)
    .fillna(1e-8)
)

input_length, label_length, total_length = 12, 1, 36
batch_size = 302

# Arguments are passed positionally here, in the order
# (data, input_length, label_length, total_length, batch_size).
input, test, label = load.get_tesors(
    data,
    input_length,
    label_length,
    total_length,
    batch_size,
)

In [11]:
# Run the already-trained models on the num=302 `test` tensor and save one
# CSV per model.
# NOTE(review): these are the same three paths the earlier results cell writes
# to, so the earlier files are overwritten - confirm that this is intended
# before relying on ./results/*.csv downstream.
cnn_result = load.get_output(test, model=multi_conv_model)
cnn_result.to_csv('./results/cnn.csv')

lstm_result = load.get_output(test, model=lstm_model)
lstm_result.to_csv('./results/lstm.csv')

tfm_result = load.get_output(test, model=tfm_model)
tfm_result.to_csv('./results/tfm.csv')

##### 确定信贷策略