In [1]:
import pandas as pd
import numpy as np
from multiprocessing import Pool
from functools import partial
from tqdm import tqdm_notebook as tqdm
from datetime import date, timedelta
import pickle
import Model_func as func

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, Flatten, TimeDistributed, LeakyReLU, Conv2D, concatenate, BatchNormalization, MaxPooling2D, AveragePooling1D, Reshape
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras import layers, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

In [3]:
with open(r'D:\Customer_Value\data\train_local\train_v1', 'rb') as fp:
    train = pickle.load(fp)
with open(r'D:\Customer_Value\data\train_local\val_v1', 'rb') as fp:
    val = pickle.load(fp)
with open(r'D:\Customer_Value\data\train_local\test_v1', 'rb') as fp:
    test = pickle.load(fp)

Transfer Data Form to trainable

Model Separating Asset and Trade

In [4]:
train_trade = np.array([[data[i].values.tolist() for i in range(2)] for data in train])
train_asset = np.array([[data[i].values.tolist() for i in range(2, 5)] for data in train])
train_demo = np.array([data[5] for data in train])
label_train = [data[7] for data in train]
Y_train = pd.DataFrame(label_train, columns=['label'])
Y_train = pd.get_dummies(Y_train, columns=['label'])
Y_train = Y_train.values

In [5]:
val_trade = np.array([[data[i].values.tolist() for i in range(2)] for data in val])
val_asset = np.array([[data[i].values.tolist() for i in range(2, 5)] for data in val])
val_demo = np.array([data[5] for data in val])
label_val = [data[7] for data in val]
Y_val = pd.DataFrame(label_val, columns=['label'])
Y_val = pd.get_dummies(Y_val, columns=['label'])
Y_val = Y_val.values

In [6]:
test_trade = np.array([[data[i].values.tolist() for i in range(2)] for data in test])
test_asset = np.array([[data[i].values.tolist() for i in range(2, 5)] for data in test])
test_demo = np.array([data[5] for data in test])
label_test = [data[7] for data in test]
Y_test = pd.DataFrame(label_test, columns=['label'])
Y_test = pd.get_dummies(Y_test, columns=['label'])
Y_test = Y_test.values

In [28]:
input_trade = Input(train_trade.shape[1:])
trade_model = Conv2D(filters=1, 
              strides=1, 
              padding='valid', 
              activation='relu',
              kernel_size=(1, 11),
              data_format='channels_first')(input_trade)
trade_model = Conv2D(filters=1, 
              strides=1, 
              padding='valid', 
              activation='relu',
              kernel_size=(10, 1),
              data_format='channels_first')(trade_model)
trade_model = Flatten()(trade_model)
input_asset = Input(train_asset.shape[1:])
asset_model = Conv2D(filters=1, 
              strides=1, 
              padding='valid', 
              activation='relu',
              kernel_size=(1, 21),
              data_format='channels_first')(input_asset)
asset_model = Conv2D(filters=1, 
              strides=1, 
              padding='valid', 
              activation='relu',
              kernel_size=(10, 1),
              data_format='channels_first')(asset_model)
asset_model = Flatten()(asset_model)
input_demographic = Input(train_demo.shape[1:])
combinedInput = concatenate([trade_model, asset_model, input_demographic])
full_model = Dense(128, activation='relu')(combinedInput)
full_model = Dense(32, activation='relu')(full_model)
full_model = Dense(8, activation='relu')(full_model)
full_model = Dense(2, activation='softmax')(full_model)
model = Model([input_trade, input_asset, input_demographic], full_model)
opt = optimizers.Nadam(lr=0.001)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=[tf.keras.metrics.AUC()])
model.fit([train_trade, train_asset, train_demo], Y_train, validation_data=([val_trade, val_asset, val_demo], Y_val), batch_size=8, epochs=10, verbose=2)

Train on 40655 samples, validate on 1967 samples
Epoch 1/10
40655/40655 - 25s - loss: 0.4613 - auc_1: 0.8643 - val_loss: 0.4205 - val_auc_1: 0.8889
Epoch 2/10
40655/40655 - 24s - loss: 0.4189 - auc_1: 0.8900 - val_loss: 0.4148 - val_auc_1: 0.8938
Epoch 3/10
40655/40655 - 24s - loss: 0.4035 - auc_1: 0.8985 - val_loss: 0.3958 - val_auc_1: 0.9029
Epoch 4/10
40655/40655 - 24s - loss: 0.3883 - auc_1: 0.9062 - val_loss: 0.4140 - val_auc_1: 0.8986
Epoch 5/10
40655/40655 - 24s - loss: 0.3727 - auc_1: 0.9139 - val_loss: 0.4082 - val_auc_1: 0.8987
Epoch 6/10
40655/40655 - 24s - loss: 0.3541 - auc_1: 0.9227 - val_loss: 0.4301 - val_auc_1: 0.8875
Epoch 7/10
40655/40655 - 24s - loss: 0.3315 - auc_1: 0.9325 - val_loss: 0.4610 - val_auc_1: 0.8817
Epoch 8/10
40655/40655 - 24s - loss: 0.3090 - auc_1: 0.9414 - val_loss: 0.5053 - val_auc_1: 0.8771
Epoch 9/10


KeyboardInterrupt: 

Evaluation

In [11]:
result = func.Evaluation([test_trade, test_asset, test_demo], label_test, threshold=0.5)
print(result[0])
print(result[1])

0.8563327032136105
              precision    recall  f1-score   support

      ACTIVE       0.88      0.82      0.85       514
       CHURN       0.84      0.90      0.87       544

    accuracy                           0.86      1058
   macro avg       0.86      0.86      0.86      1058
weighted avg       0.86      0.86      0.86      1058



In [28]:
model.save(r'D:\Customer_Value\model\first_stage.h5')

Evaluation on other samples

In [3]:
model = load_model(r'D:\Customer_Value\model\first_stage.h5')

W0303 09:03:22.524970 12480 deprecation.py:506] From C:\Users\011553\AppData\Local\Continuum\anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\init_ops.py:97: calling GlorotUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0303 09:03:22.527948 12480 deprecation.py:506] From C:\Users\011553\AppData\Local\Continuum\anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0303 09:03:22.528949 12480 deprecation.py:506] From C:\Users\011553\AppData\Local\Continuum\anaconda3\envs\tf-gpu\lib\site-packages\tensorflow\pyth

In [4]:
with open(r'D:\Customer_Value\data\evaluate\local\evaluate_large', 'rb') as fp:
    eval_test = pickle.load(fp)

In [7]:
# eval_test_trade = np.array([[data[i].values.tolist() for i in range(2)] for data in eval_test])
# eval_test_asset = np.array([[data[i].values.tolist() for i in range(2, 5)] for data in eval_test])
eval_test_demo = np.array([data[5] for data in eval_test])
label_test_eval = [data[7] for data in eval_test]
Y_test_eval = pd.DataFrame(label_test_eval, columns=['label'])
Y_test_eval = pd.get_dummies(Y_test_eval, columns=['label'])
Y_test_eval = Y_test_eval.values

In [43]:
test_trade.shape

(1041, 2, 120, 11)

In [10]:
result = func.Evaluation([eval_test_trade, eval_test_asset, eval_test_demo], label_test_eval, model, threshold=0.5)
print(result[0])
print(result[1])

  'recall', 'true', average, warn_for)


0.83064
              precision    recall  f1-score   support

      ACTIVE       1.00      0.83      0.91    150000
       CHURN       0.00      0.00      0.00         0

    accuracy                           0.83    150000
   macro avg       0.50      0.42      0.45    150000
weighted avg       1.00      0.83      0.91    150000

