In [1]:
import sys
sys.path.append("../")

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from scorecardpipeline import *
from scorecardpipeline.rule_extraction import DecisionTreeRuleExtractor


# --- Global settings ---------------------------------------------------------
# NOTE(review): init_setting presumably seeds the global RNGs; the data
# generation and the split below both depend on that global state — confirm.
logger = init_setting(seed=8888, logger=True)


# --- Synthetic demo data -----------------------------------------------------
feature_map = {}
n_samples = 10000
ab = np.array(list('ABCDEFG'))

# The columns are drawn in the order A, B, C, 时间, target; keep this order so
# the global NumPy random stream is consumed the same way on every run.
data = pd.DataFrame(
    {
        # integer feature in [0, 10)
        'A': np.random.randint(10, size=n_samples),
        # categorical feature over all seven letters
        'B': ab[np.random.choice(len(ab), n_samples)],
        # categorical feature over the first two letters only
        'C': ab[np.random.choice(2, n_samples)],
        # uniform float in [0, 1)
        '时间': np.random.random(size=n_samples),
        # binary label, drawn independently of the features
        'target': np.random.randint(2, size=n_samples),
    }
)


# --- Train / test split ------------------------------------------------------
# 70/30 split, stratified on the label. No random_state is passed, so the
# shuffle draws from NumPy's global random state.
train, test = train_test_split(
    data,
    test_size=0.3,
    stratify=data["target"],
)


# --- Rule extraction and report ----------------------------------------------
pdtr = DecisionTreeRuleExtractor(
    target="target",
    feature_map=feature_map,
    max_iter=8,
)

fit_options = dict(
    lift=0.,
    max_depth=2,
    max_samples=1.,
    verbose=False,
    min_samples_split=8,
    min_samples_leaf=5,
)
pdtr.fit(train, **fit_options)

# Evaluate the extracted rules on each validation frame and export the result
# to an Excel workbook at the given path.
validation_sets = [test, train, data]
report = pdtr.report(
    valid=validation_sets,
    save="model_report/决策树组合策略挖掘.xlsx",
)

In [2]:
# First entry of the report. The hit counts of the complementary rules 0-3 sum
# to 7,000 (70% of n_samples), so this is presumably the training-set table —
# confirm against DecisionTreeRuleExtractor.report.
report[0]

Unnamed: 0,组合策略,命中数,命中率,好样本数,好样本占比,坏样本数,坏样本占比,坏率,样本整体坏率,LIFT值
0,B <= 0.495 & 时间 > 0.943,243,0.0347,142,0.0401,101,0.0292,0.4156,0.4941,0.8411
1,B <= 0.495 & 时间 <= 0.943,3758,0.5369,1933,0.5459,1825,0.5276,0.4856,0.4941,0.9828
2,B > 0.495 & 时间 <= 0.877,2669,0.3813,1321,0.3731,1348,0.3897,0.5051,0.4941,1.0221
3,B > 0.495 & 时间 > 0.877,330,0.0471,145,0.0409,185,0.0535,0.5606,0.4941,1.1345
4,B <= 0.495 & A <= 8.5,3605,0.515,1889,0.5335,1716,0.4961,0.476,0.4941,0.9633
5,B > 0.495 & A > 4.5,1496,0.2137,749,0.2115,747,0.216,0.4993,0.4941,1.0105
6,B > 0.495 & A <= 4.5,1503,0.2147,717,0.2025,786,0.2272,0.523,0.4941,1.0583
7,B <= 0.495 & A > 8.5,396,0.0566,186,0.0525,210,0.0607,0.5303,0.4941,1.0732
8,A <= 8.5 & A <= 3.5,2772,0.396,1432,0.4044,1340,0.3874,0.4834,0.4941,0.9783
9,A <= 8.5 & A > 3.5,3526,0.5037,1775,0.5013,1751,0.5062,0.4966,0.4941,1.005


In [3]:
# Second entry of the report. The hit counts of the complementary rules 0-3
# sum to 3,000 (30% of n_samples), so this is presumably the table for `test`,
# the first item passed in `valid` — confirm.
report[1]

Unnamed: 0,组合策略,命中数,命中率,好样本数,好样本占比,坏样本数,坏样本占比,坏率,样本整体坏率,LIFT值
0,B <= 0.495 & 时间 > 0.943,96,0.032,50,0.0329,46,0.031,0.4792,0.494,0.97
1,B <= 0.495 & 时间 <= 0.943,1604,0.5347,825,0.5435,779,0.5256,0.4857,0.494,0.9831
2,B > 0.495 & 时间 <= 0.877,1146,0.382,570,0.3755,576,0.3887,0.5026,0.494,1.0174
3,B > 0.495 & 时间 > 0.877,154,0.0513,73,0.0481,81,0.0547,0.526,0.494,1.0647
4,B <= 0.495 & A <= 8.5,1535,0.5117,791,0.5211,744,0.502,0.4847,0.494,0.9812
5,B > 0.495 & A > 4.5,617,0.2057,306,0.2016,311,0.2099,0.5041,0.494,1.0203
6,B > 0.495 & A <= 4.5,683,0.2277,337,0.222,346,0.2335,0.5066,0.494,1.0255
7,B <= 0.495 & A > 8.5,165,0.055,84,0.0553,81,0.0547,0.4909,0.494,0.9937
8,A <= 8.5 & A <= 3.5,1214,0.4047,615,0.4051,599,0.4042,0.4934,0.494,0.9988
9,A <= 8.5 & A > 3.5,1487,0.4957,744,0.4901,743,0.5013,0.4997,0.494,1.0115


In [4]:
# Third entry of the report. The hit counts of rules 0-3 again sum to 7,000,
# so this is presumably the table for `train`, the second item in `valid`.
# NOTE(review): some counts differ slightly from report[0] (e.g. 242 vs 243
# hits for rule 0) even though both appear to cover the same 7,000 rows —
# worth confirming why.
report[2]

Unnamed: 0,组合策略,命中数,命中率,好样本数,好样本占比,坏样本数,坏样本占比,坏率,样本整体坏率,LIFT值
0,B <= 0.495 & 时间 > 0.943,242,0.0346,141,0.0398,101,0.0292,0.4174,0.4941,0.8446
1,B <= 0.495 & 时间 <= 0.943,3759,0.537,1934,0.5462,1825,0.5276,0.4855,0.4941,0.9825
2,B > 0.495 & 时间 <= 0.877,2668,0.3811,1320,0.3728,1348,0.3897,0.5052,0.4941,1.0225
3,B > 0.495 & 时间 > 0.877,331,0.0473,146,0.0412,185,0.0535,0.5589,0.4941,1.1311
4,B <= 0.495 & A <= 8.5,3605,0.515,1889,0.5335,1716,0.4961,0.476,0.4941,0.9633
5,B > 0.495 & A > 4.5,1496,0.2137,749,0.2115,747,0.216,0.4993,0.4941,1.0105
6,B > 0.495 & A <= 4.5,1503,0.2147,717,0.2025,786,0.2272,0.523,0.4941,1.0583
7,B <= 0.495 & A > 8.5,396,0.0566,186,0.0525,210,0.0607,0.5303,0.4941,1.0732
8,A <= 8.5 & A <= 3.5,2772,0.396,1432,0.4044,1340,0.3874,0.4834,0.4941,0.9783
9,A <= 8.5 & A > 3.5,3526,0.5037,1775,0.5013,1751,0.5062,0.4966,0.4941,1.005
