In [1]:
from weightensemble import WeightForestClassifier
from sklearn.ensemble import RandomForestClassifier

In [2]:
import numpy as np
import numpy.random as randGen

from sklearn import clone

# 판별함수
def make_label(x):
    if x[0] > 0.95:
        return 0
    result = 1
    if x[1] < 0:
        result += 1
    if (x[2] <= 0.8 and x[3] <= 0.8) or (x[2] > 0.8 and x[3] > 0.8):
        result += 1
    return result

# 정규분포를 따르는 4개의 features를 가지는 X, 위의 판별함수를 사용하여 y, 로 dataset을 만드는 함수
def make_dataset(bias, norm_scale, data_size):
    x0 = randGen.normal(bias[0], norm_scale, data_size)
    x1 = randGen.normal(bias[1], norm_scale, data_size)
    x2 = randGen.normal(bias[2], norm_scale, data_size)
    x3 = randGen.normal(bias[3], norm_scale, data_size)
    X = np.stack((x0, x1, x2, x3), axis=1)
    y = np.array(list(map(make_label, X)))
    return X, y

In [3]:
np.random.seed(None)

# 99.9%의 확률로 (-1에서 1사이의 값 + bias)가 나오는 정규분포를 features로 가지는 100,000개의 dataset
fitX, fity = make_dataset((0, 0, 0, 0), 1/3, 100000)

rfc = clone(RandomForestClassifier(max_depth=None, n_estimators=100))
rfc.fit(fitX, fity)

# 이름 짓는 방식 : wfc_(log10(reward)+4)_(log10(punishment)+2)_(각 회차마다 훈련시킬 데이터크기)
wfc_2_2_100 = clone(WeightForestClassifier(max_depth=None, n_estimators=100))
wfc_2_2_100.fit(fitX, fity)

wfc_0_0_100 = clone(WeightForestClassifier(max_depth=None, n_estimators=100, reward=0.0001, punishment=0.01))
wfc_0_0_100.fit(fitX, fity)

wfc_2_2_1 = clone(WeightForestClassifier(max_depth=None, n_estimators=100))
wfc_2_2_1.fit(fitX, fity)

wfc_0_0_1 = clone(WeightForestClassifier(max_depth=None, n_estimators=100, reward=0.0001, punishment=0.01))
wfc_0_0_1.fit(fitX, fity)

WeightForestClassifier(punishment=0.01, reward=0.0001)

In [4]:
# bias를 0.05씩 늘려가면서 1000개씩의 dataset으로 가중치에 학습없이 score확인
print("rfc | wfc_2_2_100 | wfc_0_0_100 | wfc_2_2_1 | wfc_0_0_1")
for i in range(20):
    bias = i * 0.05
    testX, testy = make_dataset((bias, bias, bias, bias), 1/3, 1000)
    print(rfc.score(testX, testy), "|",
          wfc_2_2_100.score(testX, testy), "|",
          wfc_0_0_100.score(testX, testy), "|",
          wfc_2_2_1.score(testX, testy), "|",
          wfc_0_0_1.score(testX, testy))

rfc | wfc_2_2_100 | wfc_0_0_100 | wfc_2_2_1 | wfc_0_0_1
1.0 | 1.0 | 1.0 | 1.0 | 1.0
1.0 | 1.0 | 1.0 | 1.0 | 1.0
1.0 | 1.0 | 1.0 | 1.0 | 1.0
0.999 | 1.0 | 0.998 | 1.0 | 1.0
0.999 | 0.999 | 0.999 | 0.999 | 0.999
0.999 | 0.999 | 0.998 | 0.999 | 0.998
0.995 | 0.994 | 0.994 | 0.994 | 0.993
0.994 | 0.994 | 0.993 | 0.994 | 0.994
0.986 | 0.99 | 0.985 | 0.986 | 0.985
0.981 | 0.987 | 0.98 | 0.982 | 0.98
0.974 | 0.986 | 0.974 | 0.981 | 0.975
0.965 | 0.972 | 0.963 | 0.97 | 0.964
0.938 | 0.95 | 0.934 | 0.949 | 0.934
0.912 | 0.925 | 0.911 | 0.932 | 0.914
0.88 | 0.903 | 0.876 | 0.908 | 0.883
0.842 | 0.87 | 0.841 | 0.883 | 0.842
0.807 | 0.828 | 0.798 | 0.879 | 0.807
0.751 | 0.777 | 0.746 | 0.853 | 0.756
0.688 | 0.72 | 0.684 | 0.845 | 0.702
0.623 | 0.646 | 0.614 | 0.837 | 0.632


In [5]:
# 같은 방식으로 가중치를 학습시켜가며 score확인
for i in range(20):
    bias = i * 0.05
    testX, testy = make_dataset((bias, bias, bias, bias), 1/3, 1000)

    trainX_100 = testX[:100]
    trainy_100 = testy[:100]
    wfc_2_2_100.weight_fit(testX, testy)
    wfc_0_0_100.weight_fit(testX, testy)
    trainX_1 = testX[:1]
    trainy_1 = testy[:1]
    wfc_2_2_1.weight_fit(testX, testy)
    wfc_0_0_1.weight_fit(testX, testy)

    print(rfc.score(testX, testy), "|",
          wfc_2_2_100.score(testX, testy), "|",
          wfc_0_0_100.score(testX, testy), "|",
          wfc_2_2_1.score(testX, testy), "|",
          wfc_0_0_1.score(testX, testy))

0.999 | 0.999 | 0.999 | 0.999 | 0.999
1.0 | 1.0 | 1.0 | 1.0 | 1.0
0.999 | 1.0 | 1.0 | 0.999 | 1.0
0.999 | 0.999 | 0.999 | 1.0 | 1.0
0.998 | 0.999 | 0.998 | 0.998 | 0.998
0.994 | 0.995 | 0.994 | 0.994 | 0.994
0.998 | 0.999 | 0.997 | 0.999 | 0.999
0.99 | 0.991 | 0.989 | 0.989 | 0.99
0.984 | 0.987 | 0.984 | 0.988 | 0.985
0.982 | 0.992 | 0.981 | 0.989 | 0.982
0.964 | 0.981 | 0.963 | 0.982 | 0.967
0.959 | 0.971 | 0.958 | 0.997 | 0.96
0.937 | 0.956 | 0.933 | 0.996 | 0.938
0.921 | 0.997 | 0.917 | 0.997 | 0.926
0.87 | 0.99 | 0.869 | 0.993 | 0.879
0.839 | 0.991 | 0.839 | 0.997 | 0.851
0.813 | 0.994 | 0.809 | 0.996 | 0.826
0.718 | 0.994 | 0.718 | 0.999 | 0.733
0.718 | 0.99 | 0.718 | 1.0 | 0.761
0.635 | 0.989 | 0.641 | 0.996 | 0.674


In [6]:
print(wfc_2_2_100.estimators_weight_)
print(wfc_0_0_100.estimators_weight_)
print(wfc_2_2_1.estimators_weight_)
print(wfc_0_0_1.estimators_weight_)

[-1157.43 -1741.21 -2444.17 -1622.03 -1545.27 -1455.38  -746.36 -2442.15
 -1721.01 -1943.21 -1446.29 -1393.77 -1183.69 -1404.88    90.93 -1397.81
   155.57 -1541.23 -1308.93  -638.29 -2702.73 -1107.94 -2496.69 -2331.05
  -721.11 -1491.74 -1880.59 -2465.38 -1555.37 -1130.16 -1582.64 -2497.7
 -1204.9  -2436.09    31.34 -2558.3  -2765.35 -1883.62   124.26  -427.2
  -543.35 -1253.38 -1293.78 -2216.92 -1118.04 -1115.01 -2723.94 -1106.93
 -1643.24 -2077.54   192.94 -1167.53 -1547.29 -2797.67 -2034.11 -1167.53
 -1501.84 -1569.51 -1103.9  -1745.25  -753.43 -2445.18 -1205.91 -1218.03
 -1408.92 -1078.65 -1268.53 -1831.1   -527.19 -1264.49 -1586.68 -2436.09
 -1963.41   206.07 -1221.06 -1406.9     40.43 -2450.23 -1855.34 -1278.63
 -2459.32 -1217.02 -1134.2  -3028.96  -822.11 -1452.35 -1411.95 -1735.15
 -2504.77 -1321.05 -1875.54 -2661.32 -2480.53 -2034.11 -1101.88 -2488.61
  -630.21 -1177.63 -1058.45 -3096.63]
[ 82.6686  84.3553  66.6399  88.1529  87.8398  84.0523  88.062   73.8715
  85.6279  74.7