In [1]:
# Fuzzy Gradient Boosting Regression Algorithm
# using different fuzzy distances
# by Nasiboglu R.,  Nasibov E.
# February 2022

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import random as r
import pandas as pd
import math


def COA(A):
    s=1 # shape convexity parameter 
    b=(A[0]+s*((A[0]-A[1])+(A[0]+A[2])))/(2*s+1)
    return b

def WABL(A):
    c=0.5 # optimism parameter
    s=1   # shape convexity parameter 
    k=0   # increasing speed of level importances
    b=c*((A[0]+A[2])-((k+1)/(k+s+1))*A[2])+(1-c)*((A[0]-A[1])+((k+1)/(k+s+1))*A[1])
    return b

def fuzAve(a):
    fuz=[0,0,0]
    for i in range(len(a)):
        fuz=fuzAdd(a[i],fuz)
    fuz=[fuz[0]/len(a),fuz[1],fuz[2]]    
    return fuz

def fuzDist(a,b):
    #fuz=max(abs(a[0]-b[0]),abs(a[1]-b[1]),abs(a[2]-b[2])) # D1 distance
    #fuz=abs(a[0]-b[0])+max(abs(a[1]-b[1]),abs(a[2]-b[2])) # D2=D3 distance
    fuz=abs(WABL(fuzSubtr(a,b))) # D4 distance
    return fuz

def fuzRMSE(a,b):
    fuz=0
    for i in range(len(a)):
        fuz+=fuzDist(a[i],b[i])**2
    fuz=math.sqrt(fuz/len(a))
    return fuz

def fuzMAE(a,b):
    fuz=0
    for i in range(len(a)):
        fuz+=fuzDist(a[i],b[i])
    fuz=fuz/len(a)
    return fuz

def fuzR2(a,b,ave):
    fuz1,fuz2=0,0
    for i in range(len(a)):
        fuz1+=fuzDist(a[i],b[i])**2
        fuz2+=fuzDist(a[i],ave)**2
    fuz=1-(fuz1/fuz2)
    return fuz

def fuzSubtr(a,b):
    fuz=[a[0]-b[0],max(a[1],b[1]),max(a[2],b[2])]
    #fuz=[a[0]-b[0],(a[1]+b[2]),(a[2]+b[1])]
    return fuz
             
def fuzAdd(a,b):
    fuz=[a[0]+b[0],max(a[1],b[1]),max(a[2],b[2])]
    #fuz=[a[0]+b[0],(a[1]+b[1]),(a[2]+b[2])]
    return fuz

def fuzMultBy(a,b):
    fuz=[b*a[0],a[1],a[2]]
    return fuz

# Load the Boston Dataset
data = datasets.load_boston()           
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42, test_size=0.1)

# Load the Diabetes Dataset
#data=datasets.load_diabetes()
#X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42, test_size=0.1)

r.seed(0)

# fuzzy number A=(mode,l_width,r_width)

y_fuz_train = np.c_[y_train,y_train*(0.2*r.random()),y_train*(0.2*r.random())]
y_fuz_test = np.c_[y_test,y_test*(0.2*r.random()),y_test*(0.2*r.random())]

# Standardize the inputs

sc = MinMaxScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

maxR2=-99999
maxM=-1

# -------- Parameters ----------
M=201
learning_rate=0.1
tree_depth=1
max_leaf=2**tree_depth

F=[[[0,0,0] for _ in range(len(y_fuz_train))] for i in range(M)]
F[0]=[fuzAve(y_fuz_train) for _ in range(len(y_fuz_train))]

gamma=[[[0,0,0] for i in range(max_leaf)] for j in range(M)]
trees=[]

# boosting iterations
for m in range(1,M):    
    #print("------------------------------- Tree %d ----------------------------------" % (m))
    r=[fuzSubtr(y_fuz_train[i],F[m-1][i]) for i in range(len(y_fuz_train))]

    # stump tree is constructed up to centers or WABL values of FN
    r1=[r[i][0] for i in range(len(r))]
    #r1=[WABL(r[i]) for i in range(len(r))]
    tree = DecisionTreeRegressor(random_state=0,max_depth=tree_depth)
    tree.fit(X_train_std, r1)
    trees.append(tree)
    h=tree.apply(X_train_std)   
    
    # Actual different leaf count 
    h1=list(set(h))

    for l in range(len(h1)):
        leaf_l=[j for j in range(len(r1)) if h[j]==h1[l]] 
        ss=[r[j] for j in leaf_l]
        ss1=np.reshape(ss,(-1,3))
        gamma[m][l]=fuzAve(ss1) #for each leaf node
        for k in leaf_l:
            F[m][k]=fuzAdd(F[m-1][k],fuzMultBy(gamma[m][l],learning_rate)) 

# prediction
print("----------- Train set fuzRMSE -------------------")
X1=X_train_std
fuzY=y_fuz_train

FM=F[0]
ave=F[0][0]   # average of train set

for m in range(1,M):
    h=trees[m-1].apply(X1)
    h1=list(set(h))
    for l in range(len(h1)):
        leaf_l=[j for j in range(len(X1)) if h[j]==h1[l]] 
        for k in leaf_l:
            FF=fuzAdd(FM[k],fuzMultBy(gamma[m][l],learning_rate))
            FM[k]=FF    #for each xi of each leaf node 

    R2=fuzR2(fuzY,FM,ave)
    if R2>maxR2:
        maxR2=R2
        maxM=m
    if m%10==0:
        print(m,fuzRMSE(fuzY,FM))
    
print("-- the best R^2 value and according iteration number --")
print("maxM =",maxM)
print("maxR2 =",maxR2)

print("----------- Test set fuzRMSE -------------------")
X1=X_test_std
fuzY=y_fuz_test


FM=F[0]   
ave=F[0][0]   # average of train set

for m in range(1,M):
    h=trees[m-1].apply(X1)
    h1=list(set(h))
    for l in range(len(h1)):
        leaf_l=[j for j in range(len(X1)) if h[j]==h1[l]] 
        for k in leaf_l:
            FF=fuzAdd(FM[k],fuzMultBy(gamma[m][l],learning_rate))
            FM[k]=FF    #for each xi of each leaf node 

    R2=fuzR2(fuzY,FM,ave)
    if R2>maxR2:
        maxR2=R2
        maxM=m
    if m%10==0:
        print(fuzRMSE(fuzY,FM))   


----------- Train set fuzRMSE -------------------
10 6.440003794019169
20 5.130114387919714
30 4.486818211761129
40 4.1103426769695215
50 3.853519744183672
60 3.6684084532948606
70 3.5323935153713646
80 3.4270042340015756
90 3.341461917573688
100 3.2688281724534427
110 3.2053769790664766
120 3.149081223431103
130 3.098941739303888
140 3.0529512694278464
150 3.0108741802364145
160 2.973087097064224
170 2.9384279267167592
180 2.90664964278755
190 2.8773083690396333
200 2.8508454670051018
-- the best R^2 value and according iteration number --
maxM = 200
maxR2 = 0.9062280941440357
----------- Test set fuzRMSE -------------------
10.48115593798676
9.784459696392863
9.481750205321648
9.650644522044159
10.31065390758833
11.763875307503358
12.552444703576727
13.770566754396324
14.271025953051197
15.339603014693907
15.306491349798494
15.74854061297507
15.730166352079669
16.10368039470645
16.835857943269858
17.331171018032304
17.742371868985575
18.061160286503984
18.74932069624271
19.4365227022