In [2]:
# Fuzzy Gradient Boosting Regression Algorithm
# using different fuzzy distances
# by Nasiboglu R.,  Nasibov E.
# February 2022

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
import numpy as np
import random as r
import pandas as pd
import math

# Center of Area defuzzification for Fuzzy Number
def COA(A):
    s=1 # shape convexity parameter 
    b=(A[0]+s*((A[0]-A[1])+(A[0]+A[2])))/(2*s+1)
    return b

# WABL defuzzification for Fuzzy Number
def WABL(A):
    c=0.5 # optimism parameter
    s=1   # shape convexity parameter 
    k=0   # increasing speed of level importances
    b=c*((A[0]+A[2])-((k+1)/(k+s+1))*A[2])+(1-c)*((A[0]-A[1])+((k+1)/(k+s+1))*A[1])
    return b

# fuzzy average of fuzzy numbers
def fuzAve(a):
    fuz=[0,0,0]
    for i in range(len(a)):
        fuz=fuzAdd(a[i],fuz)
    fuz=[fuz[0]/len(a),fuz[1],fuz[2]]    
    return fuz

# distance betwee fuzzy numbers
def fuzDist(a,b):
    # You can select the required distance by deleting the comment
    
    fuz=max(abs(a[0]-b[0]),abs(a[1]-b[1]),abs(a[2]-b[2])) # D1 distance
    #fuz=abs(a[0]-b[0])+max(abs(a[1]-b[1]),abs(a[2]-b[2])) # D2=D3 distance
    #fuz=abs(WABL(fuzSubtr(a,b))) # D4 distance
    return fuz

# Calculating of RMSE value according to fuzzy numbers
def fuzRMSE(a,b):
    fuz=0
    for i in range(len(a)):
        fuz+=fuzDist(a[i],b[i])**2
    fuz=math.sqrt(fuz/len(a))
    return fuz

# Calculating of MAE value according to fuzzy numbers
def fuzMAE(a,b):
    fuz=0
    for i in range(len(a)):
        fuz+=fuzDist(a[i],b[i])
    fuz=fuz/len(a)
    return fuz

# Calculating of R-squared value according to given average of fuzzy numbers
def fuzR2(a,b,ave):
    fuz1,fuz2=0,0
    for i in range(len(a)):
        fuz1+=fuzDist(a[i],b[i])**2
        fuz2+=fuzDist(a[i],ave)**2
    fuz=1-(fuz1/fuz2)
    return fuz

# fuzzy Subtraction A-B of fuzzy numbers
def fuzSubtr(a,b):
    fuz=[a[0]-b[0],max(a[1],b[1]),max(a[2],b[2])]
    #fuz=[a[0]-b[0],(a[1]+b[2]),(a[2]+b[1])]
    return fuz
             
# fuzzy Addition A+B of fuzzy numbers
    def fuzAdd(a,b):
    fuz=[a[0]+b[0],max(a[1],b[1]),max(a[2],b[2])]
    #fuz=[a[0]+b[0],(a[1]+b[1]),(a[2]+b[2])]
    return fuz

# Multiplication of fuzzy number A by scalar b
def fuzMultBy(a,b):
    fuz=[b*a[0],a[1],a[2]]
    return fuz

# Load the Boston Dataset
data = datasets.load_boston()           
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42, test_size=0.1)

# Load the Diabetes Dataset
#data=datasets.load_diabetes()
#X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, random_state=42, test_size=0.1)

r.seed(0)

# fuzzy number A=(mode,l_width,r_width)

y_fuz_train = np.c_[y_train,y_train*(0.2*r.random()),y_train*(0.2*r.random())]
y_fuz_test = np.c_[y_test,y_test*(0.2*r.random()),y_test*(0.2*r.random())]

# Standardize the inputs

sc = MinMaxScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

maxR2=-99999
maxM=-1

# -------- Parameters ----------
M=201
learning_rate=0.1
tree_depth=1
max_leaf=2**tree_depth

F=[[[0,0,0] for _ in range(len(y_fuz_train))] for i in range(M)]
F[0]=[fuzAve(y_fuz_train) for _ in range(len(y_fuz_train))]

gamma=[[[0,0,0] for i in range(max_leaf)] for j in range(M)]
trees=[]

# boosting iterations
for m in range(1,M):    
    #print("------------------------------- Tree %d ----------------------------------" % (m))
    r=[fuzSubtr(y_fuz_train[i],F[m-1][i]) for i in range(len(y_fuz_train))]

    # stump tree is constructed up to centers or WABL values of FN
    r1=[r[i][0] for i in range(len(r))]
    #r1=[WABL(r[i]) for i in range(len(r))]
    tree = DecisionTreeRegressor(random_state=0,max_depth=tree_depth)
    tree.fit(X_train_std, r1)
    trees.append(tree)
    h=tree.apply(X_train_std)   
    
    # Actual different leaf count 
    h1=list(set(h))

    for l in range(len(h1)):
        leaf_l=[j for j in range(len(r1)) if h[j]==h1[l]] 
        ss=[r[j] for j in leaf_l]
        ss1=np.reshape(ss,(-1,3))
        gamma[m][l]=fuzAve(ss1) #for each leaf node
        for k in leaf_l:
            F[m][k]=fuzAdd(F[m-1][k],fuzMultBy(gamma[m][l],learning_rate)) 

# prediction
print("----------- Train set fuzRMSE -------------------")
X1=X_train_std
fuzY=y_fuz_train

FM=F[0]
ave=F[0][0]   # average of train set

for m in range(1,M):
    h=trees[m-1].apply(X1)
    h1=list(set(h))
    for l in range(len(h1)):
        leaf_l=[j for j in range(len(X1)) if h[j]==h1[l]] 
        for k in leaf_l:
            FF=fuzAdd(FM[k],fuzMultBy(gamma[m][l],learning_rate))
            FM[k]=FF    #for each xi of each leaf node 

    R2=fuzR2(fuzY,FM,ave)
    if R2>maxR2:
        maxR2=R2
        maxM=m
    if m%10==0:
        print(m,fuzRMSE(fuzY,FM))
    
print("-- the best R^2 value and according iteration number --")
print("maxM =",maxM)
print("maxR2 =",maxR2)

print("----------- Test set fuzRMSE -------------------")
X1=X_test_std
fuzY=y_fuz_test


FM=F[0]   
ave=F[0][0]   # average of train set

for m in range(1,M):
    h=trees[m-1].apply(X1)
    h1=list(set(h))
    for l in range(len(h1)):
        leaf_l=[j for j in range(len(X1)) if h[j]==h1[l]] 
        for k in leaf_l:
            FF=fuzAdd(FM[k],fuzMultBy(gamma[m][l],learning_rate))
            FM[k]=FF    #for each xi of each leaf node 

    R2=fuzR2(fuzY,FM,ave)
    if R2>maxR2:
        maxR2=R2
        maxM=m
    if m%10==0:
        print(fuzRMSE(fuzY,FM))   


----------- Train set fuzRMSE -------------------
10 7.295080287834705
20 6.366498683696033
30 5.990253368825582
40 5.780548594878055
50 5.645294366244949
60 5.548110956297047
70 5.487462433989691
80 5.436410310971753
90 5.400693235491424
100 5.373509371289886
110 5.345387139916088
120 5.323636379164584
130 5.303703021705545
140 5.285665628546654
150 5.265783654920957
160 5.251202255677963
170 5.237670903480324
180 5.225149418045836
190 5.213554709398175
200 5.203601673073672
-- the best R^2 value and according iteration number --
maxM = 200
maxR2 = 0.7150350126425344
----------- Test set fuzRMSE -------------------
11.287582660001688
10.736200354317207
10.49753251052105
10.388262815528885
10.809506713771173
12.108482917707185
12.84204826078671
13.979657723734993
14.449109408570612
15.450664298921632
15.419695569144466
15.836329641562836
15.81896044050211
16.170013802656467
16.85283206173345
17.319053072214675
17.70723002851354
18.007193780013857
18.66130105956579
19.31772095992084
