In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

from scipy.stats import entropy


import lib as lib


### Working on breast cancer data

In [2]:
# import some data to play with
#load the breast cancer dataset 
bcan = datasets.load_breast_cancer()
X = bcan.data
y = bcan.target

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=42)


# normalize the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)


In [3]:
# initialize params1 and params2

params1=[lib.initialize_weights(X_train.shape[1]) for i in range(100)]# a vector of shape 100,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(100)]# a vector of shape 100
# use the np.random.uniform() function

# we have a list of 100 weight vectors (params1) and 100 thresholds (params2)
# convert them to array
params1=np.array(params1)
params2=np.array(params2)


print("Shape of params 1 (weights)",params1.shape)
print("Shape of params 2 (thresholds)",params2.shape)

Shape of params 1 (weights) (100, 30)
Shape of params 2 (thresholds) (100,)


In [16]:
z = lib.objective_fn_vector(params1, params2, X_train, y_train)
# Find the global minimum
param1_min = params1[z.argmin()] # use z.argmin()
param2_min = params2[z.argmin()] # use z.argmin()

print("param1_min",param1_min,"param2_min",param2_min)
print(lib.objective_fn(param1_min, param2_min, X_train, y_train))

param1_min [5.11447013 4.53677123 0.70535817 4.43743479 1.78208326 0.28122191
 1.41854031 4.44520929 1.20762897 2.35031583 1.76752274 2.39792243
 1.08385403 2.33111824 2.13771321 2.99321449 6.31989251 0.82644216
 5.41262779 0.82690422 4.08373459 4.79323638 0.89788593 6.34780814
 5.9440157  1.3719993  2.47881292 6.31202285 4.50535511 2.38176836] param2_min 1.5023165347976417
0.6643460978641622


In [17]:

# Hyper-parameter of the algorithm
c1 = c2 = 0.1
w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
w2 = 0.8 
# Create particles
n_particles = 20
np.random.seed(100)
params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
# use the np.random.uniform() function

params1=np.array(params1)
params2=np.array(params2)

print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)

params1 shape is  (20, 30) params2 shape is  (20,)


In [18]:
# define velocity of each weight of every particle
V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
# once again can use initialize_weights function

#define velocity of each threshold of every particle
V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
# once again use np.random.uniform() function

# Initialize objective values
pbest = (params1,params2)
pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
gbest_obj = pbest_obj.min()

print("pbest obj value for 20 particles are as follows",pbest_obj)
print("gbest obj value among all 20 particles is as follows",gbest_obj)
# note that gbest_obj should be the minimim of all pbest_obj

pbest obj value for 20 particles are as follows [0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461]
gbest obj value among all 20 particles is as follows 0.6643460978641622


In [19]:
def update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj):
    "Function to do one iteration of particle swarm optimization"
    # these have been already initialized in the previous cells
    
    # Update params
    r11,r12, r2 = np.random.rand(3)
    V_param1=w1*V_param1+c1*r11*(pbest[0] - params1)+ c2*r2*(gbest[0]-params1)
    V_param2=w2*V_param2+c1*r12*(pbest[1] - params2)+ c2*r2*(gbest[1]-params2)    
#     V = w * V + c1*r11*(pbest - params1) + c2*r2*(gbest.reshape(-1,1)-X)
    params1 = params1 + V_param1
    params2 = params2 + V_param2
    
    obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    for i in range(pbest[0].shape[0]):
        if pbest_obj[i]>=obj[i]:
            
            pbest[0][i]=params1[i] # update pbest[0][i] with value of params1[i]
            pbest[1][i]=params2[i] # update pbest[1][i] 
            pbest_obj[i]=obj[i]    # also update pbest_obj[i]

            
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()]) # update gbest to contain the best from params1 and params 2
    gbest_obj = pbest_obj.min() # update gbest to get the minimum of pbest_obj
 


In [20]:
for i in range(100):
    update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj)
print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
print("Global optimal at f({})={}".format([param1_min,param2_min], lib.objective_fn(param1_min, param2_min, X_train, y_train)))


PSO found best solution at f((array([2.79946358, 1.41070955, 3.81947051, 2.69885723, 1.20412079,
       0.55522579, 2.44888221, 4.44079089, 2.93263782, 2.46988633,
       1.33231943, 0.43077251, 0.64108794, 3.57494637, 6.17952789,
       3.71000318, 5.43441796, 1.60172093, 9.20482242, 1.4964306 ,
       2.92058616, 1.72006017, 2.57608585, 3.68955652, 1.23193181,
       5.48392332, 5.50967524, 4.38460724, 4.53420241, 1.52702865]), 0.4098464718143954))=0.6643460978641622
Global optimal at f([array([5.11447013, 4.53677123, 0.70535817, 4.43743479, 1.78208326,
       0.28122191, 1.41854031, 4.44520929, 1.20762897, 2.35031583,
       1.76752274, 2.39792243, 1.08385403, 2.33111824, 2.13771321,
       2.99321449, 6.31989251, 0.82644216, 5.41262779, 0.82690422,
       4.08373459, 4.79323638, 0.89788593, 6.34780814, 5.9440157 ,
       1.3719993 , 2.47881292, 6.31202285, 4.50535511, 2.38176836]), 1.5023165347976417])=0.6643460978641622


In [69]:
max_tree_size=128
all_optimized_weights_list=[None for i in range(max_tree_size)]
all_optimized_thresh_list=[None for i in range(max_tree_size)]
all_dataset_sizes_list=[None for i in range(max_tree_size)]
all_IG_list=[None for i in range(max_tree_size)]


def find_best_params(train_x,train_y,test_x,test_y,node_number):
    '''
    recursive function to get the best set of weights
    '''
    print("node_number",node_number,"data shape",train_x.shape)
    # exit condition 1: if the node_number is more than the maximum tree size, return
    if node_number>=max_tree_size:
        return
    # exit condition 2: if the training dataset has one or less rows, return 
    if train_x.shape[0]<=1:
        return
    # exit condition 3: if the train_y has values from only one class (only 0s or only 1s and so on)
    if np.unique(train_y).shape[0]==1:
        return
    # use the initialized lists as global
    global all_optimized_weights_list
    global all_optimized_thresh_list
    global all_dataset_sizes_list
    

    # Hyper-parameter of the algorithm
    c1 = c2 = 0.1
    w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
    w2 = 0.8 
    # Create particles
    n_particles = 20
    np.random.seed(100)
    params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,n_features
    # call the initialize_weights function above

    params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
    # use the np.random.uniform() function

    params1=np.array(params1)
    params2=np.array(params2)

#     print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)    
    
    # define velocity of each weight of every particle
    V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
    # once again can use initialize_weights function

    #define velocity of each threshold of every particle
    V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
    # once again use np.random.uniform() function

    # Initialize objective values
    pbest = (params1,params2)
    pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
    gbest_obj = pbest_obj.min()

#     print("pbest obj value for 20 particles are as follows",pbest_obj)
#     print("gbest obj value among all 20 particles is as follows",gbest_obj)   
    
    for i in range(100):
        update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj)
#     print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
    
    # add the achieved optimized values to the lists
    all_optimized_weights_list[node_number]=gbest[0]
    all_optimized_thresh_list[node_number]=gbest[1]
    all_dataset_sizes_list[node_number]=train_x.shape[0]
    all_IG_list[node_number]=gbest_obj
#     print("threshold is ",gbest[1])
#     print("ys are ",train_y)
    new_ys=np.dot(train_x,gbest[0])
    
    new_ys=(new_ys-np.min(new_ys))/(np.max(new_ys)-np.min(new_ys))
#     print("new_ys are",new_ys)
    
    
    # chop the data into two parts: left
    train_x_left=train_x[new_ys>=gbest[1]]
    train_y_left=train_y[new_ys>=gbest[1]]
    left_child_node_num=node_number*2+1
    # chop the data into two parts: right
    train_x_right=train_x[new_ys<gbest[1]]
    train_y_right=train_y[new_ys<gbest[1]]    
    right_child_node_num=node_number*2+2
    
    # return if information gain is 0
    if gbest_obj==0:
        return
    
    print("Left",train_x_left.shape)
    print("Right",train_x_right.shape)
    # make the recursion call for left
    find_best_params(train_x_left,train_y_left,test_x,test_y,left_child_node_num)
    # make the recursion call for right
    find_best_params(train_x_right,train_y_right,test_x,test_y,right_child_node_num)    
    
    
    
    
    

In [70]:
node_number=0
find_best_params(X_train,y_train,X_test,y_test,node_number)

node_number 0 data shape (381, 30)
Left (101, 30)
Right (280, 30)
node_number 1 data shape (101, 30)
Left (17, 30)
Right (84, 30)
node_number 3 data shape (17, 30)
Left (6, 30)
Right (11, 30)
node_number 7 data shape (6, 30)
node_number 8 data shape (11, 30)
Left (4, 30)
Right (7, 30)
node_number 17 data shape (4, 30)
node_number 18 data shape (7, 30)
Left (5, 30)
Right (2, 30)
node_number 37 data shape (5, 30)
Left (3, 30)
Right (2, 30)
node_number 75 data shape (3, 30)
Left (2, 30)
Right (1, 30)
node_number 151 data shape (2, 30)
node_number 152 data shape (1, 30)
node_number 76 data shape (2, 30)
node_number 38 data shape (2, 30)
node_number 4 data shape (84, 30)
Left (33, 30)
Right (51, 30)
node_number 9 data shape (33, 30)
node_number 10 data shape (51, 30)
Left (28, 30)
Right (23, 30)
node_number 21 data shape (28, 30)
Left (16, 30)
Right (12, 30)
node_number 43 data shape (16, 30)
Left (12, 30)
Right (4, 30)
node_number 87 data shape (12, 30)
Left (7, 30)
Right (5, 30)
node_numb

In [58]:
X_train.shape

(381, 30)

In [71]:
all_optimized_thresh_list

[0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 None,
 0.4098464718143954,
 None,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 None,
 None,
 None,
 None,
 0.4098464718143954,
 None,
 None,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 0.4098464718143954,
 None,
 None,
 None,
 None,
 None,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 None,
 0.4098464718143954,
 None,
 0.4098464718143954,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [48]:
thresh=all_optimized_thresh_list[0]