In [5]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

from scipy.stats import entropy


import lib as lib


### Working on breast cancer data

In [6]:
# import some data to play with
#load the breast cancer dataset 
bcan = datasets.load_breast_cancer()
X = bcan.data
y = bcan.target

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=42)


# normalize the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)


In [7]:
# initialize params1 and params2

params1=[lib.initialize_weights(X_train.shape[1]) for i in range(100)]# a vector of shape 100,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(100)]# a vector of shape 100
# use the np.random.uniform() function

# we have a list of 100 weight vectors (params1) and 100 thresholds (params2)
# convert them to array
params1=np.array(params1)
params2=np.array(params2)


print("Shape of params 1 (weights)",params1.shape)
print("Shape of params 2 (thresholds)",params2.shape)

Shape of params 1 (weights) (100, 30)
Shape of params 2 (thresholds) (100,)


In [8]:
z = lib.objective_fn_vector(params1, params2, X_train, y_train)
# Find the global minimum
param1_min = params1[z.argmin()] # use z.argmin()
param2_min = params2[z.argmin()] # use z.argmin()

print("param1_min",param1_min,"param2_min",param2_min)
print(lib.objective_fn(param1_min, param2_min, X_train, y_train))

param1_min [0.49111844 0.35014998 0.11727274 0.68073131 0.59371329 0.56171032
 0.91435475 0.65313456 0.32065589 0.92833308 0.86465253 0.93920052
 0.28019744 0.96604908 0.8247242  0.31898057 0.87119707 0.01873225
 0.05298146 0.71667218 0.02016802 0.52970548 0.90160249 0.15231225
 0.78460136 0.86846865 0.30577201 0.88191992 0.9022196  0.51128841] param2_min 0.5651867014887214
0.6643460978641622


In [9]:

# Hyper-parameter of the algorithm
c1 = c2 = 0.1
w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
w2 = 0.8 
# Create particles
n_particles = 20
np.random.seed(100)
params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
# use the np.random.uniform() function

params1=np.array(params1)
params2=np.array(params2)

print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)

params1 shape is  (20, 30) params2 shape is  (20,)


In [10]:
# define velocity of each weight of every particle
V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
# once again can use initialize_weights function

#define velocity of each threshold of every particle
V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
# once again use np.random.uniform() function

# Initialize objective values
pbest = (params1,params2)
pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
gbest_obj = pbest_obj.min()

print("pbest obj value for 20 particles are as follows",pbest_obj)
print("gbest obj value among all 20 particles is as follows",gbest_obj)
# note that gbest_obj should be the minimim of all pbest_obj

pbest obj value for 20 particles are as follows [0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461]
gbest obj value among all 20 particles is as follows 0.6643460978641622


In [11]:
def update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj):
    "Function to do one iteration of particle swarm optimization"
    # these have been already initialized in the previous cells
    
    # Update params
    r11,r12, r2 = np.random.rand(3)
    V_param1=w1*V_param1+c1*r11*(pbest[0] - params1)+ c2*r2*(gbest[0]-params1)
    V_param2=w2*V_param2+c1*r12*(pbest[1] - params2)+ c2*r2*(gbest[1]-params2)    
#     V = w * V + c1*r11*(pbest - params1) + c2*r2*(gbest.reshape(-1,1)-X)
    params1 = params1 + V_param1
    params2 = params2 + V_param2
    
    obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    for i in range(pbest[0].shape[0]):
        if pbest_obj[i]>=obj[i]:
            
            pbest[0][i]=params1[i] # update pbest[0][i] with value of params1[i]
            pbest[1][i]=params2[i] # update pbest[1][i] 
            pbest_obj[i]=obj[i]    # also update pbest_obj[i]

            
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()]) # update gbest to contain the best from params1 and params 2
    gbest_obj = pbest_obj.min() # update gbest to get the minimum of pbest_obj
 


In [12]:
for i in range(100):
    update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj)
print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
print("Global optimal at f({})={}".format([param1_min,param2_min], lib.objective_fn(param1_min, param2_min, X_train, y_train)))


PSO found best solution at f((array([2.9616904 , 6.46433229, 2.30021511, 5.63653422, 0.12609223,
       0.36952179, 2.74157393, 1.30673481, 2.67857792, 1.24057144,
       2.38407915, 0.41198969, 0.23359411, 2.44580337, 4.39544253,
       8.93547124, 8.62171774, 2.51107622, 3.0258309 , 1.07002231,
       2.97936566, 1.24684029, 2.38322456, 4.0788889 , 0.72816582,
       5.59194509, 4.91497731, 1.40804338, 0.83266728, 2.03364854]), 0.4098464718143954))=0.6643460978641622
Global optimal at f([array([0.49111844, 0.35014998, 0.11727274, 0.68073131, 0.59371329,
       0.56171032, 0.91435475, 0.65313456, 0.32065589, 0.92833308,
       0.86465253, 0.93920052, 0.28019744, 0.96604908, 0.8247242 ,
       0.31898057, 0.87119707, 0.01873225, 0.05298146, 0.71667218,
       0.02016802, 0.52970548, 0.90160249, 0.15231225, 0.78460136,
       0.86846865, 0.30577201, 0.88191992, 0.9022196 , 0.51128841]), 0.5651867014887214])=0.6643460978641622


In [30]:
max_tree_size=128
all_optimized_weights_list=[None for i in range(max_tree_size)]
all_optimized_thresh_list=[None for i in range(max_tree_size)]
all_dataset_sizes_list=[None for i in range(max_tree_size)]
all_IG_list=[None for i in range(max_tree_size)]


def find_best_params(train_x,train_y,test_x,test_y,node_number):
    # Code by bilal
    '''
    recursive function to get the best set of weights
    '''
    print("node_number",node_number,"data shape",train_x.shape)
    # exit condition 1: if the node_number is more than the maximum tree size, return
    if node_number>=max_tree_size:
        return
    # exit condition 2: if the training dataset has one or less rows, return 
    elif train_x.shape[0] <= 1:
      return
      
    
    # exit condition 3: if the train_y has values from only one class (only 0s or only 1s and so on)
    elif len(np.unique(y_train)) == 1:
      return

    # use the initialized lists as global
    global all_optimized_weights_list
    global all_optimized_thresh_list
    global all_dataset_sizes_list
    

    # Hyper-parameter of the algorithm
    c1 = c2 = 0.1
    w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
    w2 = 0.8 
    # Create particles
    n_particles = 20
    np.random.seed(100)
    params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,n_features
    # call the initialize_weights function above

    params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
    # use the np.random.uniform() function

    params1=np.array(params1)
    params2=np.array(params2)

#     print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)    
    
    # define velocity of each weight of every particle
    V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
    # once again can use initialize_weights function

    #define velocity of each threshold of every particle
    V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
    # once again use np.random.uniform() function

    # Initialize objective values
    pbest = (params1,params2)
    pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
    gbest_obj = pbest_obj.min()

#     print("pbest obj value for 20 particles are as follows",pbest_obj)
#     print("gbest obj value among all 20 particles is as follows",gbest_obj)   
    
    for i in range(100):
        update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj)
#     print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
    
    # add the achieved optimized values to the lists
    all_optimized_weights_list[node_number] = gbest[0]
    all_optimized_thresh_list[node_number] = gbest[1]
    all_dataset_sizes_list[node_number] = train_x.shape[0]
    all_IG_list[node_number] = gbest_obj
    
#     print("threshold is ",gbest[1])
#     print("ys are ",train_y)
    new_ys=np.dot(train_x,gbest[0])
    
    # normalize the new_ys
    new_ys=(new_ys-np.min(new_ys))/(np.max(new_ys) - np.min(new_ys))
#     print("new_ys are",new_ys)
    
    
    # chop the data into two parts: left
    train_x_left=train_x[new_ys>=gbest[1]]
    train_y_left=train_y[new_ys>=gbest[1]]
    left_child_node_num=node_number*2+1

    # chop the data into two parts: right
    train_x_right = train_x[new_ys < gbest[1]]
    train_y_right = train_y[new_ys < gbest[1]]
    right_child_node_num = node_number * 2 + 2
    
    # exit condition 4: return if information gain is 0
    if gbest_obj == 0:
      return
    
    print("Left",train_x_left.shape)
    print("Right",train_x_right.shape)
    # make the recursion call for left
    find_best_params(train_x_left,train_y_left,test_x,test_y,left_child_node_num)
    # make the recursion call for right
    find_best_params(train_x_right,train_y_right,test_x,test_y,right_child_node_num)    
    
    
    
    
    

In [31]:
node_number=0
find_best_params(X_train,y_train,X_test,y_test,node_number)

node_number 0 data shape (381, 30)
Left (99, 30)
Right (282, 30)
node_number 1 data shape (99, 30)
Left (18, 30)
Right (81, 30)
node_number 3 data shape (18, 30)
Left (4, 30)
Right (14, 30)
node_number 7 data shape (4, 30)
Left (2, 30)
Right (2, 30)
node_number 15 data shape (2, 30)
Left (1, 30)
Right (1, 30)
node_number 31 data shape (1, 30)
node_number 32 data shape (1, 30)
node_number 16 data shape (2, 30)
Left (1, 30)
Right (1, 30)
node_number 33 data shape (1, 30)
node_number 34 data shape (1, 30)
node_number 8 data shape (14, 30)
Left (5, 30)
Right (9, 30)
node_number 17 data shape (5, 30)
Left (4, 30)
Right (1, 30)
node_number 35 data shape (4, 30)
Left (3, 30)
Right (1, 30)
node_number 71 data shape (3, 30)
Left (2, 30)
Right (1, 30)
node_number 143 data shape (2, 30)
node_number 144 data shape (1, 30)
node_number 72 data shape (1, 30)
node_number 36 data shape (1, 30)
node_number 18 data shape (9, 30)
Left (5, 30)
Right (4, 30)
node_number 37 data shape (5, 30)
Left (3, 30)
Ri

In [32]:
X_train.shape

(381, 30)

In [34]:
all_optimized_thresh_list

[0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 None,
 None,
 None,
 None,
 0.4098464718143954,
 None,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,
 0.4098464718143954,

In [35]:
thresh=all_optimized_thresh_list[0]