In [24]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

from scipy.stats import entropy


import lib as lib


### Working on breast cancer data

In [25]:
# import some data to play with
#load the breast cancer dataset 
bcan = datasets.load_breast_cancer()
X = bcan.data
y = bcan.target

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=42)


# normalize the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)


In [26]:
# initialize params1 and params2

params1=[lib.initialize_weights(X_train.shape[1]) for i in range(100)]# a vector of shape 100,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(100)]# a vector of shape 100
# use the np.random.uniform() function

# we have a list of 100 weight vectors (params1) and 100 thresholds (params2)
# convert them to array
params1=np.array(params1)
params2=np.array(params2)


print("Shape of params 1 (weights)",params1.shape)
print("Shape of params 2 (thresholds)",params2.shape)

Shape of params 1 (weights) (100, 30)
Shape of params 2 (thresholds) (100,)


In [27]:
z = lib.objective_fn_vector(params1, params2, X_train, y_train)
# Find the global minimum
param1_min = params1[z.argmin()] # use z.argmin()
param2_min = params2[z.argmin()] # use z.argmin()

print("param1_min",param1_min,"param2_min",param2_min)
print(lib.objective_fn(param1_min, param2_min, X_train, y_train))

param1_min [0.29332121 0.11841787 0.90289701 0.20699767 0.57530654 0.68620426
 0.74267843 0.46333525 0.74344956 0.32585486 0.18245493 0.05052287
 0.25256524 0.92907513 0.68984837 0.31908874 0.49173868 0.35238822
 0.85396625 0.25328364 0.2837559  0.1907728  0.54990109 0.52196241
 0.14893894 0.68850451 0.91048032 0.48135845 0.57524243 0.55891037] param2_min 0.48199824425601057
0.6643460978641622


In [28]:

# Hyper-parameter of the algorithm
c1 = c2 = 0.1
w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
w2 = 0.8 
# Create particles
n_particles = 20
np.random.seed(100)
params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
# use the np.random.uniform() function

params1=np.array(params1)
params2=np.array(params2)

print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)

params1 shape is  (20, 30) params2 shape is  (20,)


In [29]:
# define velocity of each weight of every particle
V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
# once again can use initialize_weights function

#define velocity of each threshold of every particle
V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
# once again use np.random.uniform() function

# Initialize objective values
pbest = (params1,params2)
pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
gbest_obj = pbest_obj.min()

print("pbest obj value for 20 particles are as follows",pbest_obj)
print("gbest obj value among all 20 particles is as follows",gbest_obj)
# note that gbest_obj should be the minimim of all pbest_obj

pbest obj value for 20 particles are as follows [0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461]
gbest obj value among all 20 particles is as follows 0.6643460978641622


In [30]:
def update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj):
    "Function to do one iteration of particle swarm optimization"
    # these have been already initialized in the previous cells
    
    # Update params
    r11,r12, r2 = np.random.rand(3)
    V_param1=w1*V_param1+c1*r11*(pbest[0] - params1)+ c2*r2*(gbest[0]-params1)
    V_param2=w2*V_param2+c1*r12*(pbest[1] - params2)+ c2*r2*(gbest[1]-params2)    
#     V = w * V + c1*r11*(pbest - params1) + c2*r2*(gbest.reshape(-1,1)-X)
    params1 = params1 + V_param1
    params2 = params2 + V_param2
    
    obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    for i in range(pbest[0].shape[0]):
        if pbest_obj[i]>=obj[i]:
            
            pbest[0][i]=params1[i] # update pbest[0][i] with value of params1[i]
            pbest[1][i]=params2[i] # update pbest[1][i] 
            pbest_obj[i]=obj[i]    # also update pbest_obj[i]

            
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()]) # update gbest to contain the best from params1 and params 2
    gbest_obj = pbest_obj.min() # update gbest to get the minimum of pbest_obj
 


In [31]:
for i in range(100):
    update(V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj)
print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
print("Global optimal at f({})={}".format([param1_min,param2_min], lib.objective_fn(param1_min, param2_min, X_train, y_train)))


PSO found best solution at f((array([7.31640885, 8.20255845, 1.88323015, 5.06529472, 0.69221023,
       0.52359245, 2.09916329, 7.75930097, 0.73487131, 2.48495038,
       1.60724141, 4.57559109, 0.20111294, 2.3197124 , 4.72442403,
       4.98683773, 3.10366565, 3.96090088, 4.46208784, 4.53378587,
       0.43599061, 1.25557602, 2.81187997, 0.78539846, 3.59300447,
       1.81209468, 1.32580087, 4.90588436, 5.59965104, 2.13579209]), 0.4098464718143954))=0.6643460978641622
Global optimal at f([array([0.29332121, 0.11841787, 0.90289701, 0.20699767, 0.57530654,
       0.68620426, 0.74267843, 0.46333525, 0.74344956, 0.32585486,
       0.18245493, 0.05052287, 0.25256524, 0.92907513, 0.68984837,
       0.31908874, 0.49173868, 0.35238822, 0.85396625, 0.25328364,
       0.2837559 , 0.1907728 , 0.54990109, 0.52196241, 0.14893894,
       0.68850451, 0.91048032, 0.48135845, 0.57524243, 0.55891037]), 0.48199824425601057])=0.6643460978641622


In [34]:
import numpy as np
import lib # assuming lib is a module that contains the required functions

max_tree_size=128
all_optimized_weights_list=[None for i in range(max_tree_size)]
all_optimized_thresh_list=[None for i in range(max_tree_size)]
all_dataset_sizes_list=[None for i in range(max_tree_size)]
all_IG_list=[None for i in range(max_tree_size)]


def find_best_params(train_x,train_y,test_x,test_y,node_number):
    '''
    recursive function to get the best set of weights
    '''
    print("node_number",node_number,"data shape",train_x.shape)
    # exit condition 1: if the node_number is more than the maximum tree size, return
    if node_number>=max_tree_size:
        return
    # exit condition 2: if the training dataset has one or less rows, return
    if train_x.shape[0]<=1:
        return
    # exit condition 3: if the train_y has values from only one class (only 0s or only 1s and so on)
    if len(np.unique(train_y))==1:
        return
    
    # use the initialized lists as global
    global all_optimized_weights_list
    global all_optimized_thresh_list
    global all_dataset_sizes_list
    global all_IG_list

    # Hyper-parameter of the algorithm
    c1 = c2 = 0.1
    w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
    w2 = 0.8 
    # Create particles
    n_particles = 20
    np.random.seed(100)
    params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,n_features
    # call the initialize_weights function above

    params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
    # use the np.random.uniform() function

    params1=np.array(params1)
    params2=np.array(params2)

    # define velocity of each weight of every particle
    V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
    # once again can use initialize_weights function

    #define velocity of each threshold of every particle
    V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
    # once again use np.random.uniform() function

    # Initialize objective values
    pbest = (params1,params2)
    pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
    gbest_obj = pbest_obj.min()
    
    new_ys = None  # initialize new_ys variable
    for i in range(100):
        update(V_param1, V_param2, params1, params2, pbest, pbest_obj, gbest, gbest_obj)
        # calculate new_ys inside the loop
        new_ys = np.dot(train_x, gbest[0])
        # normalize the new_ys
        new_ys = (new_ys - np.min(new_ys)) / (np.max(new_ys) - np.min(new_ys))

    # add the achieved optimized values to the lists
    all_optimized_weights_list[node_number] = gbest[0]
    all_optimized_thresh_list[node_number] = gbest[1]
    all_dataset_sizes_list[node_number] = train_x.shape[0]
    
    
    # Here my code has error
    all_IG_list[node_number] = lib.information_gain(train_y, new_ys)

    # chop the data into two parts: left
    train_x_left = train_x[new_ys >= gbest[1]]
    train_y_left = train_y[new_ys >= gbest[1]]
    left_child_node_num = node_number * 2 + 1

    # chop the data into two parts: right
    train_x_right = train_x[new_ys < gbest[1]]
    train_y_right = train_y[new_ys < gbest[1]]
    right_child_node_num = node_number * 2 + 2

    # exit condition 4: return if information gain is 0
    # here my code has error
    if lib.information_gain(train_y, new_ys) == 0:
        return

    print("Left", train_x_left.shape)
    print("Right", train_x_right.shape)

    # make the recursion call for left
    find_best_params(train_x_left, train_y_left, test_x, test_y, left_child_node_num)

    # make the recursion call for right
    find_best_params(train_x_right, train_y_right, test_x, test_y, right_child_node_num)


In [35]:
node_number=0
find_best_params(X_train,y_train,X_test,y_test,node_number)

node_number 0 data shape (381, 30)


TypeError: information_gain() missing 1 required positional argument: 'count_items'

In [36]:
X_train.shape

(381, 30)

In [37]:
all_optimized_thresh_list

[0.4098464718143954,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [38]:
thresh=all_optimized_thresh_list[0]