In [1]:

from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import accuracy_score

from scipy.stats import entropy



import raghadlib as lib


In [2]:
# import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=42)


# normalize the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [3]:

pk = np.array([1/5, 2/5, 2/5])  # fair coin
H = entropy(pk)
print(H)



1.0549201679861442


In [4]:

    
# test your function
list_labels=[1,2,0,1,2,0]
uniq_labels=[0,1,2]
print(lib.calculate_probabilities(list_labels,uniq_labels))
# this should print somehting like 0.33,0.33,0.33



{0: 0.3333333333333333, 1: 0.3333333333333333, 2: 0.3333333333333333}


In [5]:

# test your function
list_probas=[1/5, 2/5, 2/5]
print(lib.calc_entropy_from_probabilities(list_probas))
# above should print 1.054...



1.0549201679861442


In [6]:
#test your function
old_entropy=1
new_entropies=[0,0.65]
count_items=[4,6]
print(lib.information_gain(old_entropy,new_entropies,count_items))
# above should print 0.61
    


0.61


In [7]:

num_feats=X_train.shape[1]
print(lib.initialize_weights(num_feats))



[2 2 2 2]


In [8]:

### Below we just randomly assign 100 particles and see if we can find the global minimum.
### THis is just to check

params1=[lib.initialize_weights(X_train.shape[1]) for i in range(100)]# a vector of shape 100,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(100)]# a vector of shape 100
# use the np.random.uniform() function

# we have a list of 100 weight vectors (params1) and 100 thresholds (params2)
# convert them to array
params1=np.array(params1)
params2=np.array(params2)

print("Shape of params 1 (weights)",params1.shape)
print("Shape of params 2 (thresholds)",params2.shape)



Shape of params 1 (weights) (100, 4)
Shape of params 2 (thresholds) (100,)


In [9]:

z = lib.objective_fn_vector(params1, params2, X_train, y_train)
# Find the global minimum
param1_min = params1[z.argmin()] # use z.argmin()
param2_min = params2[z.argmin()] # use z.argmin()

print("param1_min",param1_min,"param2_min",param2_min)


param1_min [2 2 2 2] param2_min 0.9319571287092955


In [10]:

### Setting up the particles and other parameters now
# Hyper-parameter of the algorithm
c1 = c2 = 0.1
w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
w2 = 0.8 
# Create particles
n_particles = 20
np.random.seed(100)
params1=[lib.initialize_weights(X_train.shape[1]) for i in range(n_particles)] # a vector of shape n_particles,4
# call the initialize_weights function above

params2=[np.random.uniform() for i in range(n_particles)]# a vector of shape n_particles
# use the np.random.uniform() function

params1=np.array(params1)
params2=np.array(params2)

print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)


params1 shape is  (20, 4) params2 shape is  (20,)


In [11]:

# define velocity of each weight of every particle
V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
# once again can use initialize_weights function

#define velocity of each threshold of every particle
V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
# once again use np.random.uniform() function

# Initialize objective values
pbest = (params1,params2)
pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
gbest_obj = pbest_obj.min()

print("pbest obj value for 20 particles are as follows",pbest_obj)
print("gbest obj value among all 20 particles is as follows",gbest_obj)
# note that gbest_obj should be the minimim of all pbest_obj


pbest obj value for 20 particles are as follows [1.09729975 1.09729975 1.09729975 0.82232957 1.09729975 1.09729975
 0.82232957 0.82232957 1.09729975 1.09729975 0.82232957 1.09729975
 1.09729975 1.09729975 1.09729975 0.82149332 0.82232957 1.09729975
 0.82232957 1.09729975]
gbest obj value among all 20 particles is as follows 0.8214933202884899


In [12]:

def update():
    "Function to do one iteration of particle swarm optimization"
    global V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj
    # these have been already initialized in the previous cells
    
    # Update params
    r11,r12, r2 = np.random.rand(3)
    V_param1=w1*V_param1+c1*r11*(pbest[0] - params1)+ c2*r2*(gbest[0]-params1)
    V_param2=w2*V_param2+c1*r12*(pbest[1] - params2)+ c2*r2*(gbest[1]-params2)    
#     V = w * V + c1*r11*(pbest - params1) + c2*r2*(gbest.reshape(-1,1)-X)
    params1 = params1 + V_param1
    params2 = params2 + V_param2
    
    obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    for i in range(pbest[0].shape[0]):
        if pbest_obj[i]>=obj[i]:
            
            pbest[0][i]=params1[i] # update pbest[0][i] with value of params1[i]
            pbest[1][i]=params2[i] # update pbest[1][i] 
            pbest_obj[i]=obj[i]    # also update pbest_obj[i]

            
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()]) # update gbest to contain the best from params1 and params 2
    gbest_obj = pbest_obj.min() # update gbest to get the minimum of pbest_obj


In [13]:
 
for i in range(100):
    update()
print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
print("Global optimal at f({})={}".format([param1_min,param2_min], lib.objective_fn(param1_min, param2_min, X_train, y_train)))


PSO found best solution at f((array([ 2.00002176e+00, -7.09358920e-05,  2.88383055e-04,  2.00003907e+00]), 1.3789660702171247))=0.5237323047939855
Global optimal at f([array([2, 2, 2, 2]), 0.9319571287092955])=0.8214933202884899


In [14]:

# import some data to play with
#load the breast cancer dataset 
from sklearn.datasets import load_breast_cancer

# Load the breast cancer dataset
bcan = load_breast_cancer()

X = bcan.data
y = bcan.target

X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.33, random_state=42)


# normalize the data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
print("Shapr of training data is ",X_train.shape)


Shapr of training data is  (381, 30)


In [15]:

# initialize the params1 and params2



# call the initialize_weights function above
params1 = [lib.initialize_weights(X_train.shape[1]) for i in range(100)]

# use the np.random.uniform() function

params2 = [np.random.uniform() for i in range(100)]

# we have a list of 100 weight vectors (params1) and 100 thresholds (params2)
# convert them to array
params1=np.array(params1)
params2=np.array(params2)

print("Shape of params 1 (weights)",params1.shape)
print("Shape of params 2 (thresholds)",params2.shape)



Shape of params 1 (weights) (100, 30)
Shape of params 2 (thresholds) (100,)


In [16]:

z = lib.objective_fn_vector(params1, params2, X_train, y_train)
# Find the global minimum
param1_min = params1[z.argmin()] # use z.argmin()
param2_min = params2[z.argmin()] # use z.argmin()

print("param1_min",param1_min,"param2_min",param2_min)


param1_min [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] param2_min 0.459553670010184


In [17]:

# Hyper-parameter of the algorithm
c1 = c2 = 0.1
w1 = np.array([np.random.uniform() for i in range(X_train.shape[1])])
w2 = 0.8 
# Create particles
n_particles = 20
np.random.seed(100)

params1=[lib.initialize_weights(X_train.shape[1]) for i in range(100)]# a vector of shape 100,4
# call the initialize_weights function above



params2=[np.random.uniform() for i in range(100)]# a vector of shape n_particles
# use the np.random.uniform() function

params1=np.array(params1)
params2=np.array(params2)

print("params1 shape is ",params1.shape,"params2 shape is ",params2.shape)


params1 shape is  (100, 30) params2 shape is  (100,)


In [18]:

# define velocity of each weight of every particle

V_param1 = [lib.initialize_weights(X_train.shape[1])*0.1 for i in range(n_particles)] # shape is same as params1
# once again can use initialize_weights function

#define velocity of each threshold of every particle
V_param2 = np.array([np.random.uniform()*0.1 for i in range(n_particles)])# shape is same as params2
# once again use np.random.uniform() function


# Initialize objective values
pbest = (params1,params2)
pbest_obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()])
gbest_obj = pbest_obj.min()


print("pbest obj value for 20 particles are as follows",pbest_obj)
print("gbest obj value among all 20 particles is as follows",gbest_obj)
# note that gbest_obj should be the minimim of all pbest_obj


pbest obj value for 20 particles are as follows [0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461 0.6643461
 0.6643461 0.6643461 0.664346

In [19]:

def update():
    "Function to do one iteration of particle swarm optimization"
    global V_param1,V_param2, params1,params2, pbest, pbest_obj, gbest, gbest_obj
    # these have been already initialized in the previous cells
    
    # Update params
    r11,r12, r2 = np.random.rand(3)
    V_param1=w1*V_param1+c1*r11*(pbest[0] - params1)+ c2*r2*(gbest[0]-params1)
    V_param2=w2*V_param2+c1*r12*(pbest[1] - params2)+ c2*r2*(gbest[1]-params2)    
#     V = w * V + c1*r11*(pbest - params1) + c2*r2*(gbest.reshape(-1,1)-X)
    params1 = params1 + V_param1
    params2 = params2 + V_param2
    
    obj = lib.objective_fn_vector(params1, params2, X_train, y_train)
    for i in range(pbest[0].shape[0]):
        if pbest_obj[i]>=obj[i]:
            
            pbest[0][i]=params1[i] # update pbest[0][i] with value of params1[i]
            pbest[1][i]=params2[i] # update pbest[1][i] 
            pbest_obj[i]=obj[i]    # also update pbest_obj[i]

            
    gbest=(params1[pbest_obj.argmin()],params2[pbest_obj.argmin()]) # update gbest to contain the best from params1 and params 2
    gbest_obj = pbest_obj.min() # update gbest to get the minimum of pbest_obj
 

In [20]:

for i in range(1000):
    update()
print("PSO found best solution at f({})={}".format(gbest, gbest_obj))
print("Global optimal at f({})={}".format([param1_min,param2_min], lib.objective_fn(param1_min, param2_min, X_train, y_train)))


ValueError: operands could not be broadcast together with shapes (20,30) (100,30) 

In [None]:
X_train.shape[0]