In [0]:
import random
import math
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score,confusion_matrix,classification_report

In [0]:
class solution:
    def __init__(self):
        self.best = 0
        self.bestIndividual=[]
        self.convergence = []
        self.optimizer=""
        self.objfname=""

In [0]:
def model_score(x_train, y_train, x_test, y_test, classes = 'multi'):
    
    #x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
    model = DecisionTreeClassifier(random_state = 101)
    #print(type(model))
    model.fit(x_train,y_train)
    
    test_predictions = model.predict(x_test)
    if classes == 'multi':
        return f1_score(y_test, test_predictions, average = 'macro')
    else:
        return f1_score(y_test, test_predictions)

In [0]:
def obj_func(pos_array,X_train,y_train,X_test, y_test, classes):
    alpha = 0.6
    feature_array = np.zeros_like(pos_array)
    for i in range(len(pos_array)):
        #print(1/(1+math.exp(-pos_array[i])))
        feature_array[i] = 1/(1+math.exp(-pos_array[i]))
    feature_array = [round(i) for i in feature_array]
    feature_index = [i for i in range(len(feature_array)) if feature_array[i] > 0]
    #print(len(feature_index))
    if (len(feature_index)==0):
        return [0,0]
    
    f1_score = model_score(X_train, y_train, X_test, y_test, classes = classes)
    fitness_score = alpha*f1_score + (1-alpha)*(X_train.shape[1]-len(feature_index))/X_train.shape[1]
    return [fitness_score,f1_score]

def crossover(mother, father):

    child = [None]*len(mother)
    for param in range(len(mother)):
        child[param] = random.choice([mother[param], father[param]])

    return child


In [0]:
def WOA(X_train, y_train, X_test, y_test, objf = obj_func, classes = 'multi', lb = -100, ub = 100,
        SearchAgents_no = 30, Max_iter = 50):

    dim = X_train.shape[1]        
    Leader_pos=np.zeros(dim)
    Leader_score=0
    Leader_f1 = 0  
 
    Positions = np.zeros((SearchAgents_no, dim))
    for i in range(dim):
        Positions[:, i] = np.random.uniform(0,1,SearchAgents_no) *(ub-lb)+lb
  
    convergence_curve=np.zeros(Max_iter)
 
    s=solution()

    t=0 
 
    while t<Max_iter:
        for i in range(0,SearchAgents_no):
            for j in range(dim):        
                Positions[i,j]=np.clip(Positions[i,j], lb, ub)
 
            fitness,f1score=objf(Positions[i,:],X_train,y_train, X_test, y_test, classes = classes)
       
            if fitness>Leader_score: 
                Leader_score=fitness; 
                Leader_pos=Positions[i,:].copy()
                Leader_f1= f1score  
        
        a=2-t*((2)/Max_iter);  
   
        for i in range(0,SearchAgents_no):
            r1=random.random() 
            r2=random.random() 
            
            A=2*a*r1-a                                            
            C=2*r2                                                
            
            b=1            
            l=random.randint(-1,1)                       
            
            p = random.random() 

            if p<0.5:
                
                if abs(A)>=1: 
                    rand_leader_index = math.floor(SearchAgents_no*random.random());
                    X_rand = Positions[rand_leader_index, :]
                    Positions[i,:]=crossover(X_rand, Positions[i,:])      
                    
                elif abs(A)<1:
                    D_Leader=abs(C*Leader_pos-Positions[i,:])
                    Positions[i,:]=crossover(Positions[i,:], D_Leader)    
                        
            elif p>=0.5:  
              
                distance2Leader=abs(Leader_pos-Positions[i,:])                    
                Positions[i,:]=distance2Leader*math.exp(b*l)*math.cos(l*2*math.pi)+Leader_pos
                
                    
        convergence_curve[t]=Leader_score
    
        t=t+1
    
    feature_array = np.zeros_like(Leader_pos)
    for i in range(len(Leader_pos)):
        feature_array[i] = 1/(1+math.exp(-Leader_pos[i]))
    feature_array = [round(i) for i in feature_array]
    feature_index = [i for i in range(len(feature_array)) if feature_array[i] > 0]
    
    s.convergence=convergence_curve
    s.optimizer="WOA"   
    s.objfname=objf.__name__
    s.bestfitness = Leader_score
    s.bestf1 = Leader_f1
    s.bestIndividual = feature_index
    return s

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
X_train=np.load("/content/drive/My Drive/Major2/X_train.npy")
y_train=np.load("/content/drive/My Drive/Major2/y_train.npy")
X_test=np.load("/content/drive/My Drive/Major2/X_test.npy")
y_test=np.load("/content/drive/My Drive/Major2/y_test.npy")

In [0]:
X_train = X_train.reshape(840, 208*176)
X_test = X_test.reshape(360, 208*176)

In [0]:
X_train.shape

(840, 36608)

In [0]:
sol = WOA(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,Max_iter=50)

In [0]:
print(sol.bestIndividual)

[38, 57, 60, 67, 80, 174, 198, 315, 316, 330, 356, 370, 427, 462, 517, 559, 572, 788, 806, 831, 834, 870, 873, 894, 942, 944, 967, 977, 1038, 1062, 1089, 1109, 1140, 1162, 1189, 1198, 1224, 1243, 1249, 1259, 1268, 1303, 1342, 1349, 1354, 1359, 1403, 1408, 1443, 1513, 1519, 1526, 1556, 1579, 1581, 1688, 1690, 1756, 1811, 1821, 1851, 1907, 1957, 1965, 1974, 1979, 2006, 2012, 2066, 2071, 2147, 2188, 2207, 2248, 2258, 2262, 2282, 2285, 2299, 2410, 2412, 2454, 2455, 2531, 2579, 2618, 2620, 2630, 2640, 2660, 2675, 2683, 2686, 2712, 2756, 2876, 2877, 2881, 2893, 2905, 2925, 2959, 2995, 3004, 3015, 3022, 3051, 3087, 3098, 3201, 3224, 3260, 3274, 3284, 3328, 3354, 3375, 3437, 3446, 3456, 3503, 3586, 3599, 3611, 3619, 3630, 3673, 3688, 3713, 3725, 3738, 3774, 3794, 3799, 3862, 3927, 3979, 4038, 4040, 4057, 4109, 4142, 4189, 4224, 4237, 4250, 4286, 4298, 4382, 4444, 4491, 4496, 4505, 4520, 4543, 4565, 4610, 4682, 4748, 4824, 4903, 4920, 4923, 4926, 4931, 4937, 4987, 4993, 5006, 5010, 5124, 5151, 

In [0]:
indices=sol.bestIndividual

In [0]:
print(len(sol.bestIndividual))

1239


In [0]:
X_train_WG=X_train[:,indices]
X_test_WG=X_test[:,indices]

In [0]:
X_train_WG.shape

(840, 1239)

In [0]:
np.save("X_train_WG.npy",X_train_WG)
np.save("y_train_WG.npy",y_train)
np.save("X_test_WG.npy",X_test_WG)
np.save("y_test_WG.npy",y_test)

In [0]:
from google.colab import files
files.download('X_train_WG.npy')
files.download('X_test_WG.npy')
files.download('y_train_WG.npy')
files.download('y_test_WG.npy')