In [1]:
import random
import math
import numpy as np
import pandas as pd

In [2]:
# Load the LPC table: every column except the last goes into X, the last column into y.
# NOTE(review): CostFunction further down reads 'merged_mfcc_lpc_IITM900.csv', not this
# file, while noV (the search dimension) is derived from this X — confirm both files
# expose the same feature columns.
dataset = pd.read_csv('LPC.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [3]:
# Initialisations: hyper-parameters shared by the cells below.

# BBA and LAHC draw from the stdlib `random` module; seeding it here makes a
# Restart & Run All reproduce the same search trajectory.
SEED = 0
random.seed(SEED)

Max_iteration = 30  # maximum number of BBA iterations
noP = 30  # number of artificial bats (population size)
noV = np.size(X, 1)  # dimension of the search space = number of columns of X
A = 0.9  # loudness
r = 0.2  # pulse rate
pi = math.pi


In [4]:
# Display the number of search dimensions (columns of X) as a quick sanity check.
noV

17

In [5]:
#define BBA

def BBA(n, A, r, d, Max_iter, CostFunction, local_search=None):
    """Binary Bat Algorithm (minimisation) with a local-search step on the best bat.

    Parameters
    ----------
    n : int
        Number of bats (population size). Must be >= 1.
    A : float
        Loudness; a bat adopts its new position only when ``random() < A``.
    r : float
        Pulse rate; each bit is overwritten with the best bat's bit when
        ``random() > r``.
    d : int
        Number of binary decision variables. Must be >= 1.
    Max_iter : int
        Number of iterations of the main loop (0 returns the best initial bat).
    CostFunction : callable
        Maps a length-``d`` 0/1 vector to a cost; lower is better.
    local_search : callable, optional
        ``local_search(CostFunction, d, candidate) -> vector``. Defaults to the
        notebook's ``LAHC``. Its result replaces the current best only when it
        has a strictly lower cost.

    Returns
    -------
    list
        ``[best, fmin]``: the best 0/1 vector found (NumPy array) and its cost,
        so ``CostFunction(best) == fmin`` for a deterministic cost function.

    Raises
    ------
    ValueError
        If ``n < 1`` or ``d < 1``.
    """
    if n < 1 or d < 1:
        raise ValueError("BBA needs n >= 1 bats and d >= 1 dimensions")
    if local_search is None:
        # Looked up at call time so LAHC may be defined in a later cell.
        local_search = LAHC

    Qmin = 0  # min frequency
    Qmax = 2  # max frequency

    v = np.zeros((n, d))  # velocities
    Sol = np.zeros((n, d))  # current position of every bat

    # Random binary initial population.
    for i in range(n):
        for j in range(d):
            Sol[i, j] = 0 if random.random() <= 0.5 else 1

    Fitness = [CostFunction(Sol[a, :]) for a in range(n)]

    # Current best. Copy it: a row view would change whenever Sol changes.
    fmin = min(Fitness)
    best = Sol[Fitness.index(fmin), :].copy()

    N_iter = 0  # number of completed iterations
    while N_iter < Max_iter:
        N_iter += 1
        for i in range(n):
            # Build the new position from the bat's current one.
            candidate = Sol[i, :].copy()
            for j in range(d):
                # Eq 3 in paper; identical to the previous Qmin+(Qmin+Qmax)*rand while Qmin == 0.
                freq = Qmin + (Qmax - Qmin) * random.random()
                v[i, j] = v[i, j] + (Sol[i, j] - best[j]) * freq  # Eq 1 in paper
                # Eq 9 in the paper: V-shaped transfer function gives the flip probability.
                flip_probability = abs((2 / math.pi) * math.atan((math.pi / 2) * v[i, j]))

                if random.random() < flip_probability:  # Eq 10 in the paper
                    candidate[j] = 1 - candidate[j]
                if random.random() > r:
                    candidate[j] = best[j]

            # Score the NEW position, not the one the bat already had.
            Fnew = CostFunction(candidate)

            if Fnew <= Fitness[i] and random.random() < A:  # improves and not too loud
                Sol[i, :] = candidate
                Fitness[i] = Fnew

            # Update current best.
            if Fnew <= fmin:
                best = candidate.copy()
                fmin = Fnew

            # Local search around the current best; keep its result only if it
            # really is better, so best and fmin always describe the same vector.
            refined = np.asarray(local_search(CostFunction, d, best.copy()), dtype=float).ravel()
            if refined.shape == best.shape and refined.tolist() != best.tolist():
                refined_cost = CostFunction(refined)
                if refined_cost < fmin:
                    best = refined.copy()
                    fmin = refined_cost

    #Output
    print("Number of iterations : ", str(N_iter))
    print(" fmin=", str(fmin))
    return [best, fmin]

In [6]:
#define LAHC
def LAHC(CostFunction, d, Candidate, Lfa=10, max_iter=100000, idle_ratio=0.02):
    """Late Acceptance Hill Climbing over 0/1 vectors, started from ``Candidate``.

    Each step flips one randomly chosen bit of the current solution. The
    neighbour is accepted when its cost is no worse than the current cost or
    no worse than the cost recorded ``Lfa`` steps earlier. The best solution
    seen is tracked separately and returned.

    The search stops when ``max_iter`` steps have been made, or when more than
    ``max(Lfa, idle_ratio * steps)`` consecutive steps brought no improvement
    over the current solution. (The earlier loop condition was already false
    on entry, so no search step was ever executed.)

    Parameters
    ----------
    CostFunction : callable
        Maps a length-``d`` 0/1 sequence to a cost; lower is better.
    d : int
        Number of bits.
    Candidate : sequence or None
        Starting solution (not modified). ``None`` starts from a random vector.
    Lfa : int, optional
        Length of the late-acceptance cost history.
    max_iter : int, optional
        Upper bound on the number of search steps.
    idle_ratio : float, optional
        Fraction of the elapsed steps tolerated as consecutive idle steps.

    Returns
    -------
    list of int
        Best 0/1 vector found; never worse than the starting solution.

    Raises
    ------
    ValueError
        If ``Lfa < 1`` or ``Candidate`` does not have ``d`` elements.
    """
    if Lfa < 1:
        raise ValueError("Lfa must be at least 1")

    if Candidate is None:
        s = [random.randint(0, 1) for _ in range(d)]
    else:
        # Work on a private copy so the caller's vector is never mutated.
        s = [1 if bit else 0 for bit in np.asarray(Candidate).ravel().tolist()]
        if len(s) != d:
            raise ValueError("Candidate must contain exactly d elements")

    if d < 1:
        return s  # nothing to flip

    Cs = CostFunction(s)  # cost of the current solution
    best = list(s)  # best solution seen so far
    Cbest = Cs  # cost of best
    f = [Cs] * Lfa  # late-acceptance history of costs
    I = 0
    I_idle = 0

    while I < max_iter and I_idle <= max(Lfa, idle_ratio * I):

        # Neighbour of the current solution: flip one random bit.
        candidate_sol = list(s)
        flip_at = random.randrange(d)
        candidate_sol[flip_at] = 1 - candidate_sol[flip_at]
        candidate_cost = CostFunction(candidate_sol)

        if candidate_cost >= Cs:
            I_idle = I_idle + 1
        else:
            I_idle = 0

        v = I % Lfa

        if candidate_cost <= f[v] or candidate_cost <= Cs:  # accept the candidate
            s = candidate_sol
            Cs = candidate_cost
            if Cs < Cbest:
                best = list(s)
                Cbest = Cs

        if Cs < f[v]:
            f[v] = Cs

        I = I + 1

    return best
                
                

In [7]:
#Define cost func
_SPLIT_CACHE = {}  # csv path -> [X_train, X_test, y_train, y_test], filled on first use


def _load_split(csv_path):
    """Read ``csv_path`` once and return its fixed 75/25 train/test split."""
    if csv_path not in _SPLIT_CACHE:
        from sklearn.model_selection import train_test_split

        dataset = pd.read_csv(csv_path)
        X = dataset.iloc[:, :-1].values
        y = dataset.iloc[:, -1].values
        # random_state=0 makes the split identical on every call, so caching it
        # changes nothing except avoiding a CSV read per evaluation.
        _SPLIT_CACHE[csv_path] = train_test_split(X, y, test_size=0.25, random_state=0)
    return _SPLIT_CACHE[csv_path]


def CostFunction(solution):
    """Cost of a feature subset; lower is better.

    Parameters
    ----------
    solution : sequence of 0/1
        Feature mask: entry ``k`` non-zero means column ``k`` of the dataset is used.

    Returns
    -------
    float
        ``-(0.9 * accuracy + 0.1 * fraction_of_unselected_features)`` where
        accuracy is that of a RandomForest trained on the selected columns and
        scored on the held-out 25% split. Returns ``0.0`` (the worst value the
        formula can produce) when the mask selects no feature at all, because
        no classifier can be trained on zero columns.
    """
    np_sol = np.asarray(solution).ravel()  # 1 x d vector
    indices = np.nonzero(np_sol)[0]  # positions of the selected features

    if indices.size == 0:
        return 0.0

    # Count every zero in the mask and divide by the mask's own length.
    unselected_features = (np_sol.size - indices.size) / np_sol.size

    X_train, X_test, y_train, y_test = _load_split('merged_mfcc_lpc_IITM900.csv')

    # From the split, keep only the columns whose mask bit is set.
    X_train_new = X_train[:, indices]
    X_test_new = X_test[:, indices]

    # Feature scaling (fitted on the training part only).
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train_new = sc.fit_transform(X_train_new)
    X_test_new = sc.transform(X_test_new)

    # Train the Random Forest model on the training set.
    from sklearn.ensemble import RandomForestClassifier
    classifier = RandomForestClassifier(n_jobs = -1, verbose = 0, n_estimators=50, criterion='entropy', random_state = 0)
    classifier.fit(X_train_new, y_train)

    # Predict the test set results.
    y_pred = classifier.predict(X_test_new)

    from sklearn.metrics import accuracy_score
    accuracy = accuracy_score(y_test, y_pred)

    alpha = 0.9  # weight of accuracy versus subset size
    return -(alpha*accuracy+(1-alpha)*unselected_features)

In [None]:
%%time
# Run the search; BBA returns a two-element list [best feature mask, its cost].
gbest = BBA(noP, A, r, noV, Max_iteration, CostFunction)

In [None]:
# Show the feature mask, i.e. the first element of the pair returned by BBA.
gbest[0]

In [None]:
# Copy the returned mask into a plain Python list, element by element.
indices1 = [gbest[0][position] for position in range(len(gbest[0]))]

In [None]:
# Column positions whose mask bit is 1, i.e. the features kept by the search.
List = [i for i, bit in enumerate(gbest[0]) if bit == 1]

df_train = pd.read_csv("merged_mfcc_lpc_IITM900.csv")
y = (df_train['label'])

# Select by position and take an explicit copy: adding a column to a frame that
# was sliced out of df_train otherwise raises SettingWithCopyWarning.
df_train1 = df_train.iloc[:, List].copy()

# Renumber the kept columns 0..k-1.
l = list(range(len(List)))
df_train1.columns = l

df_train1['label'] = y
df_train1

In [None]:
# Persist the reduced table. index=True is the pandas default, spelled out here
# because it writes the row index as an extra unnamed first column.
# NOTE(review): if this file is later read with iloc[:, :-1] as the feature
# matrix, that index column will be treated as a feature — confirm whether
# index=False is wanted.
df_train1.to_csv('merged_mfcc_lpc_IITM900_BBALAHC.csv', index=True)

In [None]:
# NOTE(review): looks like a leftover scratch cell — nothing else in the notebook reads this value; consider deleting it.
2+2