<a href="https://colab.research.google.com/github/fsrt16/Algorithms/blob/master/Feature_Selection_using_Pyswarms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install pyswarms

Collecting pyswarms
  Downloading pyswarms-1.3.0-py2.py3-none-any.whl (104 kB)
[K     |████████████████████████████████| 104 kB 4.4 MB/s 
Installing collected packages: pyswarms
Successfully installed pyswarms-1.3.0


In [3]:
# Import modules
import numpy as np
import seaborn as sns
import pandas as pd

# Import PySwarms
import pyswarms as ps

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
from sklearn.datasets import make_classification

# Synthetic 3-class toy problem with 15 features, of which 4 are informative,
# 1 is redundant and 2 are repeated. random_state is fixed so the same data
# is generated on every run.
X, y = make_classification(
    n_samples=100,
    n_features=15,
    n_classes=3,
    n_informative=4,
    n_redundant=1,
    n_repeated=2,
    random_state=1,
)

In [5]:
# Show the toy dataset as a table: one column per feature plus the class label
df = pd.DataFrame(X).assign(labels=pd.Series(y))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,labels
0,0.655084,-2.128963,-0.685016,1.922747,0.645288,-2.224576,1.198653,-1.442570,0.225801,1.922747,2.454673,1.198653,0.884753,-0.601173,0.918193,0
1,-0.303401,-1.378328,-0.566002,0.114298,-0.239843,-0.310322,1.501327,-0.044018,-0.472958,0.114298,0.492101,1.501327,-0.494118,-0.512464,0.141817,0
2,0.724706,-0.986521,0.389249,-1.895799,0.042914,0.472371,-0.318019,0.951178,0.644809,-1.895799,0.418651,-0.318019,-0.271892,-2.731398,0.936142,0
3,0.049146,-0.588923,0.326328,0.922614,1.131195,-0.983339,0.549084,1.621353,1.479451,0.922614,-0.951512,0.549084,-0.470488,0.787868,-0.696832,2
4,0.378306,-1.327988,0.647906,0.877558,-1.091076,-1.513970,1.208688,-0.459604,1.786182,0.877558,-1.650467,1.208688,-0.393068,1.295161,-0.512984,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-0.177226,2.043693,1.136525,0.267609,-0.840022,2.573524,-0.365685,-1.487846,-0.479539,0.267609,-0.702859,-0.365685,1.137251,-1.456116,0.617562,2
96,0.540818,-0.568785,0.470516,2.667254,-0.087918,-3.171549,-1.952123,-1.275209,-0.458240,2.667254,1.099393,-1.952123,-0.022824,-0.656279,-1.181429,0
97,0.671385,-0.336407,0.563321,1.780646,0.784192,-0.075812,0.302305,0.317170,-0.217989,1.780646,-0.837611,0.302305,0.572541,-2.339250,-0.068524,0
98,1.917319,-0.306018,1.849917,-0.225949,0.609191,1.034543,1.791212,0.073941,0.314464,-0.225949,-0.253615,1.791212,0.297657,0.307610,-0.000731,1


In [6]:
from sklearn import linear_model

# Create an instance of the classifier.
# This module-level object is re-fitted by f_per_particle on every
# evaluation of the objective function.
classifier = linear_model.LogisticRegression()

# Define objective function
def f_per_particle(m, alpha):
    """Computes the objective function for a single particle

    The module-level ``classifier`` is fitted on the columns of the
    module-level ``X`` selected by ``m`` and scored on those same samples,
    i.e. ``P`` is the training accuracy.

    Inputs
    ------
    m : numpy.ndarray
        Binary mask that can be obtained from BinaryPSO, will
        be used to mask features. If no entry is set, all features
        are used instead of an empty subset.
    alpha: float
        Constant weight for trading-off classifier performance
        and number of features. There is no default here; ``f``
        supplies the value.

    Returns
    -------
    float
        Computed objective function value for this particle
    """
    # Derive the feature count from the data rather than hard-coding it,
    # so the objective stays correct if the dataset changes
    total_features = X.shape[1]
    # Get the subset of the features from the binary mask
    if np.count_nonzero(m) == 0:
        # An empty selection cannot be fitted; fall back to every feature
        X_subset = X
    else:
        X_subset = X[:, m == 1]
    # Perform classification and store performance in P
    classifier.fit(X_subset, y)
    P = (classifier.predict(X_subset) == y).mean()
    # Compute for the objective function
    # NOTE(review): the second term shrinks as MORE features are selected, so
    # a minimizer is rewarded for keeping features -- confirm this is the
    # intended direction of the trade-off.
    j = (alpha * (1.0 - P)
        + (1.0 - alpha) * (1 - (X_subset.shape[1] / total_features)))

    return j
def f(x, alpha=0.88):
    """Evaluate the objective function for every particle of the swarm.

    Inputs
    ------
    x: numpy.ndarray of shape (n_particles, dimensions)
        The swarm that will perform the search; each row is handed
        to ``f_per_particle`` as one mask
    alpha: float (default is 0.88)
        Trade-off weight forwarded unchanged to ``f_per_particle``

    Returns
    -------
    numpy.ndarray of shape (n_particles, )
        The computed loss for each particle
    """
    swarm_size = x.shape[0]
    losses = []
    for idx in range(swarm_size):
        losses.append(f_per_particle(x[idx], alpha))
    return np.array(losses)

In [9]:
# Seed NumPy's global RNG so the stochastic swarm search gives the same result
# on every Restart & Run All.
# NOTE(review): assumes PySwarms draws its random numbers from np.random -- confirm.
np.random.seed(1)

# Swarm hyperparameters (arbitrary choices). Per the PySwarms BinaryPSO docs:
# c1 = cognitive coefficient, c2 = social coefficient, w = inertia weight,
# k = number of neighbours considered, p = Minkowski p-norm (1 or 2)
options = {'c1': 0.5, 'c2': 0.5, 'w':0.9, 'k': 30, 'p':2}

# Call instance of PSO
dimensions = X.shape[1]  # one binary dimension per feature of X
optimizer = ps.discrete.BinaryPSO(n_particles=30, dimensions=dimensions, options=options)

# Perform optimization (the recorded run of 1000 iterations took about four minutes)
cost, pos = optimizer.optimize(f, iters=1000, verbose=2)

2021-11-13 13:55:03,924 - pyswarms.discrete.binary - INFO - Optimize for 1000 iters with {'c1': 0.5, 'c2': 0.5, 'w': 0.9, 'k': 30, 'p': 2}
pyswarms.discrete.binary: 100%|██████████|1000/1000, best_cost=0.193
2021-11-13 13:59:03,037 - pyswarms.discrete.binary - INFO - Optimization finished | best cost: 0.19279999999999997, best pos: [1 1 1 1 1 1 1 1 1 1 0 1 1 1 1]


In [12]:
# Create a fresh instance of LogisticRegression for the final evaluation
# (the name was previously misspelled, leaving the new instance unused)
classifier = linear_model.LogisticRegression()

# Get the selected features from the final positions
X_selected_features = X[:, pos == 1]  # subset

# Fit on the selected features and predict the same samples,
# so y_pred reflects training-set performance
classifier.fit(X_selected_features, y)
y_pred = classifier.predict(X_selected_features)


In [13]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the current y_pred
print(classification_report(y_true=y, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.81      0.85      0.83        34
           1       0.84      0.81      0.83        32
           2       0.73      0.71      0.72        34

    accuracy                           0.79       100
   macro avg       0.79      0.79      0.79       100
weighted avg       0.79      0.79      0.79       100



In [14]:
# Create a fresh instance of LogisticRegression for the all-features baseline
# (the name was previously misspelled, leaving the new instance unused)
classifier = linear_model.LogisticRegression()


# Fit on every feature and predict the same samples,
# so y_pred reflects training-set performance
classifier.fit(X, y)
y_pred = classifier.predict(X)


In [15]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 for the current y_pred
print(classification_report(y_true=y, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.75      0.79      0.77        34
           1       0.84      0.81      0.83        32
           2       0.67      0.65      0.66        34

    accuracy                           0.75       100
   macro avg       0.75      0.75      0.75       100
weighted avg       0.75      0.75      0.75       100

