# Feature selection on Breast Cancer Wisconsin (Classification Problem)

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append("../src/UBayFS")

from UBaymodel import UBaymodel
from UBayconstraint import UBayconstraint

In [2]:
# Feature table; its column names are later used as the feature names.
data = pd.read_csv("./data/data.csv")

# Diagnosis labels, recoded from letters to integers: "M" -> 0, "B" -> 1.
labels = (
    pd.read_csv("./data/labels.csv")
    .replace({"M": 0, "B": 1})
    .astype(int)
)

## Build UBaymodel

In [3]:
# Build the UBayFS model on the full feature table; the column names double as
# feature names. A single-entry weight list and M = 50 are passed here —
# presumably the one weight is shared by all features and M is the number of
# ensemble models; confirm against the UBaymodel documentation.
model = UBaymodel(data=data,
                 target = labels,
                 feat_names = data.columns,
                 weights = [0.01],
                 M = 50)

100%|██████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 53.87it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 58.53it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 65.39it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 62.92it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 55.86it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 81.80it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 59.72it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 43.95it/s]
100%|███████████████████████████████████

In [4]:
# Constraints attached to the freshly built model (none have been set yet).
model.constraints

[]

In [5]:
# Show the optimiser settings the model currently holds.
model.getOptim()

{'optim_method': 'GA', 'popsize': 100, 'maxiter': 100}

In [6]:
# Presumably the number of feature columns the model was built on — confirm.
model.ncol

30

In [7]:
# Prior feature weights: the same ten-value pattern repeated three times, which
# yields 30 entries in total.
strength = 1
weights = np.tile([10, 15, 20, 16, 15, 10, 12, 17, 21, 14], 3)

# Rescale so that the weights sum to `strength`, then hand them to the model.
weights = strength * weights / weights.sum()
model.setWeights(weights)

In [8]:
# Read back the weights now stored on the model.
model.getWeights()

array([0.02222222, 0.03333333, 0.04444444, 0.03555556, 0.03333333,
       0.02222222, 0.02666667, 0.03777778, 0.04666667, 0.03111111,
       0.02222222, 0.03333333, 0.04444444, 0.03555556, 0.03333333,
       0.02222222, 0.02666667, 0.03777778, 0.04666667, 0.03111111,
       0.02222222, 0.03333333, 0.04444444, 0.03555556, 0.03333333,
       0.02222222, 0.02666667, 0.03777778, 0.04666667, 0.03111111])

In [9]:
# Four user-defined constraints, given as parallel lists (one rho / type / vars
# entry per constraint); num_elements is the number of feature columns:
#   1. max_size 10                              rho = inf
#   2. must_link among features 0, 10 and 20    rho = 0.1
#   3. cannot_link between features 0 and 9     rho = 1
#   4. cannot_link among features 19, 22, 23    rho = 1
# np.inf is used because the np.Inf alias was removed in NumPy 2.0, and the
# stray fifth constraint type (which had no matching rho or vars entry) is gone.
constraints = UBayconstraint(
    rho=np.array([np.inf, 0.1, 1, 1]),
    constraint_types=["max_size", "must_link", "cannot_link", "cannot_link"],
    constraint_vars=[10, [0, 10, 20], [0, 9], [19, 22, 23]],
    num_elements=data.shape[1],
)

In [10]:
# Attach the constraint set to the model.
model.setConstraints(constraints)

In [11]:
# Inspect the constraints as stored on the model.
model.getConstraints()

{0: {'A': array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
           1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
           1.,  1.,  1.,  1.],
         [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.],
         [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.],
         [-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.],
         [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,
           0.,  0.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  0.,  0.,  0.,  0.,
           0.,  0.,  0.,  0.],
         [-1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
           0

In [12]:
# Keep the GA optimiser and population size of 100, but set maxiter to 200.
model.setOptim(optim_method="GA", popsize=100, maxiter=200)

In [13]:
# Confirm that the new optimiser settings were stored.
model.getOptim()

{'optim_method': 'GA', 'popsize': 100, 'maxiter': 200}

In [14]:
# Run the feature selection. The displayed result pairs a 0/1 indicator per
# feature with the list of selected feature names.
model.train()

(                         0
 mean.radius              1
 mean.texture             0
 mean.perimeter           1
 mean.area                0
 mean.smoothness          0
 mean.compactness         1
 mean.concavity           0
 mean.concave.points      0
 mean.symmetry            0
 mean.fractal.dimension   0
 radius.error             0
 texture.error            0
 perimeter.error          1
 area.error               0
 smoothness.error         0
 compactness.error        0
 concavity.error          0
 concave.points.error     0
 symmetry.error           0
 fractal.dimension.error  0
 worst.radius             1
 worst.texture            1
 worst.perimeter          1
 worst.area               0
 worst.smoothness         0
 worst.compactness        1
 worst.concavity          1
 worst.concave.points     1
 worst.symmetry           0
 worst.fractal.dimension  0,
 ['mean.radius',
  'mean.perimeter',
  'mean.compactness',
  'perimeter.error',
  'worst.radius',
  'worst.texture',
  'worst.perim

In [15]:
# Score a hand-picked feature subset: a 30-entry 0/1 mask in which only
# positions 2, 23 and 24 are switched on.
state = np.zeros(30)
state[[2,24,23]] = 1
model.evaluateFS(state=state)

{'cardinality': 3.0,
 'total utility': -0.864,
 'posterior feature utility': 0.136,
 'admissibility': 0.0,
 'number of violated constraints': 0,
 'average feature correlation': 0.449}

# Block model

## Version 1 with block list

In [16]:
# Three blocks of ten consecutive feature indices each: 0-9, 10-19 and 20-29.
block_list = [np.arange(0,10), np.arange(10,20), np.arange(20,30)]

In [17]:
# Block-level constraint: a max_size of 1 defined over the blocks in
# block_list. num_elements is the number of blocks here, not the number of
# features. Presumably this allows at most one block to be selected — confirm.
block_constraints = UBayconstraint(rho=np.array([1]), 
                             constraint_types=["max_size"], 
                             constraint_vars=[1], 
                             num_elements=len(block_list),
                                block_list = block_list)

In [18]:
# Constraint matrix A of the block constraint.
block_constraints.A

array([[1., 1., 1.]])

In [19]:
# Attribute b of the block constraint.
block_constraints.b

array([1.])

In [20]:
# rho values stored on the block constraint.
block_constraints.rho

array([1.])

In [21]:
# Show the block_matrix attribute of the block constraint as a table.
pd.DataFrame(block_constraints.block_matrix)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Version 2 with block matrix

In [22]:
# Hand-built block-membership matrix: one row per block, one column per
# feature, with a 1 wherever the feature belongs to the block.
block_matrix = np.zeros((3, model.ncol))
block_matrix[0, 0:10] = 1   # block 0: features 0-9
block_matrix[1, 10:20] = 1  # block 1: features 10-19
block_matrix[2, 20:30] = 1  # block 2: features 20-29


In [23]:
# Display the hand-built block matrix as a table.
pd.DataFrame(block_matrix)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [24]:
# Number of columns of the block matrix.
block_matrix.shape[1]

30

In [25]:
# Second block-level constraint set, this time defined via the explicit block
# matrix: a max_size of 1 plus a must_link between blocks 1 and 2.
# NOTE(review): rho has a single entry although two constraint types are
# given — confirm that UBayconstraint is meant to reuse it for both.
block_constraints_2 = UBayconstraint(rho=np.array([1]), 
                             constraint_types=["max_size", "must_link"], 
                             constraint_vars=[1,[1,2]], 
                             num_elements=3,
                                block_matrix = block_matrix)

In [26]:
# define block weights

def build_block_weights(blocks, weights):
    """Expand per-block weights into one weight per element.

    Parameters
    ----------
    blocks : iterable
        Block index of each element, in element order.
    weights : sequence
        Weight of each block; looked up as ``weights[block]``.

    Returns
    -------
    numpy.ndarray
        Array with one entry per item of ``blocks``, holding the weight of the
        block that item refers to.
    """
    return np.array([weights[block] for block in blocks])

In [27]:
# Assign the 30 features to blocks 0, 1 and 2 in runs of ten and give every
# feature its block's weight (0.5, 1 and 2 respectively).
prior_weights = build_block_weights(blocks = np.repeat([0,1,2], 10), weights=np.array([0.5,1,2]))

## Build model and add constraints

In [28]:
# Build a new model with the block-derived prior weights and nr_features = 10.
# `model` is rebound here, so the model trained earlier is no longer reachable
# under this name.
# NOTE(review): the target is passed as labels.values here but as the
# DataFrame `labels` for the first model — confirm which form UBaymodel expects.
model = UBaymodel(data=data,
                 target = labels.values,
                 feat_names = data.columns,
                 weights = prior_weights,
                 nr_features = 10)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 59.85it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 55.08it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 65.09it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 67.28it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 60.03it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 48.89it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 67.06it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 62.08it/s]
100%|███████████████████████████████████

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 49.71it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 47.74it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 63.23it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 46.20it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 64.05it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 63.65it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 64.47it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 59.66it/s]
100%|███████████████████████████████████

In [29]:
# Constraint matrix A of the block-list variant, shown as a table.
pd.DataFrame(block_constraints.A)

Unnamed: 0,0,1,2
0,1.0,1.0,1.0


In [30]:
# Constraint matrix A of the block-matrix variant, shown as a table.
pd.DataFrame(block_constraints_2.A)

Unnamed: 0,0,1,2
0,1.0,1.0,1.0
1,0.0,1.0,-1.0
2,0.0,-1.0,1.0


In [31]:
# Register both block-level constraint sets on the model; the second call
# appends to what the first one set.
model.setConstraints(block_constraints)
model.setConstraints(block_constraints_2, append=True)

# add max-size constraint
max_size_constraint = UBayconstraint(
    rho=np.array([1]),
    constraint_types=["max_size"],
    constraint_vars=[3],
    num_elements=model.ncol,
)
model.setConstraints(max_size_constraint, append=True)

In [32]:
# Inspect all constraints now registered on the model.
model.getConstraints()

{0: {'A': array([[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 0.,  1., -1.],
         [ 0., -1.,  1.]]),
  'b': array([1., 1., 0., 0.]),
  'rho': array([1., 1., 1., 1.]),
  'block_matrix': array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
          1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])},
 1: {'A': array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
          1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]]),
  'b': array([3.]),
  'rho': array([1.]),
  'block_matrix': array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0.

In [33]:
# Run the feature selection under the combined constraints. The displayed
# result pairs a 0/1 indicator per feature with the selected feature names.
model.train()

(                         0
 mean.radius              0
 mean.texture             0
 mean.perimeter           1
 mean.area                0
 mean.smoothness          0
 mean.compactness         0
 mean.concavity           1
 mean.concave.points      1
 mean.symmetry            0
 mean.fractal.dimension   0
 radius.error             0
 texture.error            0
 perimeter.error          0
 area.error               0
 smoothness.error         0
 compactness.error        0
 concavity.error          0
 concave.points.error     0
 symmetry.error           0
 fractal.dimension.error  0
 worst.radius             0
 worst.texture            0
 worst.perimeter          0
 worst.area               0
 worst.smoothness         0
 worst.compactness        0
 worst.concavity          0
 worst.concave.points     0
 worst.symmetry           0
 worst.fractal.dimension  0,
 ['mean.perimeter', 'mean.concavity', 'mean.concave.points'])

In [34]:
# Per-feature counts stored on the model — presumably how often each feature
# was picked by the ensemble's elementary selectors; confirm against UBaymodel.
model.counts

mean.radius                100
mean.texture                 0
mean.perimeter             100
mean.area                   99
mean.smoothness              0
mean.compactness             0
mean.concavity             100
mean.concave.points        100
mean.symmetry                0
mean.fractal.dimension       0
radius.error                 0
texture.error                0
perimeter.error              0
area.error                   0
smoothness.error             0
compactness.error            0
concavity.error              0
concave.points.error         0
symmetry.error               0
fractal.dimension.error      1
worst.radius               100
worst.texture                0
worst.perimeter            100
worst.area                 100
worst.smoothness             0
worst.compactness            1
worst.concavity             99
worst.concave.points       100
worst.symmetry               0
worst.fractal.dimension      0
dtype: object