# MERCS 101 - Lecture 01: Classification

This is the first part of the tutorial, focusing on MERCS as a simple classifier.

## Preliminaries

### External Imports

In [1]:
import numpy as np
import os
import sys
from sklearn.metrics import f1_score, accuracy_score, classification_report
import pandas as pd

### MERCS imports

In [2]:
sys.path.insert(0, '..') # We add the parent dir to the path
from src.mercs.core import MERCS
from src.mercs.utils import *

import src.datasets as datasets

  from numpy.core.umath_tests import inner1d


## Induction

### Importing Data

First, we import the nursery dataset.

In [44]:
train, test = datasets.load_nursery()

This is a fully nominal dataset

In [20]:
train.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0,0,0,0,0,2,2
1,2,3,0,0,0,0,0,1,1
2,2,3,0,0,0,0,0,0,0
3,2,3,0,0,0,0,2,2,2
4,2,3,0,0,0,0,2,1,1


### Training

In [21]:
model = MERCS()

In [22]:
ind_parameters = {'ind_type':           'RF',
                  'ind_n_estimators':   30}

sel_parameters = {'sel_type':           'Base',
                  'sel_its':            4,
                  'sel_param':          2}

In [30]:
train.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0,0,0,0,0,2,2
1,2,3,0,0,0,0,0,1,1
2,2,3,0,0,0,0,0,0,0
3,2,3,0,0,0,0,2,2,2
4,2,3,0,0,0,0,2,1,1


In [31]:
model.fit(train, **ind_parameters, **sel_parameters)

is_nominal in this model is: [1 1 1 1 1 1 1 1 1]



In [73]:
train.iloc[1:5,2] = np.nan

In [99]:
train.head(6)

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0.0,0,0,0,0,2,2
1,2,3,,0,0,0,0,1,1
2,2,3,,0,0,0,0,0,0
3,2,3,,0,0,0,2,2,2
4,2,3,,0,0,0,2,1,1
5,2,3,0.0,0,0,0,2,0,0


In [98]:
train.shape

(11680, 9)

In [87]:
m_desc = [0,-1]
m_targ = [2,3]

In [95]:
X_Y  = train.iloc[:,m_desc+m_targ].dropna().values

In [104]:
X_Y.shape

(11676, 4)

In [106]:
X_Y 

array([[2., 2., 0., 0.],
       [2., 0., 0., 0.],
       [2., 1., 0., 0.],
       ...,
       [0., 3., 2., 3.],
       [0., 3., 2., 3.],
       [0., 0., 2., 3.]])

In [107]:
len(m_desc)

2

In [108]:
X = X_Y[:,0:len(m_desc)]
Y = X_Y[:,len(m_desc):]

In [109]:
X

array([[2., 2.],
       [2., 0.],
       [2., 1.],
       ...,
       [0., 3.],
       [0., 3.],
       [0., 0.]])

In [110]:
Y

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       ...,
       [2., 3.],
       [2., 3.],
       [2., 3.]])

In [117]:
X_Y_again = np.hstack((X,Y))

In [119]:
np.array_equal(X_Y, X_Y_again)

True

In [74]:
train_data = train.values

In [75]:
train_data

array([[ 2.,  3.,  0., ...,  0.,  2.,  2.],
       [ 2.,  3., nan, ...,  0.,  1.,  1.],
       [ 2.,  3., nan, ...,  0.,  0.,  0.],
       ...,
       [ 0.,  4.,  2., ...,  1.,  2.,  3.],
       [ 0.,  4.,  2., ...,  1.,  1.,  3.],
       [ 0.,  4.,  2., ...,  1.,  0.,  0.]])

In [78]:
X_Y = train_data[:, m_desc+m_targ]

In [79]:
X_Y.shape

(11680, 4)

In [51]:
X_Y = train_data[:, m_desc+m_targ]

In [52]:
X_Y

array([[2, 0, 0, 0],
       [2, 0, 0, 0],
       [2, 0, 0, 0],
       ...,
       [0, 2, 3, 1],
       [0, 2, 3, 1],
       [0, 2, 3, 1]])

In [None]:
m_desc + m_targ

In [38]:
desc = X[:, [0,1]]

In [34]:
model.fit(train, **ind_parameters, **sel_parameters)

is_nominal in this model is: [1 1 1 1 1 1 1 1 1]



ValueError: Input contains NaN, infinity or a value too large for dtype('float32').

## Inference

### Prediction

In [8]:
code = [0,0,0,0,0,0,0,0,1]
len(code)

9

In [9]:
pred_parameters = {'pred_type':    'MI',
                   'pred_param':   1.0,
                   'pred_its':     8}

In [10]:
y_pred = model.predict(test,
                       **pred_parameters,
                       qry_code=code)

SETTINGS.PY: I AM READING A SINGLE QUERY CODE, I.E: [0, 0, 0, 0, 0, 0, 0, 0, 1]
Predicting q_code: [0, 0, 0, 0, 0, 0, 0, 0, 1]


In [11]:
y_pred

array([[4.],
       [0.],
       [1.],
       ...,
       [1.],
       [3.],
       [0.]])

### Evaluation 

In [12]:
y_true = test[test.columns.values[np.array(code)==1]].values

In [13]:
obs = f1_score(y_true, y_pred, average='macro')
obs

0.7804896144522

In [14]:
assert isinstance(obs, (int, float))
assert 0 <= obs <= 1