# MERCS 101 - Lecture 01: Classification

This is the first part of the tutorial, focusing on MERCS as a simple classifier.

## Preliminaries

### External Imports

In [1]:
import numpy as np
import os
import sys
from sklearn.metrics import f1_score, accuracy_score, classification_report
import pandas as pd

### MERCS imports

In [2]:
sys.path.insert(0, '..') # Put the parent directory first on sys.path for the `src.*` imports below
from src.mercs.core import MERCS
# NOTE(review): wildcard import; none of the visible cells show which names from utils are needed — prefer explicit imports.
from src.mercs.utils import *

import src.datasets as datasets

  from numpy.core.umath_tests import inner1d


## Induction

### Importing Data

First, we load the nursery dataset, which comes as a training set and a test set.

In [64]:
# load_nursery() returns two objects, unpacked here as the train and test frames.
train, test = datasets.load_nursery()

This is a fully nominal dataset: every attribute is categorical (stored here as integer codes).

In [65]:
# Peek at the first rows of the training frame.
train.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0,0,0,0,0,2,2
1,2,3,0,0,0,0,0,1,1
2,2,3,0,0,0,0,0,0,0
3,2,3,0,0,0,0,2,2,2
4,2,3,0,0,0,0,2,1,1


### Training

In [5]:
# Create a MERCS instance without passing any constructor arguments.
model = MERCS()

In [6]:
# Induction settings passed to model.fit().
# Presumably 'RF' selects a random-forest learner with 30 estimators — confirm against the MERCS docs.
ind_parameters = dict(ind_type='RF', ind_n_estimators=30)

# Selection settings, passed to model.fit() together with the induction settings.
sel_parameters = dict(sel_type='Base', sel_its=4, sel_param=2)

In [7]:
# Train on the full training frame; both parameter dicts are expanded into keyword arguments.
model.fit(train, **ind_parameters, **sel_parameters)

is_nominal in this model is: [1 1 1 1 1 1 1 1 1]



In [8]:
# Grab the first entry of the model's m_list for inspection.
# NOTE(review): `mod` is not referenced again in the visible cells.
mod = model.m_list[0]

In [66]:
# Show the first rows again as a baseline before the frame is modified below.
train.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0,0,0,0,0,2,2
1,2,3,0,0,0,0,0,1,1
2,2,3,0,0,0,0,0,0,0
3,2,3,0,0,0,0,2,2,2
4,2,3,0,0,0,0,2,1,1


In [67]:
# Number of distinct values in each column, returned as a plain NumPy array.
train.nunique().values

array([3, 5, 4, 4, 3, 2, 3, 3, 5])

In [68]:
# Overwrite rows 1-4 of the third column (Var2) with NaN, in place.
# NOTE(review): this mutates `train` itself, so every later cell (and any re-run of an earlier one) sees the modified frame.
train.iloc[1:5,2]=np.nan

In [69]:
# Six rows are enough to show the NaN block plus one untouched row on either side of it.
train.head(6)

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8
0,2,3,0.0,0,0,0,0,2,2
1,2,3,,0,0,0,0,1,1
2,2,3,,0,0,0,0,0,0
3,2,3,,0,0,0,2,2,2
4,2,3,,0,0,0,2,1,1
5,2,3,0.0,0,0,0,2,0,0


In [73]:
# One boolean per column: True where the column holds at least one missing value.
train.isnull().any().values

array([False, False,  True, False, False, False, False, False, False])

In [53]:
# Underlying NumPy array of the whole frame.
vals = train.values

In [54]:
# vals[2] is the row at index 2 of the array (not column Var2); its dtype is the one shared by the whole array.
vals[2].dtype

dtype('float64')

In [56]:
# Mixing integer literals with a NaN makes NumPy build a float array.
np.array([1,2,np.nan])

array([ 1.,  2., nan])

In [61]:
# Check whether the third column (selected by position) is stored with a float dtype.
pd.api.types.is_float_dtype(train.iloc[:,2].dtype)

True

In [34]:
# Per-column dtypes after discarding the rows that contain NaN.
# Dropping the rows does not convert a float column back to an integer dtype.
types = train.dropna().dtypes
types

Var0      int64
Var1      int64
Var2    float64
Var3      int64
Var4      int64
Var5      int64
Var6      int64
Var7      int64
Var8      int64
dtype: object

In [46]:
# Small boolean list used by the next cell to try a bool -> int cast.
x=[True, False]
x

[True, False]

In [48]:
# Convert the boolean list to a NumPy array and cast it to integers.
y = np.array(x).astype(int)

In [49]:
# Display the cast result: True became 1 and False became 0.
y

array([1, 0])

ValueError: invalid literal for int() with base 10: 'True'

In [31]:
# Try to cast Var2 back to a plain integer dtype.
# While the column contains NaN this raises ValueError, because NumPy integer
# dtypes cannot represent missing values. The error is caught and printed so the
# demonstration stays visible without aborting a Restart & Run All.
# If an integer column with missing values is actually wanted, pandas' nullable
# 'Int64' dtype is the usual choice.
try:
    train['Var2'] = train['Var2'].astype(int)
except ValueError as err:
    print("ValueError:", err)

ValueError: Cannot convert non-finite values (NA or inf) to integer

In [28]:
# Check whether the third column's dtype (after dropna) is an integer dtype.
# `types` is indexed by column name, so the entry is looked up by position with
# .iloc; plain `types[2]` relies on the deprecated positional fallback of
# Series.__getitem__.
pd.api.types.is_integer_dtype(types.iloc[2])

False

In [12]:
# Fit again on `train` with the same induction and selection settings.
# NOTE(review): the output recorded for this cell reports Var2 as non-nominal and ends in a TypeError about mixed targets — confirm whether this failure is the intended demonstration.
model.fit(train, **ind_parameters, **sel_parameters)

is_nominal in this model is: [1 1 0 1 1 1 1 1 1]



TypeError: Model with mixed targets [1, 2]

## Inference

### Prediction

In [None]:
# Query code with nine entries; only the last one is set to 1.
# The evaluation cell uses the positions equal to 1 to pick the target column(s).
code = [0] * 8 + [1]
len(code)

In [None]:
# Prediction settings, expanded into keyword arguments of model.predict().
# NOTE(review): the meaning of 'MI', 1.0 and 8 is defined by MERCS — confirm against its docs.
pred_parameters = dict(pred_type='MI', pred_param=1.0, pred_its=8)

In [None]:
# Predict on the test frame; `code` is passed as the query code together with the prediction settings.
y_pred = model.predict(test,
                       **pred_parameters,
                       qry_code=code)

In [None]:
# Display the raw predictions.
y_pred

### Evaluation 

In [None]:
# Ground truth: the values of the test column(s) whose position in `code` is flagged with 1.
target_mask = np.array(code) == 1
target_columns = test.columns.values[target_mask]
y_true = test[target_columns].values

In [None]:
# Macro-averaged F1 score of the predictions against the test-set target values.
obs = f1_score(y_true, y_pred, average='macro')
obs

In [None]:
# Sanity checks on the score: it must be a real number and lie in the closed interval [0, 1].
assert isinstance(obs, (float, int))
assert obs >= 0 and obs <= 1