# MERCS 101 - Lecture 02: Regression

This is the second part of the tutorial, focusing on MERCS as a simple regressor.

## Preliminaries

### External Imports

In [1]:
import numpy as np
import os
import sys
from sklearn.metrics import (mean_absolute_error,
                             mean_squared_error,
                             mean_squared_log_error)
import pandas as pd

### MERCS imports

In [2]:
# The `src` package is imported relative to the parent directory, so that
# directory has to be on sys.path before the imports below run.
sys.path.insert(0, '..') # We add the parent dir to the path
from src.mercs.core import MERCS
# NOTE(review): wildcard import; the cells visible here do not obviously use any
# name from utils -- confirm, then import the needed names explicitly.
from src.mercs.utils import *

import src.datasets as datasets

## Induction

### Importing Data

First, we import the slump dataset.

In [3]:
# Load the slump data, which comes pre-split into a train and a test set
train, test = datasets.load_slump()

load_example_dataset is loading fname: ../resc/data/slump_train.csv

load_example_dataset is loading fname: ../resc/data/slump_test.csv



This is a fully numerical dataset

In [4]:
train.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9
0,0.57384,0.42487,0.403846,0.625,0.315068,0.573267,0.150727,0.793103,0.724138,0.430576
1,0.109705,0.772021,0.734615,0.25,0.520548,0.394852,0.403213,0.0,0.0,0.579342
2,0.105485,0.766839,0.734615,0.2375,0.794521,0.386078,0.391737,0.034483,0.0,0.595549
3,0.105485,0.766839,0.730769,0.2375,1.0,0.380228,0.384086,0.103448,0.025862,0.60208
4,0.07173,0.580311,0.553846,0.75,0.383562,0.628839,0.066565,0.689655,0.758621,0.232946


In [5]:
test.head()

Unnamed: 0,Var0,Var1,Var2,Var3,Var4,Var5,Var6,Var7,Var8,Var9
0,0.586498,0.466321,0.446154,0.25,0.315068,0.473823,0.487376,0.0,0.0,0.65046
1,0.742616,0.580311,0.0,0.75,0.383562,0.251536,0.567712,0.793103,0.655172,0.26536
2,0.780591,0.601036,0.0,0.45,0.383562,0.321732,0.659526,0.87931,0.810345,0.304306
3,0.662447,0.549223,0.523077,0.5875,0.109589,0.114068,0.525631,0.827586,0.465517,0.582487
4,0.037975,0.549223,0.526923,0.6125,0.109589,0.488447,0.475899,0.827586,0.810345,0.258829


### Training

In [6]:
# Create the model with default constructor arguments; the induction and
# selection settings are supplied later, when calling fit()
model = MERCS()

In [7]:
# Induction settings (keys prefixed `ind_`), unpacked into model.fit()
ind_parameters = dict(ind_type='RF', ind_n_estimators=10, ind_max_depth=4)

# Selection settings (keys prefixed `sel_`), unpacked into model.fit()
sel_parameters = dict(sel_type='Base', sel_its=4, sel_param=1)

In [8]:
# Train on the training set; both settings dicts are unpacked into keyword arguments.
# NOTE(review): no random seed is set anywhere visible -- if induction is
# stochastic, results will differ between runs; confirm whether fit() accepts one.
model.fit(train, **ind_parameters, **sel_parameters)

  return bool(asarray(a1 == a2).all())


## Inference

### Prediction

In [9]:
# Query code with one entry per column of `test`; a 1 flags a column as a
# prediction target (here the last two columns)
code = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]

# Ground truth: the test-set values of the flagged columns
target_boolean = np.asarray(code) == 1
y_true = test.loc[:, target_boolean].values

In [10]:
y_true

array([[0.        , 0.6504596 ],
       [0.65517241, 0.26536043],
       [0.81034483, 0.30430576],
       [0.46551724, 0.5824867 ],
       [0.81034483, 0.25882922],
       [0.6637931 , 0.27842283],
       [0.32758621, 0.37203677],
       [0.75      , 0.4109821 ],
       [0.18965517, 0.32994678],
       [0.56896552, 0.32027092],
       [0.63793103, 0.22327044],
       [0.75862069, 0.21988389],
       [0.94827586, 0.45960329],
       [0.44827586, 0.46274794],
       [0.56034483, 0.40130624],
       [0.68965517, 0.33333333],
       [0.60344828, 0.2394775 ],
       [1.        , 0.51451379],
       [0.12931034, 0.50145138],
       [0.28448276, 0.50798258],
       [0.37068966, 0.44339623],
       [0.12068966, 0.37856797],
       [1.        , 0.66013546],
       [0.        , 0.5890179 ],
       [0.        , 0.59554911],
       [0.        , 0.57619739],
       [0.75862069, 0.53072085],
       [0.98275862, 0.6504596 ],
       [0.        , 0.79293662],
       [0.        , 0.79922593],
       [0.

In [11]:
# Prediction settings (keys prefixed `pred_`), unpacked into model.predict()
pred_parameters = dict(pred_type='IT', pred_param=0.1, pred_its=4)

In [12]:
# Predict the columns flagged in `code` for every row of the test set
y_pred = model.predict(test, **pred_parameters, qry_code=code)

SETTINGS.PY: I AM READING A SINGLE QUERY CODE, I.E: [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
Predicting q_code: [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer_res: <class 'list'> 
And type of numer_res[0]: <class 'numpy.ndarray'>
And shape of numer_res[0]: (33, 1)


Type of numer

In [13]:
y_pred

array([[0.06163793, 0.44132801],
       [0.7362069 , 0.35675496],
       [0.69185345, 0.40109458],
       [0.79112069, 0.53217223],
       [0.75556034, 0.28793541],
       [0.6475    , 0.30042937],
       [0.25396552, 0.42712869],
       [0.62905172, 0.39920779],
       [0.43103448, 0.39887518],
       [0.61935345, 0.36513062],
       [0.67232759, 0.35948234],
       [0.65094828, 0.33841316],
       [0.77349138, 0.39605104],
       [0.54586207, 0.39646831],
       [0.61375   , 0.38619376],
       [0.65866379, 0.35250363],
       [0.70383621, 0.30587808],
       [0.80581897, 0.48883648],
       [0.18146552, 0.32245404],
       [0.26982759, 0.33459724],
       [0.21918103, 0.34804669],
       [0.3299569 , 0.49918965],
       [0.82314655, 0.55646468],
       [0.03232759, 0.58577649],
       [0.03232759, 0.58577649],
       [0.03232759, 0.52259918],
       [0.75905172, 0.52040397],
       [0.82142241, 0.55646468],
       [0.0737069 , 0.62743106],
       [0.0737069 , 0.62257499],
       [0.

### Evaluation 

In [14]:
# Ground truth for evaluation: the test-set columns flagged with 1 in `code`
y_true = test.loc[:, np.asarray(code) == 1].values

In [15]:
# Score the predictions with three regression error metrics, in this order:
# mean absolute error, mean squared error, mean squared log error
obs = [metric(y_true, y_pred)
       for metric in (mean_absolute_error,
                      mean_squared_error,
                      mean_squared_log_error)]
obs_1, obs_2, obs_3 = obs

# Sanity check: every score must be a non-negative real number
for o in obs:
    assert isinstance(o, (int, float)) and 0 <= o

In [16]:
obs_3

0.0059014208413006464